Code Example #1
    def __init__(self, ip_addr, kafka_config_infile, s3bucket_config_infile):
        if not os.path.exists('./tmp'):
            os.makedirs('./tmp')
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s %(levelname)s %(message)s',
                            filename='./tmp/kafka_producer.log',
                            filemode='w')
        self.logger = logging.getLogger('py4j')

        self.kafka_config = helpers.parse_config(kafka_config_infile)
        self.s3bucket_config = helpers.parse_config(s3bucket_config_infile)
        self.producer = KafkaProducer(bootstrap_servers=ip_addr)
Code Example #2
File: producers.py  Project: wgong/TaxiOptimizer
    def __init__(self, kafka_configfile, schema_file, s3_configfile):
        """
        class constructor that initializes the instance according to the configurations
        of the S3 bucket and Kafka
        :type kafka_configfile: str     path to kafka config file
        :type schema_file     : str     path to schema file
        :type s3_configfile   : str     path to S3 config file
        """
        self.kafka_config = helpers.parse_config(kafka_configfile)
        self.schema = helpers.parse_config(schema_file)
        self.s3_config = helpers.parse_config(s3_configfile)

        self.producer = KafkaProducer(
            bootstrap_servers=self.kafka_config["BROKERS_IP"])
Code Example #3
    def __init__(self, s3_configfile, schema_configfile, psql_configfile):
        """
        class constructor that initializes the instance according to the configurations
        of the S3 bucket, raw data and PostgreSQL table
        :type s3_configfile:     str        path to s3 config file
        :type schema_configfile: str        path to schema config file
        :type psql_configfile:   str        path to psql config file
        """
        self.s3_config = helpers.parse_config(s3_configfile)
        self.schema = helpers.parse_config(schema_configfile)
        self.psql_config = helpers.parse_config(psql_configfile)

        self.sc = pyspark.SparkContext.getOrCreate()
        self.sc.setLogLevel("ERROR")
Code Example #4
    def test_parse_config(self):
        # test if correctly parses the config file
        conf = {"field1": "val1", "field2": {"subfield1": 2, "subfield2": "3"}}

        # Note: "__builtin__" is the Python 2 name of the builtins module; on Python 3 patch "builtins.open" instead.
        with patch("__builtin__.open",
                   mock_open(read_data=json.dumps(conf))) as mock_file:

            self.assertEqual(conf, helpers.parse_config(mock_file),
                             "fail to properly read config from file")
Code Example #5
File: streamers.py  Project: wgong/TaxiOptimizer
    def __init__(self, kafka_configfile, schema_configfile, stream_configfile,
                 start_offset):
        """
        class constructor that initializes the instance according to the configurations
        of Kafka (brokers, topic, offsets), data schema and batch interval for streaming
        :type kafka_configfile:  str        path to kafka config file
        :type schema_configfile: str        path to schema config file
        :type stream_configfile: str        path to stream config file
        :type start_offset:      int        offset from which to read from partitions of Kafka topic
        """
        self.kafka_config = helpers.parse_config(kafka_configfile)
        self.stream_config = helpers.parse_config(stream_configfile)
        self.schema = helpers.parse_config(schema_configfile)

        self.start_offset = start_offset

        self.sc = pyspark.SparkContext.getOrCreate()
        self.ssc = pyspark.streaming.StreamingContext(
            self.sc, self.stream_config["INTERVAL"])

        self.sc.setLogLevel("ERROR")
Code Example #6
 def __init__(self, kafka_config_infile, ecg_spark_config_infile, postgres_config_infile, s3bucket_config_infile,
              batch_interval):
     if not os.path.exists('./tmp'):
         os.makedirs('./tmp')
     logging.basicConfig(level=logging.DEBUG,
                         format='%(asctime)s %(levelname)s %(message)s',
                         filename='./tmp/spark_consumer.log',
                         filemode='w')
     self.logger = logging.getLogger('py4j')
     self.logger.setLevel(logging.WARN)
     self.ecg_spark_config = helpers.parse_config(ecg_spark_config_infile)
     self.postgres_config = helpers.parse_config(postgres_config_infile)
     self.s3bucket_config = helpers.parse_config(s3bucket_config_infile)
     self.kafka_config = helpers.parse_config(kafka_config_infile)
     self.sc = SparkContext(appName='ECGDashboardApp')
     self.sc.setLogLevel("FATAL")
     self.ssc = StreamingContext(self.sc, batch_interval)
     self.logger.warn('Opened spark Context')
     self.kafkastream = self.connectToKafkaBrokers()
     self.logger.warn('Opened connection to Kafka brokers')
     self.a = self.sc.accumulator(0)
Code Example #7
 def __init__(self, postgres_config_infile):
     if not os.path.exists('./tmp'):
         os.makedirs('./tmp')
     logging.basicConfig(level=logging.DEBUG,
                         format='%(asctime)s %(levelname)s %(message)s',
                         filename='./tmp/website.log',
                         filemode='w')
     self.logger = logging.getLogger('py4j')
     self.postgres_config = helpers.parse_config(postgres_config_infile)
     self.cur = self.connectToDB()
     self.signal_schema = [
         'batchnum', 'signame', 'time', 'ecg1', 'ecg2', 'ecg3'
     ]
     self.hr_schema = ['batchnum', 'signame', 'time', 'hr1', 'hr2', 'hr3']
Code Example #8
File: reddit.py  Project: perkelex/GDScrapper
 def __init__(self, subreddit, output_file_name=paths.OUTPUT_FILE):
     super().__init__()
     self.filter = helpers.Filter(watchlist=True)
     self.session = Session()
     self.reddit = self.init_connection()
     self.subreddit = subreddit
     self.output_file_name = output_file_name
     self.watchlist = helpers.parse_config("watchlist")
     self.watchlist_hits = []
     self.submissions = []
     self.subs_100_off = []
     self.subs_90_off = []
     self.subs_80_off = []
     self.subs_70_off = []
     self.subs_60_off = []
     self.subs_50_off = []
     self.subs_40_off = []
     self.subs_30_off = []
     self.subs_20_off = []
     self.subs_10_off = []
Code Example #9
File: streamers.py  Project: wgong/TaxiOptimizer
 def __init__(self,
              kafka_configfile,
              schema_configfile,
              stream_configfile,
              psql_configfile,
              start_offset=0):
     """
     class constructor that initializes the instance according to the configurations
     of Kafka (brokers, topic, offsets), PostgreSQL database, data schema and batch interval for streaming
      :type kafka_configfile:  str        path to kafka config file
     :type schema_configfile: str        path to schema config file
     :type stream_configfile: str        path to stream config file
     :type psql_configfile:   str        path to psql config file
     :type start_offset:      int        offset from which to read from partitions of Kafka topic
     """
     SparkStreamerFromKafka.__init__(self, kafka_configfile,
                                     schema_configfile, stream_configfile,
                                     start_offset)
     self.psql_config = helpers.parse_config(psql_configfile)
     self.sqlContext = pyspark.sql.SQLContext(self.sc)
     self.load_batch_data()
     self.psql_n = 0
Code Example #10
from flask import Flask, jsonify, request, send_file
import ast
import os
import sys
from pymongo import DESCENDING
if os.getcwd().split('/')[-1] != 'fl_sport_betting':
    os.chdir("..")
    sys.path.append(os.path.abspath(os.curdir))
from storage.mongodb_storage import MongoDBStorage as mdb
import helpers
config = helpers.parse_config('server')

client = mdb().client
app = Flask(__name__)


def group_forecasts(forecasts):
    grouped = {}
    for forecast in forecasts:
        grouped.setdefault(forecast['forecast_type'], [])
        grouped[forecast['forecast_type']].append(forecast)
    return grouped


def get_file_link(resource, file_name):
    pattern = '{server_domain}/api/files/{resource}/{file_name}'
    file_link = pattern.format(server_domain=config['domain'],
                               resource=resource,
                               file_name=file_name)
    return file_link
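A quick illustration of the two helpers above (the forecast dicts and file name are hypothetical): group_forecasts buckets forecasts by their forecast_type, and get_file_link builds a download URL from the domain entry of the parsed server config.

forecasts = [{'forecast_type': 'win', 'odds': 1.8},
             {'forecast_type': 'total', 'odds': 2.1}]
grouped = group_forecasts(forecasts)
# grouped == {'win':   [{'forecast_type': 'win', 'odds': 1.8}],
#             'total': [{'forecast_type': 'total', 'odds': 2.1}]}

link = get_file_link('reports', 'summary.csv')
# e.g. 'https://example.com/api/files/reports/summary.csv' if config['domain'] is 'https://example.com'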
Code Example #11
                    topic=cfg['kafka_broker']['producer_topic_deals'],
                    message=json):
                influxdb_client.write_deal()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    env_list = ['dev', 'sit', 'prd']

    parser.add_argument('-env',
                        choices=env_list,
                        metavar=str(env_list),
                        required=True,
                        help='parser environment type')

    # Parse arguments
    args = vars(parser.parse_args())
    env_type = args['env']

    root_path = os.path.dirname(os.path.realpath(__file__))
    cfg = helpers.parse_config(env_type=env_type, root_path=root_path)
    logger = helpers.create_timed_rotating_log(cfg=cfg, root_path=root_path)

    start_time = datetime.combine(datetime.now().date(), time(4, 0, 0))
    if datetime.now() < start_time:
        logger.info('service will be starting at 04:00:00')
        t.sleep((start_time - datetime.now()).seconds)

    read_from_efx()
Code Example #12
import os
import sys
sys.path.append('../python')
import boto3
import helpers

# TODO: Fix the brokers-unavailable issue now that we have switched over to confluent-kafka.

session = 'k1'
kafka_config = helpers.parse_config('../../.config/kafka.config')
s3bucket_config = helpers.parse_config('../../.config/s3bucket.config')
ipaddr = kafka_config['ip-addr'].split(',')
s3 = boto3.client('s3')
obj = s3.get_object(Bucket="testsmalldata", Key="RECORDS_abridged.txt")
records = obj['Body'].read().decode('utf-8').split('\n')

records_per_node = int(round(len(records) / len(ipaddr)))
print(records_per_node)

# Open x file threads on y nodes. Visualized using tmux.
os.system('tmux kill-session -t %s' % session)
os.system('tmux new-session -s %s -n bash -d' % session)
for i in range(len(ipaddr)):
    start = i * records_per_node
    if i == len(ipaddr) - 1:  # last node takes the remaining records
        stop = len(records) - 1
    else:
        stop = (i + 1) * records_per_node
    ip = ipaddr[i]
    records_interval = records[start:stop]
    os.system('echo %s' % ip)
Code Example #13
from datetime import datetime, timedelta
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.operators.bash_operator import BashOperator
import psycopg2
import pandas as pd
import boto3
from StringIO import StringIO

import sys
sys.path.append('../../python/')
import helpers

s3bucket_config_infile = '../../../.config/s3bucket.config'
postgres_config_infile = '../../../.config/postgres.config'

s3bucket_config = helpers.parse_config(s3bucket_config_infile)
postgres_config = helpers.parse_config(postgres_config_infile)

schema = ['id', 'batchnum', 'signame', 'time', 'ecg1', 'ecg2', 'ecg3']


def connectToDB(postgres_config):
    """
    :return: database cursor
    """
    try:
        conn = psycopg2.connect(host=postgres_config['host'],
                                database=postgres_config['database'],
                                port=postgres_config['port'],
                                user=postgres_config['user'],
                                password=postgres_config['password'])
Code Example #14
sys.path.append("/home/ubuntu/TaxiOptimizer/helpers/")

import os
import time, json
from app import app
from datetime import datetime
from flask import jsonify, render_template, request
import helpers
from math import floor
from more_itertools import peekable
import psycopg2
import random


# configure connection string for PostgreSQL database
app.dbconfig = helpers.parse_config('/home/ubuntu/TaxiOptimizer/config/postgresql.config')
app.conn_str = "host='%s' dbname='%s' user='%s' password='%s'" % (app.dbconfig["host"],
                                                                  app.dbconfig["dbname"],
                                                                  app.dbconfig["user"],
                                                                  app.dbconfig["password"])

# set default vehicle_id and the list of coordinates to display
app.vid = []
app.res = []
app.coords = []

# time will be in the range from 10am to 10pm
app.curtime = 600


def print_time(t):