Beispiel #1
0
    def __init__(self, **kwargs):
        """Create one APIEndpoint per remote operation on a single host/port.

        Keyword Args:
            host: hostname handed to every APIEndpoint.
            port: port number; converted with int() before use.

        The required keywords are checked through common.KeywordArgReader
        (presumably it rejects a call that omits one -- confirm there).
        """
        reader = common.KeywordArgReader('host', 'port')
        reader.read(**kwargs)
        self.hostname = reader.get_value('host')
        self.port = int(reader.get_value('port'))

        def endpoint(path, method):
            # Every endpoint shares this object's host and port.
            return APIEndpoint(host=self.hostname,
                               port=self.port,
                               path=path,
                               method=method)

        self.poll_job = endpoint('job', 'GET')
        self.update_job_status = endpoint('jobstatus', 'POST')
        self.update_job_log = endpoint('joblog', 'POST')
        self.poll_job_bids = endpoint('bids', 'GET')
        self.couriers = endpoint('couriers', 'GET')
        self.bidstat = endpoint('bidstat', 'GET')
        self.award = endpoint('award', 'POST')
Beispiel #2
0
    def __init__(self, **kwargs):
        """Wrap each configured node string in a KafkaNode and open a client.

        Keyword Args:
            kafka_nodes: iterable of node strings; each one is passed to
                tkcore.KafkaNode.
        """
        reader = common.KeywordArgReader('kafka_nodes')
        reader.read(**kwargs)

        kafka_nodes = []
        for node_string in reader.get_value('kafka_nodes'):
            kafka_nodes.append(tkcore.KafkaNode(node_string))
        self.nodes = kafka_nodes

        # connect_string is not assigned in this method; presumably it is a
        # property defined elsewhere on the class -- confirm.
        self.kafka_client = KafkaClient(hosts=self.connect_string)
Beispiel #3
0
    def __init__(self, **kwargs):
        """Collect settings from keyword arguments into Parameter objects.

        Required keywords are the names listed in self.required_settings.
        'column_type_map' is copied into self.column_type_map; every other
        required keyword, and every extra keyword supplied by the caller,
        becomes a Parameter in the ordered self.settings mapping.
        'table_suffix' and 'column_suffix' are always present in
        self.settings and default to the empty string.
        """
        self.required_settings = [
            'autocreate_pk_if_missing', 'pk_name', 'pk_type', 'varchar_length',
            'column_type_map'
        ]
        self.optional_settings = ['table_suffix', 'column_suffix']

        reader = common.KeywordArgReader(*self.required_settings)
        reader.read(**kwargs)
        self.settings = OrderedDict()
        self.column_type_map = {}
        for setting_name in self.required_settings:
            if setting_name != 'column_type_map':
                self.settings[setting_name] = Parameter(
                    name=setting_name, value=reader.get_value(setting_name))
            else:
                self.column_type_map.update(kwargs['column_type_map'].items())
            # Remove the required entry so only the extras remain in kwargs.
            kwargs.pop(setting_name)

        # Whatever is left was not required; keep it as a setting anyway.
        for extra_name, extra_value in kwargs.items():
            self.settings[extra_name] = Parameter(name=extra_name,
                                                  value=extra_value)

        # Guarantee both suffix settings exist, defaulting to ''.
        for suffix_name in ('table_suffix', 'column_suffix'):
            self.settings[suffix_name] = Parameter(
                name=suffix_name, value=kwargs.get(suffix_name, ''))
Beispiel #4
0
 def __init__(self, yaml_config_filename, **kwargs):
     """Load a YAML configuration file and remember the context name.

     Args:
         yaml_config_filename: path of the YAML file to read.

     Keyword Args:
         context_name: required; stored as self._context_name.
     """
     kwreader = common.KeywordArgReader('context_name')
     kwreader.read(**kwargs)
     self._context_name = kwreader.get_value('context_name')
     self._yaml_config = None
     with open(yaml_config_filename, 'r') as f:
         # yaml.load() without an explicit Loader is deprecated since
         # PyYAML 5.1 and a TypeError in PyYAML 6. safe_load also refuses
         # to construct arbitrary Python objects from the file.
         # NOTE(review): if configs rely on python-specific YAML tags,
         # switch to yaml.load(f, Loader=yaml.FullLoader) instead.
         self._yaml_config = yaml.safe_load(f)
Beispiel #5
0
    def __init__(self, **kwargs):
        """Create a boto3 'cognito-idp' client for the given user pool.

        Keyword Args:
            user_pool_id, client_id, aws_region: required.
            client_secret: optional; None when absent.
            auth_via_iam: optional flag, default False. When false,
                'aws_key_id' and 'aws_secret_key' must both be supplied.

        Raises:
            Exception: explicit credentials are needed but one is missing
                or empty.
        """
        reader = common.KeywordArgReader('user_pool_id', 'client_id',
                                         'aws_region')
        reader.read(**kwargs)
        self.user_pool_id = reader.get_value('user_pool_id')
        self.client_id = reader.get_value('client_id')
        self.aws_region = reader.get_value('aws_region')
        self.client_secret = kwargs.get('client_secret')

        client_params = {'region_name': self.aws_region}
        if not kwargs.get('auth_via_iam', False):
            # Without IAM the caller has to hand over explicit credentials.
            access_key_id = kwargs.get('aws_key_id')
            secret_access_key = kwargs.get('aws_secret_key')
            if not (access_key_id and secret_access_key):
                raise Exception(COGNITO_AUTH_ERROR_MESSAGE)
            client_params['aws_access_key_id'] = access_key_id
            client_params['aws_secret_access_key'] = secret_access_key

        self.cognito_client = boto3.client('cognito-idp', **client_params)
Beispiel #6
0
    def __init__(self, **kwargs):
        """Create a boto3 S3 client, using explicit credentials or IAM.

        Keyword Args:
            local_temp_path: required; stored as self.local_tmp_path.
            region: required; passed to boto3 as region_name.
            auth_via_iam: optional flag, default False.
            aws_key_id, aws_secret_key: required unless auth_via_iam is
                truthy.

        Raises:
            Exception: explicit credentials are needed but one is missing
                or empty.
        """
        kwreader = common.KeywordArgReader('local_temp_path', 'region')
        kwreader.read(**kwargs)

        self.local_tmp_path = kwreader.get_value('local_temp_path')
        self.region = kwreader.get_value('region')
        self.s3session = None
        self.aws_access_key_id = None
        self.aws_secret_access_key = None

        # we set this to True if we are initializing this object from inside an AWS Lambda,
        # because in that case we do not require the aws credential parameters to be set.
        # The default is False, which is what we want when we are creating this object
        # in a normal (non-AWS-Lambda) execution context: clients must pass in credentials.
        should_authenticate_via_iam = kwargs.get('auth_via_iam', False)

        if not should_authenticate_via_iam:
            print("NOT authenticating via IAM. Setting credentials now.",
                  file=sys.stderr)
            self.aws_access_key_id = kwargs.get('aws_key_id')
            self.aws_secret_access_key = kwargs.get('aws_secret_key')
            if not self.aws_secret_access_key or not self.aws_access_key_id:
                raise Exception(s3_auth_error_mesage)
            # The required region was previously dropped on this path and
            # only honoured on the IAM path; pass it in both cases.
            self.s3client = boto3.client(
                's3',
                aws_access_key_id=self.aws_access_key_id,
                aws_secret_access_key=self.aws_secret_access_key,
                region_name=self.region)
        else:
            self.s3client = boto3.client('s3', region_name=self.region)
Beispiel #7
0
 def __init__(self, **kwargs):
     """Store the target topic, qualifier callable and services.

     Keyword Args:
         target_topic: stored as self.target_topic.
         qualifier: stored as self.qualify.
         services: stored as self.services.
     """
     required = ('target_topic', 'qualifier', 'services')
     common.KeywordArgReader(*required).read(**kwargs)
     self.target_topic, self.qualify, self.services = [
         kwargs[keyword] for keyword in required
     ]
Beispiel #8
0
 def __init__(self, **kwargs):
     """Initialise the DataRelay base, then bind a database and table spec.

     Keyword Args:
         db: stored as self.database.
         tablespec: object exposing insert_statement_template; stored as
             self.tablespec.
     """
     DataRelay.__init__(self, **kwargs)
     reader = common.KeywordArgReader('db', 'tablespec')
     reader.read(**kwargs)
     self.database = reader.get_value('db')
     self.tablespec = reader.get_value('tablespec')
     # Wrap the insert template once so it can be reused later.
     insert_template = self.tablespec.insert_statement_template
     self._insert_sql = text(insert_template)
Beispiel #9
0
 def process(self, record_generator, **kwargs):
     """Write every record accepted by self.qualify to self.target_topic.

     Args:
         record_generator: iterable of records.

     Keyword Args:
         kafka_writer: required; object whose write(topic, record) is
             called for each accepted record.

     The qualifier receives all keyword arguments plus 'services', which
     is set to self.services.
     """
     common.KeywordArgReader('kafka_writer').read(**kwargs)
     writer = kwargs['kafka_writer']
     kwargs['services'] = self.services
     for record in record_generator:
         if not self.qualify(record, **kwargs):
             continue
         writer.write(self.target_topic, record)
Beispiel #10
0
 def __init__(self, **kwargs):
     """Populate the job's fields from keyword arguments.

     The names in REQUIRED_JOB_FIELDS are checked through
     common.KeywordArgReader; 'name', 'description' and 'owner_id' are
     read from it. 'id' and 'status' are optional and default to None.
     """
     reader = common.KeywordArgReader(*REQUIRED_JOB_FIELDS)
     reader.read(**kwargs)
     # Optional keywords: absent ones simply become None.
     self._id = kwargs.get('id')
     self._status = kwargs.get('status')
     self._name = reader.get_value('name')
     self._description = reader.get_value('description')
     self._owner_id = reader.get_value('owner_id')
Beispiel #11
0
 def __init__(self, **kwargs):
     """Store accept/reject topics, the qualifier callable and services.

     Keyword Args:
         accept_topic: stored as self.accept_topic.
         reject_topic: stored as self.reject_topic.
         qualifier: stored as self.qualify.
         service_objects: required; stored as self.services unless the
             caller also passes 'services'.
         services: optional legacy name; wins over 'service_objects'
             when given.
     """
     kwreader = common.KeywordArgReader('accept_topic', 'reject_topic',
                                        'qualifier', 'service_objects')
     kwreader.read(**kwargs)
     self.accept_topic = kwargs['accept_topic']
     self.reject_topic = kwargs['reject_topic']
     self.qualify = kwargs['qualifier']
     # The validated keyword is 'service_objects', but this used to read
     # kwargs['services'] unconditionally and raised KeyError for callers
     # that passed only the required name. Keep honouring 'services' for
     # callers that already pass both.
     if 'services' in kwargs:
         self.services = kwargs['services']
     else:
         self.services = kwargs['service_objects']
Beispiel #12
0
 def __init__(self, logging_level=logging.DEBUG, **kwargs):
     """Create a Sentry client and attach a SentryHandler to the logger.

     Args:
         logging_level: level set on the SentryHandler; defaults to
             logging.DEBUG.

     Keyword Args:
         sentry_dsn: required; passed to Client.
     """
     kwreader = common.KeywordArgReader('sentry_dsn')
     kwreader.read(**kwargs)

     # kwargs is a plain dict and has no get_value(); the value has to
     # come from the reader.
     sentry_dsn = kwreader.get_value('sentry_dsn')
     self._client = Client(sentry_dsn)
     # NOTE(review): the handler is built without self._client, so it is
     # presumably configured independently of sentry_dsn -- confirm that
     # this is intended.
     sentry_handler = SentryHandler()
     sentry_handler.setLevel(logging_level)
     log.addHandler(sentry_handler)
Beispiel #13
0
    def __init__(self, **kwargs):
        """Connect to a PostgreSQL database and reflect its schema.

        The names in POSTGRESQL_SVC_PARAM_NAMES are checked through
        common.KeywordArgReader.

        Keyword Args:
            database, host, username, password, schema: read directly
                from kwargs.
            port: optional; defaults to 5432.
            max_connect_retries: optional; defaults to 3 when absent or
                falsy.

        Raises:
            Exception: no connection could be established within
                max_connect_retries attempts.
        """
        kwreader = common.KeywordArgReader(*POSTGRESQL_SVC_PARAM_NAMES)
        kwreader.read(**kwargs)

        self.db_name = kwargs['database']
        self.host = kwargs['host']
        self.port = int(kwargs.get('port', 5432))
        self.username = kwargs['username']
        self.password = kwargs['password']
        self.schema = kwargs['schema']
        self.max_connect_retries = int(kwargs.get('max_connect_retries') or 3)
        self.metadata = None
        self.engine = None
        self.session_factory = None
        self.Base = None
        self.url = None

        # The template previously had no {port} placeholder, so the
        # configured port was passed to format() but never used and every
        # connection went to the driver's default port.
        url_template = '{db_type}://{user}:{passwd}@{host}:{port}/{database}'
        db_url = url_template.format(db_type='postgresql+psycopg2',
                                     user=self.username,
                                     passwd=self.password,
                                     host=self.host,
                                     port=self.port,
                                     database=self.db_name)

        retries = 0
        connected = False
        while not connected and retries < self.max_connect_retries:
            try:
                self.engine = sqla.create_engine(db_url, echo=False)
                self.metadata = MetaData(schema=self.schema)
                self.Base = automap_base(bind=self.engine,
                                         metadata=self.metadata)
                self.Base.prepare(self.engine, reflect=True)
                self.metadata.reflect(bind=self.engine)
                self.session_factory = sessionmaker(bind=self.engine,
                                                    autoflush=False,
                                                    autocommit=False)

                # this is required. See comment in SimpleRedshiftService
                connection = self.engine.connect()
                connection.close()
                connected = True
                print('### Connected to PostgreSQL DB.', file=sys.stderr)
                self.url = db_url

            except Exception as err:
                # Deliberately broad: any failure counts as one failed
                # attempt; report it, wait a second, and try again.
                print(err, file=sys.stderr)
                print(err.__class__.__name__, file=sys.stderr)
                print(err.__dict__, file=sys.stderr)
                time.sleep(1)
                retries += 1

        if not connected:
            raise Exception(
                '!!! Unable to connect to PostgreSQL db on host %s at port %s.'
                % (self.host, self.port))
Beispiel #14
0
    def add_job(self, **kwargs):
        """Create a Job from the keyword arguments and register it.

        The names in REQUIRED_JOB_FIELDS are checked through
        common.KeywordArgReader. The id produced by self._generate_job_id
        is added to the arguments as 'id' before the Job is built.

        Returns:
            The generated job id, which is also the key in self._jobs.
        """
        common.KeywordArgReader(*REQUIRED_JOB_FIELDS).read(**kwargs)
        new_job_id = self._generate_job_id(**kwargs)

        kwargs['id'] = new_job_id
        self._jobs[new_job_id] = Job(**kwargs)
        return new_job_id
Beispiel #15
0
 def __init__(self, checkpoint_function, **kwargs):
     """Set up a thread that holds a checkpoint callable and its arguments.

     Args:
         checkpoint_function: stored as self._checkpoint_function.

     Keyword Args:
         checkpoint_interval: required; stored as self._interval.

     All keyword arguments are kept as self._checkpoint_function_args.
     """
     threading.Thread.__init__(self)
     reader = common.KeywordArgReader('checkpoint_interval').read(**kwargs)
     self._checkpoint_function = checkpoint_function
     self._checkpoint_function_args = kwargs
     self._interval = reader.get_value('checkpoint_interval')
     # Initial state: zero on the counter and flagged as stopped.
     self._seconds = 0
     self._stopped = True
Beispiel #16
0
 def __init__(self, app_name='m2', **kwargs):
     """Initialise the command shell and read its optional spec lists.

     Args:
         app_name: shell name, also used to build the prompt.

     Keyword Args:
         datasource_specs: optional; defaults to an empty list.
         maps: optional; stored as self.map_specs, defaults to [].
         global_params: optional; read but not stored (see note below).
     """
     reader = common.KeywordArgReader()
     reader.read(**kwargs)
     self.name = app_name
     Cmd.__init__(self)
     self.prompt = '%s> ' % self.name
     self.datasource_specs = reader.get_value('datasource_specs') or []
     self.map_specs = reader.get_value('maps') or []
     #self.service_objects = kwreader.get_value('service_objects') or []
     # NOTE(review): this value is read and then discarded; it was
     # presumably meant to be kept on the instance -- confirm.
     global_params = reader.get_value('global_params') or []
Beispiel #17
0
 def split(self, record_generator, **kwargs):
     """Route each record to the accept or reject topic.

     Args:
         record_generator: iterable of records.

     Keyword Args:
         kafka_writer: required; object whose write(topic, record) is
             called once per record.

     The qualifier receives all keyword arguments plus 'services', which
     is set to self.services.
     """
     common.KeywordArgReader('kafka_writer').read(**kwargs)
     writer = kwargs['kafka_writer']
     kwargs['services'] = self.services
     for record in record_generator:
         accepted = self.qualify(record, **kwargs)
         topic = self.accept_topic if accepted else self.reject_topic
         writer.write(topic, record)
Beispiel #18
0
    def set_core_transformer(self, map_name, yaml_config_file):
        """Build the core record transformer from a YAML map definition.

        Args:
            map_name: name of the map, passed to the builder.
            yaml_config_file: YAML config file handed to the builder.

        Returns:
            self, so calls can be chained.
        """
        # This used to read from an undefined `kwargs` name, which raised
        # NameError on every call. The positional parameters are checked
        # through the reader instead.
        kwreader = common.KeywordArgReader('map_name', 'config_file')
        kwreader.read(map_name=map_name, config_file=yaml_config_file)

        builder = datamap.RecordTransformerBuilder(yaml_config_file,
                                                   map_name=map_name)
        self.core_transformer = builder.build()
        return self
Beispiel #19
0
 def __init__(self, app_name='mkstream', **kwargs):
     """Initialise the command shell for one project and clear the screen.

     Args:
         app_name: shell name, shown in the prompt.

     Keyword Args:
         project_name: required; shown in the prompt.
         output_file: required; stored as self.output_file.
     """
     reader = common.KeywordArgReader('project_name', 'output_file')
     reader.read(**kwargs)
     self.project_name = reader.get_value('project_name')
     self.output_file = reader.get_value('output_file')
     self.name = app_name
     Cmd.__init__(self)
     self.prompt = '%s [%s] > ' % (self.name, self.project_name)
     self.stream_specs = []
     # The exit status of the shell command is deliberately ignored.
     os.system('clear')
Beispiel #20
0
def textfile_line_generator(**kwargs):
    """Yield the non-blank lines of a text file, stripped of whitespace.

    Keyword Args:
        filename: required; path of the file, opened in text mode.

    Yields:
        Each line with leading and trailing whitespace removed; lines
        that are empty after stripping are skipped.
    """
    reader = common.KeywordArgReader('filename')
    reader.read(**kwargs)
    with open(reader.get_value('filename'), 'rt') as source:
        for raw_line in source:
            stripped = raw_line.strip()
            if stripped:
                yield stripped
Beispiel #21
0
def named_tuple_array_to_dict(tuple_array, **kwargs):
    """Build a dict from two attributes of every item in tuple_array.

    Args:
        tuple_array: iterable of objects exposing both named attributes.

    Keyword Args:
        key_name: attribute whose value becomes the dict key.
        value_name: attribute whose value becomes the dict value.

    Returns:
        A dict; when a key repeats, the later item wins.
    """
    reader = common.KeywordArgReader('key_name', 'value_name')
    reader.read(**kwargs)
    key_attr = reader.get_value('key_name')
    value_attr = reader.get_value('value_name')

    return {
        getattr(item, key_attr): getattr(item, value_attr)
        for item in tuple_array
    }
Beispiel #22
0
    def __init__(self, **kwargs):
        """Record the source-file settings and start with empty registries.

        Keyword Args:
            local_temp_dir: stored as self._local_temp_directory.
            src_filename: stored as self._source_filename.
            src_file_header: stored as self._source_file_header.
            src_file_delimiter: stored as self._source_file_delimiter.
        """
        kwreader = common.KeywordArgReader('local_temp_dir', 'src_filename',
                                           'src_file_header',
                                           'src_file_delimiter')

        # The dict has to be unpacked: every other caller in this file
        # invokes read(**kwargs), and passing it positionally never hands
        # the reader any keywords.
        kwreader.read(**kwargs)
        self._local_temp_directory = kwreader.get_value('local_temp_dir')
        self._source_filename = kwreader.get_value('src_filename')
        self._source_file_header = kwreader.get_value('src_file_header')
        self._source_file_delimiter = kwreader.get_value('src_file_delimiter')
        self._svc_registry = None
        self._transfer_functions = {}
Beispiel #23
0
    def __init__(self, **kwargs):
        """Open a SQL Server database, log in, and create a data manager.

        Keyword Args:
            host, username, database, password: required.
            port: optional; 1433 is used when absent or falsy.
        """
        reader = common.KeywordArgReader('host', 'username', 'database', 'password')
        reader.read(**kwargs)

        self.host = reader.get_value('host')
        port_setting = reader.get_value('port')
        self.port = int(port_setting if port_setting else 1433)
        self.username = reader.get_value('username')
        self.db_name = reader.get_value('database')
        self.password = reader.get_value('password')

        database = sqldbx.SQLServerDatabase(self.host, self.db_name, self.port)
        self.db = database
        self.db.login(self.username, self.password)
        self._data_manager = sqldbx.PersistenceManager(self.db)
Beispiel #24
0
    def __init__(self, **kwargs):
        """Build a version-1 header from the record type and pipeline id.

        Keyword Args:
            record_type: required; stored as self._record_type.
            pipeline_id: required; stored as self._pipeline_id.

        Every other keyword becomes a {'name': ..., 'value': ...} entry in
        self._extra_headers. The timestamp is the local time at
        construction, in ISO format.
        """
        reserved = ('record_type', 'pipeline_id')
        reader = common.KeywordArgReader(*reserved)
        reader.read(**kwargs)

        self._version = 1
        self._record_type = reader.get_value('record_type')
        self._pipeline_id = reader.get_value('pipeline_id')
        self._timestamp = datetime.datetime.now().isoformat()
        self._extra_headers = [
            {'name': header_name, 'value': header_value}
            for header_name, header_value in kwargs.items()
            if header_name not in reserved
        ]
Beispiel #25
0
    def __init__(self, **kwargs):
        """Store the topics, the Kafka cluster and both record transformers.

        Keyword Args:
            staging_topic: stored as self._staging_topic.
            core_topic: stored as self._core_topic.
            kafka_cluster: stored as self._cluster.
            staging_record_transformer: stored as
                self._staging_transformer.
            core_record_transformer: stored as self._core_transformer.
        """
        required = ('staging_topic',
                    'core_topic',
                    'kafka_cluster',
                    'staging_record_transformer',
                    'core_record_transformer')
        common.KeywordArgReader(*required).read(**kwargs)

        self._cluster = kwargs['kafka_cluster']
        self._staging_topic = kwargs['staging_topic']
        self._staging_transformer = kwargs['staging_record_transformer']
        self._core_topic = kwargs['core_topic']
        self._core_transformer = kwargs['core_record_transformer']
Beispiel #26
0
 def __init__(self, name, **kwargs):
     """Copy the keyword arguments into self._data under the given name.

     Args:
         name: stored as self.name.

     The names in REQUIRED_ED_CHANNEL_FIELDS are checked through
     common.KeywordArgReader. 'payload_fields' is stored as a set of its
     members; every other keyword is stored unchanged.
     """
     common.KeywordArgReader(*REQUIRED_ED_CHANNEL_FIELDS).read(**kwargs)
     self.name = name
     self._data = {}
     for field_name, field_value in kwargs.items():
         if field_name != 'payload_fields':
             self._data[field_name] = field_value
             continue
         # payload_fields arrives as a collection; keep it as a set.
         self._data[field_name] = set(field_value)
Beispiel #27
0
    def __init__(self, **kwargs):
        """Read the job bucket layout and bidding-window settings.

        The names in PIPELINE_SVC_PARAM_NAMES are checked through
        common.KeywordArgReader.

        Keyword Args:
            job_bucket_name, posted_jobs_folder, accepted_jobs_folder:
                stored under the same names.
            bid_window_limit_type: must be one of
                ALLOWED_BIDDING_LIMIT_TYPES.
            bid_window_limit: converted with int().

        Raises:
            Exception: bid_window_limit_type is not an allowed type.
        """
        common.KeywordArgReader(*PIPELINE_SVC_PARAM_NAMES).read(**kwargs)
        self.job_bucket_name = kwargs['job_bucket_name']
        self.posted_jobs_folder = kwargs['posted_jobs_folder']
        self.accepted_jobs_folder = kwargs['accepted_jobs_folder']

        limit_type = kwargs['bid_window_limit_type']
        self.bid_window_limit_type = limit_type
        type_is_allowed = limit_type in ALLOWED_BIDDING_LIMIT_TYPES
        if not type_is_allowed:
            raise Exception(
                'Invalid bidding limit type %s. Allowed types are %s.' %
                (limit_type, ALLOWED_BIDDING_LIMIT_TYPES))

        self.bid_window_limit = int(kwargs['bid_window_limit'])
Beispiel #28
0
    def __init__(self, **kwargs):
        """Store the settings that tie a fact-table field to a dimension table.

        Keyword Args:
            fact_table_field_name, dim_table_name, key_field_name,
            value_field_name: stored under the same names with a leading
                underscore.
            primary_key_type: stored as self._pk_type.
            id_lookup_function: stored as self._lookup_func.
        """
        reader = common.KeywordArgReader('fact_table_field_name',
                                         'dim_table_name', 'key_field_name',
                                         'value_field_name',
                                         'primary_key_type',
                                         'id_lookup_function')
        reader.read(**kwargs)
        lookup = reader.get_value

        self._fact_table_field_name = lookup('fact_table_field_name')
        self._dim_table_name = lookup('dim_table_name')
        self._key_field_name = lookup('key_field_name')
        self._value_field_name = lookup('value_field_name')
        self._pk_type = lookup('primary_key_type')
        self._lookup_func = lookup('id_lookup_function')
Beispiel #29
0
    def __init__(self, app_name='mkmap', **kwargs):
        """Initialise the command shell, its spec lists and global params.

        Args:
            app_name: shell name, also used to build the prompt.

        Keyword Args:
            datasource_specs, maps, service_objects: optional; each
                defaults to an empty list.
            initial_sourcefile: optional; stored as self.initial_datafile
                (None when absent).
        """
        reader = common.KeywordArgReader()
        reader.read(**kwargs)
        self.name = app_name
        Cmd.__init__(self)
        self.prompt = '%s> ' % self.name
        self.datasource_specs = reader.get_value('datasource_specs') or []
        self.map_specs = reader.get_value('maps') or []
        self.service_objects = reader.get_value('service_objects') or []

        # Global parameters start out with empty values.
        self.globals = [
            ParamSpec(name=param_name, value='')
            for param_name in ('project_home', 'datasource_module',
                               'service_module')
        ]

        self.initial_datafile = kwargs.get('initial_sourcefile')
        # The exit status of the shell command is deliberately ignored.
        os.system('clear')
Beispiel #30
0
    def __init__(self, cli_prompt, **kwargs):
        """Show a prompt and keep its result, re-asking once if it is empty.

        Args:
            cli_prompt: object whose show() returns the user's input.

        Keyword Args:
            warning_message: printed before the single retry.
            failure_message: text of the exception raised when the retry
                also yields nothing.

        Raises:
            Exception: no truthy value was obtained after the retry.
        """
        reader = common.KeywordArgReader('warning_message', 'failure_message')
        reader.read(**kwargs)

        warning_text = reader.get_value('warning_message')
        failure_text = reader.get_value('failure_message')

        retry_limit = 1
        self.data = cli_prompt.show()
        for _ in range(retry_limit):
            if self.data:
                break
            print('\n%s\n' % warning_text)
            self.data = cli_prompt.show()

        if not self.data:
            raise Exception(failure_text)