def __init__(self, **kwargs):
    kwreader = common.KeywordArgReader('host', 'port')
    kwreader.read(**kwargs)
    self.hostname = kwreader.get_value('host')
    self.port = int(kwreader.get_value('port'))
    self.poll_job = APIEndpoint(host=self.hostname, port=self.port, path='job', method='GET')
    self.update_job_status = APIEndpoint(host=self.hostname, port=self.port, path='jobstatus', method='POST')
    self.update_job_log = APIEndpoint(host=self.hostname, port=self.port, path='joblog', method='POST')
    self.poll_job_bids = APIEndpoint(host=self.hostname, port=self.port, path='bids', method='GET')
    self.couriers = APIEndpoint(host=self.hostname, port=self.port, path='couriers', method='GET')
    self.bidstat = APIEndpoint(host=self.hostname, port=self.port, path='bidstat', method='GET')
    self.award = APIEndpoint(host=self.hostname, port=self.port, path='award', method='POST')
def __init__(self, **kwargs):
    kwreader = common.KeywordArgReader('kafka_nodes')
    kwreader.read(**kwargs)
    node_strings = kwreader.get_value('kafka_nodes')
    self.nodes = [tkcore.KafkaNode(ns) for ns in node_strings]
    self.kafka_client = KafkaClient(hosts=self.connect_string)
def __init__(self, **kwargs):
    self.required_settings = [
        'autocreate_pk_if_missing',
        'pk_name',
        'pk_type',
        'varchar_length',
        'column_type_map'
    ]
    self.optional_settings = ['table_suffix', 'column_suffix']
    kwreader = common.KeywordArgReader(*self.required_settings)
    kwreader.read(**kwargs)
    self.settings = OrderedDict()
    self.column_type_map = {}
    for name in self.required_settings:
        if name == 'column_type_map':
            for k, v in kwargs['column_type_map'].items():
                self.column_type_map[k] = v
        else:
            self.settings[name] = Parameter(name=name, value=kwreader.get_value(name))
        kwargs.pop(name)
    # whatever remains in kwargs is an optional setting
    for key, value in kwargs.items():
        param = Parameter(name=key, value=value)
        self.settings[key] = param
    # make sure the suffix settings always exist, defaulting to empty strings
    self.settings['table_suffix'] = Parameter(name='table_suffix',
                                              value=kwargs.get('table_suffix', ''))
    self.settings['column_suffix'] = Parameter(name='column_suffix',
                                               value=kwargs.get('column_suffix', ''))
def __init__(self, yaml_config_filename, **kwargs):
    kwreader = common.KeywordArgReader('context_name')
    kwreader.read(**kwargs)
    self._context_name = kwreader.get_value('context_name')
    self._yaml_config = None
    with open(yaml_config_filename, 'r') as f:
        # safe_load() avoids arbitrary object construction; bare yaml.load()
        # without a Loader is also an error in newer PyYAML releases
        self._yaml_config = yaml.safe_load(f)
def __init__(self, **kwargs):
    kwreader = common.KeywordArgReader('user_pool_id', 'client_id', 'aws_region')
    kwreader.read(**kwargs)
    self.user_pool_id = kwreader.get_value('user_pool_id')
    self.client_id = kwreader.get_value('client_id')
    self.aws_region = kwreader.get_value('aws_region')
    self.client_secret = kwargs.get('client_secret')
    should_authenticate_via_iam = kwargs.get('auth_via_iam', False)
    if not should_authenticate_via_iam:
        key_id = kwargs.get('aws_key_id')
        secret_key = kwargs.get('aws_secret_key')
        if not key_id or not secret_key:
            raise Exception(COGNITO_AUTH_ERROR_MESSAGE)
        self.cognito_client = boto3.client('cognito-idp',
                                           aws_access_key_id=key_id,
                                           aws_secret_access_key=secret_key,
                                           region_name=self.aws_region)
    else:
        self.cognito_client = boto3.client('cognito-idp', region_name=self.aws_region)
def __init__(self, **kwargs):
    kwreader = common.KeywordArgReader('local_temp_path', 'region')
    kwreader.read(**kwargs)
    self.local_tmp_path = kwreader.get_value('local_temp_path')
    self.region = kwreader.get_value('region')
    self.s3session = None
    self.aws_access_key_id = None
    self.aws_secret_access_key = None
    # 'auth_via_iam' is set to True when this object is initialized from inside
    # an AWS Lambda, because in that case we do not require the AWS credential
    # parameters to be set. The default is False, which is what we want in a
    # normal (non-Lambda) execution context: clients must pass in credentials.
    should_authenticate_via_iam = kwargs.get('auth_via_iam', False)
    if not should_authenticate_via_iam:
        print('NOT authenticating via IAM. Setting credentials now.', file=sys.stderr)
        self.aws_access_key_id = kwargs.get('aws_key_id')
        self.aws_secret_access_key = kwargs.get('aws_secret_key')
        if not self.aws_secret_access_key or not self.aws_access_key_id:
            raise Exception(s3_auth_error_message)
        self.s3client = boto3.client('s3',
                                     aws_access_key_id=self.aws_access_key_id,
                                     aws_secret_access_key=self.aws_secret_access_key)
    else:
        self.s3client = boto3.client('s3', region_name=self.region)
def __init__(self, **kwargs):
    kwreader = common.KeywordArgReader('target_topic', 'qualifier', 'services')
    kwreader.read(**kwargs)
    self.target_topic = kwargs['target_topic']
    self.qualify = kwargs['qualifier']
    self.services = kwargs['services']
def __init__(self, **kwargs):
    DataRelay.__init__(self, **kwargs)
    kwreader = common.KeywordArgReader('db', 'tablespec')
    kwreader.read(**kwargs)
    self.database = kwreader.get_value('db')
    self.tablespec = kwreader.get_value('tablespec')
    self._insert_sql = text(self.tablespec.insert_statement_template)
def process(self, record_generator, **kwargs):
    kwreader = common.KeywordArgReader('kafka_writer')
    kwreader.read(**kwargs)
    kafka_writer = kwargs['kafka_writer']
    kwargs.update({'services': self.services})
    for record in record_generator:
        if self.qualify(record, **kwargs):
            kafka_writer.write(self.target_topic, record)
def __init__(self, **kwargs):
    kwreader = common.KeywordArgReader(*REQUIRED_JOB_FIELDS)
    kwreader.read(**kwargs)
    self._name = kwreader.get_value('name')
    self._description = kwreader.get_value('description')
    self._owner_id = kwreader.get_value('owner_id')
    self._id = kwargs.get('id')          # optional kwarg
    self._status = kwargs.get('status')  # also optional
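# Usage sketch, assuming REQUIRED_JOB_FIELDS names the three fields read above
# (name, description, owner_id); 'id' and 'status' remain optional. The sample
# values are illustrative only:
#
#   job = Job(name='route-42', description='morning delivery run', owner_id='u-001')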
def __init__(self, **kwargs):
    kwreader = common.KeywordArgReader('accept_topic',
                                       'reject_topic',
                                       'qualifier',
                                       'service_objects')
    kwreader.read(**kwargs)
    self.accept_topic = kwargs['accept_topic']
    self.reject_topic = kwargs['reject_topic']
    self.qualify = kwargs['qualifier']
    # 'service_objects' is the declared required kwarg; indexing
    # kwargs['services'] here would raise a KeyError
    self.services = kwargs['service_objects']
def __init__(self, logging_level=logging.DEBUG, **kwargs):
    kwreader = common.KeywordArgReader('sentry_dsn')
    kwreader.read(**kwargs)
    # read the DSN through the reader; a plain kwargs dict has no get_value() method
    sentry_dsn = kwreader.get_value('sentry_dsn')
    self._client = Client(sentry_dsn)
    sentry_handler = SentryHandler()
    sentry_handler.setLevel(logging_level)
    log.addHandler(sentry_handler)
def __init__(self, **kwargs):
    kwreader = common.KeywordArgReader(*POSTGRESQL_SVC_PARAM_NAMES)
    kwreader.read(**kwargs)
    self.db_name = kwargs['database']
    self.host = kwargs['host']
    self.port = int(kwargs.get('port', 5432))
    self.username = kwargs['username']
    self.password = kwargs['password']
    self.schema = kwargs['schema']
    self.max_connect_retries = int(kwargs.get('max_connect_retries') or 3)
    self.metadata = None
    self.engine = None
    self.session_factory = None
    self.Base = None
    self.url = None

    # the template must include {port}; without it, str.format() silently
    # drops the port and the URL always targets the default
    url_template = '{db_type}://{user}:{passwd}@{host}:{port}/{database}'
    db_url = url_template.format(db_type='postgresql+psycopg2',
                                 user=self.username,
                                 passwd=self.password,
                                 host=self.host,
                                 port=self.port,
                                 database=self.db_name)
    retries = 0
    connected = False
    while not connected and retries < self.max_connect_retries:
        try:
            self.engine = sqla.create_engine(db_url, echo=False)
            self.metadata = MetaData(schema=self.schema)
            self.Base = automap_base(bind=self.engine, metadata=self.metadata)
            self.Base.prepare(self.engine, reflect=True)
            self.metadata.reflect(bind=self.engine)
            self.session_factory = sessionmaker(bind=self.engine,
                                                autoflush=False,
                                                autocommit=False)
            # this test connection is required. See comment in SimpleRedshiftService
            connection = self.engine.connect()
            connection.close()
            connected = True
            print('### Connected to PostgreSQL DB.', file=sys.stderr)
            self.url = db_url
        except Exception as err:
            print(err, file=sys.stderr)
            print(err.__class__.__name__, file=sys.stderr)
            print(err.__dict__, file=sys.stderr)
            time.sleep(1)
            retries += 1

    if not connected:
        raise Exception('!!! Unable to connect to PostgreSQL db on host %s at port %s.'
                        % (self.host, self.port))
def add_job(self, **kwargs):
    kwreader = common.KeywordArgReader(*REQUIRED_JOB_FIELDS)
    kwreader.read(**kwargs)
    job_id = self._generate_job_id(**kwargs)
    job_params = kwargs
    job_params.update({'id': job_id})
    self._jobs[job_id] = Job(**job_params)
    return job_id
def __init__(self, checkpoint_function, **kwargs):
    threading.Thread.__init__(self)
    # construct and read in two steps: read() is not guaranteed to return
    # the reader, and we call get_value() on it below
    kwreader = common.KeywordArgReader('checkpoint_interval')
    kwreader.read(**kwargs)
    self._seconds = 0
    self._stopped = True
    self._checkpoint_function = checkpoint_function
    self._interval = kwreader.get_value('checkpoint_interval')
    self._checkpoint_function_args = kwargs
def __init__(self, app_name='m2', **kwargs):
    kwreader = common.KeywordArgReader()  # no required kwargs
    kwreader.read(**kwargs)
    self.name = app_name
    Cmd.__init__(self)
    self.prompt = '%s> ' % self.name
    self.datasource_specs = kwreader.get_value('datasource_specs') or []
    self.map_specs = kwreader.get_value('maps') or []
    #self.service_objects = kwreader.get_value('service_objects') or []
    # keep the global params on the instance rather than in a throwaway local
    # (which would also shadow the builtin 'globals')
    self.globals = kwreader.get_value('global_params') or []
def split(self, record_generator, **kwargs):
    kwreader = common.KeywordArgReader('kafka_writer')
    kwreader.read(**kwargs)
    kafka_writer = kwargs['kafka_writer']
    kwargs.update({'services': self.services})
    for record in record_generator:
        if self.qualify(record, **kwargs):
            kafka_writer.write(self.accept_topic, record)
        else:
            kafka_writer.write(self.reject_topic, record)
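# A hypothetical qualifier, for illustration only: any callable taking
# (record, **kwargs) and returning a truthy/falsy value fits the contract
# split() relies on above. This sketch assumes dict-shaped records:
#
#   def has_customer_id(record, **kwargs):
#       return bool(record.get('customer_id'))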
def set_core_transformer(self, **kwargs):
    # takes keyword args 'map_name' and 'config_file', matching the
    # required fields declared on the reader
    kwreader = common.KeywordArgReader('map_name', 'config_file')
    kwreader.read(**kwargs)
    yaml_config_file = kwargs['config_file']
    map_name = kwargs['map_name']
    builder = datamap.RecordTransformerBuilder(yaml_config_file,
                                               map_name=map_name)
    self.core_transformer = builder.build()
    return self
def __init__(self, app_name='mkstream', **kwargs):
    kwreader = common.KeywordArgReader('project_name', 'output_file')
    kwreader.read(**kwargs)
    self.output_file = kwreader.get_value('output_file')
    self.project_name = kwreader.get_value('project_name')
    self.name = app_name
    Cmd.__init__(self)
    self.prompt = '%s [%s] > ' % (self.name, self.project_name)
    self.stream_specs = []
    _ = os.system('clear')
def textfile_line_generator(**kwargs):
    kwreader = common.KeywordArgReader('filename')
    kwreader.read(**kwargs)
    filename = kwreader.get_value('filename')
    with open(filename, 'rt') as f:
        for raw_line in f:
            line = raw_line.strip()
            if len(line):
                yield line
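# Usage sketch for textfile_line_generator; 'events.txt' is a hypothetical
# filename, not one this module provides:
#
#   for line in textfile_line_generator(filename='events.txt'):
#       print(line)  # each non-blank line, surrounding whitespace stripped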
def named_tuple_array_to_dict(tuple_array, **kwargs):
    kwreader = common.KeywordArgReader('key_name', 'value_name')
    kwreader.read(**kwargs)
    data = {}
    key_name = kwreader.get_value('key_name')
    value_name = kwreader.get_value('value_name')
    for named_tuple in tuple_array:
        key = getattr(named_tuple, key_name)
        value = getattr(named_tuple, value_name)
        data[key] = value
    return data
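# A minimal, self-contained sketch of named_tuple_array_to_dict in use; the
# Pair type and sample rows are illustrative assumptions, not part of this module:
def _demo_named_tuple_array_to_dict():
    from collections import namedtuple
    Pair = namedtuple('Pair', ['code', 'label'])
    rows = [Pair('US', 'United States'), Pair('CA', 'Canada')]
    # returns {'US': 'United States', 'CA': 'Canada'}
    return named_tuple_array_to_dict(rows, key_name='code', value_name='label')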
def __init__(self, **kwargs):
    kwreader = common.KeywordArgReader('local_temp_dir',
                                       'src_filename',
                                       'src_file_header',
                                       'src_file_delimiter')
    # read() expects keyword arguments, not the kwargs dict as a single positional
    kwreader.read(**kwargs)
    self._local_temp_directory = kwreader.get_value('local_temp_dir')
    self._source_filename = kwreader.get_value('src_filename')
    self._source_file_header = kwreader.get_value('src_file_header')
    self._source_file_delimiter = kwreader.get_value('src_file_delimiter')
    self._svc_registry = None
    self._transfer_functions = {}
def __init__(self, **kwargs):
    kwreader = common.KeywordArgReader('host', 'username', 'database', 'password')
    kwreader.read(**kwargs)
    self.host = kwreader.get_value('host')
    # 'port' is optional; default to SQL Server's standard 1433
    self.port = int(kwreader.get_value('port') or 1433)
    self.username = kwreader.get_value('username')
    self.db_name = kwreader.get_value('database')
    self.password = kwreader.get_value('password')
    self.db = sqldbx.SQLServerDatabase(self.host, self.db_name, self.port)
    self.db.login(self.username, self.password)
    self._data_manager = sqldbx.PersistenceManager(self.db)
def __init__(self, **kwargs):
    kwreader = common.KeywordArgReader('record_type', 'pipeline_id')
    kwreader.read(**kwargs)
    self._version = 1
    self._record_type = kwreader.get_value('record_type')
    self._pipeline_id = kwreader.get_value('pipeline_id')
    self._timestamp = datetime.datetime.now().isoformat()
    # any kwargs beyond the two required fields become extra, user-defined headers
    self._extra_headers = []
    for key, value in kwargs.items():
        if key not in ['record_type', 'pipeline_id']:
            self._extra_headers.append({'name': key, 'value': value})
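# Usage sketch for this header constructor; 'MessageHeader' is a hypothetical
# name for the enclosing class, used purely for illustration:
#
#   header = MessageHeader(record_type='order', pipeline_id='p-001', source='web')
#   # header._extra_headers == [{'name': 'source', 'value': 'web'}]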
def __init__(self, **kwargs):
    kwreader = common.KeywordArgReader('staging_topic',
                                       'core_topic',
                                       'kafka_cluster',
                                       'staging_record_transformer',
                                       'core_record_transformer')
    kwreader.read(**kwargs)
    self._staging_topic = kwargs['staging_topic']
    self._core_topic = kwargs['core_topic']
    self._cluster = kwargs['kafka_cluster']
    self._staging_transformer = kwargs['staging_record_transformer']
    self._core_transformer = kwargs['core_record_transformer']
def __init__(self, name, **kwargs):
    kwreader = common.KeywordArgReader(*REQUIRED_ED_CHANNEL_FIELDS)
    kwreader.read(**kwargs)
    self.name = name
    self._data = {}
    for key, value in kwargs.items():
        if key == 'payload_fields':
            # 'payload_fields' is a collection; dedupe it into a set
            self._data[key] = set()
            for field in value:
                self._data[key].add(field)
        else:
            self._data[key] = value
def __init__(self, **kwargs):
    kwreader = common.KeywordArgReader(*PIPELINE_SVC_PARAM_NAMES)
    kwreader.read(**kwargs)
    self.job_bucket_name = kwargs['job_bucket_name']
    self.posted_jobs_folder = kwargs['posted_jobs_folder']
    self.accepted_jobs_folder = kwargs['accepted_jobs_folder']
    self.bid_window_limit_type = kwargs['bid_window_limit_type']
    if self.bid_window_limit_type not in ALLOWED_BIDDING_LIMIT_TYPES:
        raise Exception('Invalid bidding limit type %s. Allowed types are %s.'
                        % (self.bid_window_limit_type, ALLOWED_BIDDING_LIMIT_TYPES))
    self.bid_window_limit = int(kwargs['bid_window_limit'])
def __init__(self, **kwargs):
    kwreader = common.KeywordArgReader('fact_table_field_name',
                                       'dim_table_name',
                                       'key_field_name',
                                       'value_field_name',
                                       'primary_key_type',
                                       'id_lookup_function')
    kwreader.read(**kwargs)
    self._fact_table_field_name = kwreader.get_value('fact_table_field_name')
    self._dim_table_name = kwreader.get_value('dim_table_name')
    self._key_field_name = kwreader.get_value('key_field_name')
    self._value_field_name = kwreader.get_value('value_field_name')
    self._pk_type = kwreader.get_value('primary_key_type')
    self._lookup_func = kwreader.get_value('id_lookup_function')
def __init__(self, app_name='mkmap', **kwargs):
    kwreader = common.KeywordArgReader()  # no required kwargs
    kwreader.read(**kwargs)
    self.name = app_name
    Cmd.__init__(self)
    self.prompt = '%s> ' % self.name
    self.datasource_specs = kwreader.get_value('datasource_specs') or []
    self.map_specs = kwreader.get_value('maps') or []
    self.service_objects = kwreader.get_value('service_objects') or []
    self.globals = []
    self.globals.append(ParamSpec(name='project_home', value=''))
    self.globals.append(ParamSpec(name='datasource_module', value=''))
    self.globals.append(ParamSpec(name='service_module', value=''))
    self.initial_datafile = kwargs.get('initial_sourcefile')
    _ = os.system('clear')
def __init__(self, cli_prompt, **kwargs):
    kwreader = common.KeywordArgReader('warning_message', 'failure_message')
    kwreader.read(**kwargs)
    warning_message = kwreader.get_value('warning_message')
    failure_message = kwreader.get_value('failure_message')
    max_retries = 1
    num_retries = 0
    self.data = cli_prompt.show()
    while num_retries < max_retries and not self.data:
        print('\n%s\n' % warning_message)
        self.data = cli_prompt.show()
        num_retries += 1
    if not self.data:
        raise Exception(failure_message)
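# Usage sketch: cli_prompt can be any object exposing a show() method that
# returns the user's input (or something falsy when nothing was entered).
# The CannedPrompt stub and the RequiredInputPrompt name below are
# hypothetical, for illustration only:
#
#   class CannedPrompt:
#       def __init__(self, answers):
#           self._answers = list(answers)
#       def show(self):
#           return self._answers.pop(0) if self._answers else None
#
#   # the first show() returns '', so the warning prints once before the retry succeeds
#   result = RequiredInputPrompt(CannedPrompt(['', 'yes']),
#                                warning_message='A value is required.',
#                                failure_message='No value supplied.')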