def __init__(self, generator_func, **kwargs):
    """Bind a generator callable, resolve the service registry, and capture generator args.

    If a truthy 'service_registry' keyword is supplied it is used directly;
    otherwise an empty ServiceObjectRegistry is created.
    """
    self._generator = generator_func
    supplied_registry = kwargs.get('service_registry')
    if supplied_registry:
        self._services = supplied_registry
    else:
        self._services = common.ServiceObjectRegistry({})
    self._genargs = self.generator_args(**kwargs)
def main(args):
    """Send a canned test SMS through the registered 'sms' service and print the SID."""
    yaml_config = common.read_config_file(args['<configfile>'])
    registry = common.ServiceObjectRegistry(snap.initialize_services(yaml_config))
    sms_service = registry.lookup('sms')
    message_sid = sms_service.send_sms('9174176968', 'hello NET')
    print(message_sid)
def main(args):
    """Throughput test: push 2M serialized records into a Kafka topic via confluent-kafka.

    Expects docopt-style args with '<config_file>' and '<topic>'. Relies on a
    module-level delivery_report() callback and an `errors` list it populates.
    """
    configfile = args['<config_file>']
    yaml_config = common.read_config_file(configfile)
    services = common.ServiceObjectRegistry(snap.initialize_services(yaml_config))
    topic_name = args['<topic>']
    tkservice = services.lookup('telekast')
    # NOTE(review): `topic` is unused below — produce() is called with topic_name
    topic = tkservice.get_topic(topic_name)
    hfactory = tkcore.PipelineRecordHeaderFactory('pipeline_name', 'record_type')
    rfactory = tkcore.PipelineRecordFactory(payload_field_name='data')
    msg_count = 2000000
    time_log = jrnl.TimeLog()
    # librdkafka tuning: acks=1 plus a single in-flight request keeps ordering on retry
    prod_config = {
        "on_delivery": delivery_report,
        "bootstrap.servers": tkservice.connect_string,
        "group.id": "python_injector",
        "retry.backoff.ms": 3000,
        "retries": 5,
        "default.topic.config": {"request.required.acks": "1"},
        "max.in.flight.requests.per.connection": 1,
        "queue.buffering.max.messages": 100000,
        "batch.num.messages": 50000,
        "message.max.bytes": 2000000
    }
    producer = Producer(**prod_config)
    payload = uuid.uuid4()  # one UUID shared by every record in this run
    with jrnl.stopwatch('ingest_records', time_log):
        for i in range(msg_count):
            producer.poll(0)  # service pending delivery callbacks
            header = hfactory.create(pipeline_name='test', record_type='test_record')
            record = rfactory.create(header, **{'message': payload, 'tag': i})
            # NOTE(review): pickled payloads are only safe with trusted consumers
            producer.produce(topic_name, pickle.dumps(record), callback=delivery_report)
            if not i % 100000:  # progress marker every 100k (fires at i == 0 too)
                print('%d messages sent.' % i)
    producer.flush()
    print('%d messages sent to Kafka topic %s.' % (msg_count, topic_name))
    print(time_log.readout)
    if len(errors):
        print('!!! Errors sending messages:')
        print('\n'.join(errors))
def main(args):
    """Demo for checkpoint(): push nine records through a buffer flushed every 4 writes."""
    registry = common.ServiceObjectRegistry({})
    file_store = FileStore(registry, filename='dexcrazy.txt')
    record_buffer = RecordBuffer(file_store)
    with checkpoint(record_buffer, interval=4) as cpt:
        for _ in range(9):
            record_buffer.write('hello world')
    print('checkpoint instance recorded %d calls to RecordStore.write()' % cpt.total_writes)
def main(args):
    """Delete every Redis key (queues and map) scoped to the configured pipeline."""
    yaml_config = common.read_config_file(args['<init_file>'])
    pipeline_id = yaml_config['globals']['pipeline_id']
    logger = tdx.init_logging(LOG_TAG, 'reset.log', logging.DEBUG)
    registry = common.ServiceObjectRegistry(
        snap.initialize_services(yaml_config, logger))
    redis_svc = registry.lookup('redis')
    # Same deletions as before, in the same order; instance() is re-fetched
    # per call exactly as the original did.
    for key_builder in (redis_svc.get_transformed_record_queue,
                        redis_svc.get_raw_record_queue,
                        redis_svc.get_generator_to_user_map):
        redis_svc.redis_server.instance().delete(key_builder(pipeline_id))
def main(args):
    """Bootstrap logging and services, then start the interactive Forge shell."""
    yaml_config = common.read_config_file(args['<initfile>'])
    log_path = os.path.join(yaml_config['globals']['log_directory'], 'forge.log')
    log = metl.init_logging('mx_forge', log_path, logging.DEBUG)
    registry = common.ServiceObjectRegistry(
        snap.initialize_services(yaml_config, log))
    forge_cli = ForgeCLI('forge', registry)
    forge_cli.cmdloop()
def setup(app):
    """Idempotently initialize the app: load config, build the service registry.

    Returns the app unchanged if a previous call already initialized it.
    """
    if app.config.get('initialized'):
        return app
    startup_mode = app.config.get('startup_mode')
    yaml_config = load_snap_config(startup_mode, app)
    app.debug = yaml_config['globals']['debug']
    # Expose the initialized service objects to handlers via the app config.
    service_table = initialize_services(yaml_config)
    app.config['services'] = common.ServiceObjectRegistry(service_table)
    app.config['initialized'] = True
    return app
def __init__(self, configfile_name, **kwargs):
    """Build the Kafka cluster model, service registry, DB handle, and tablespec.

    :param configfile_name: path to a YAML init file with 'globals',
        'object_db', and 'tablespec' sections.
    Raises KeyError if a required config section or key is missing.
    """
    yaml_config = None
    with open(configfile_name) as f:
        # was yaml.load(f) with no Loader — deprecated and unsafe on
        # untrusted input; config files should not need arbitrary YAML tags
        yaml_config = yaml.safe_load(f)
    self._cluster = tg.KafkaCluster()
    for entry in yaml_config['globals']['cluster_nodes']:
        tokens = entry.split(':')
        ip = tokens[0]
        # NOTE(review): port is left as a string — confirm KafkaNode accepts that
        port = tokens[1]
        # add_node() returns a new cluster object; rebind on each addition
        self._cluster = self._cluster.add_node(tg.KafkaNode(ip, port))
    self._source_topic = yaml_config['globals']['source_topic']
    service_object_tbl = snap.initialize_services(
        yaml_config, logging.getLogger(__name__))
    self._service_object_registry = common.ServiceObjectRegistry(
        service_object_tbl)
    # The object DB is reached through a named property of a service object.
    object_db_config = yaml_config['object_db']
    so_name = object_db_config['service_object']
    db_service_object = self._service_object_registry.lookup(so_name)
    db_property = object_db_config['property']
    self._db = getattr(db_service_object, db_property)
    # Assemble the target table spec: pk, object-id, then data/meta fields.
    tbl_name = yaml_config['tablespec']['table_name']
    target_schema = yaml_config['tablespec']['schema']
    obj_id_field = yaml_config['tablespec']['object_id_field']
    pkfield_cfg = yaml_config['tablespec']['pk_field']
    tspec_builder = TableSpecBuilder(tbl_name,
                                     schema=target_schema,
                                     pk_field=pkfield_cfg['name'],
                                     pk_type=pkfield_cfg['type'],
                                     object_id_field=obj_id_field,
                                     pk_default=pkfield_cfg['default'])
    for fieldname, fieldtype in yaml_config['tablespec']['data_fields'].items():
        tspec_builder.add_data_field(fieldname, fieldtype)
    for fieldname, fieldtype in yaml_config['tablespec']['meta_fields'].items():
        tspec_builder.add_meta_field(fieldname, fieldtype)
    self._tablespec = tspec_builder.build()
def main(args):
    """Listen on a Postgres pub/sub channel and print each event payload.

    Reads DB credentials from the environment, validates the channel and its
    configured handler function, then blocks on pubsub.events().
    """
    print(args)
    local_env = common.LocalEnvironment('PGSQL_USER', 'PGSQL_PASSWORD')
    local_env.init()
    pgsql_user = local_env.get_variable('PGSQL_USER')
    pgsql_password = local_env.get_variable('PGSQL_PASSWORD')
    yaml_config = common.read_config_file(args['<initfile>'])
    print(common.jsonpretty(yaml_config))
    db_host = yaml_config['globals']['database_host']
    db_name = yaml_config['globals']['database_name']
    pubsub = pgpubsub.connect(host=db_host,
                              user=pgsql_user,
                              password=pgsql_password,
                              database=db_name)
    channel_id = args['<channel>']
    if not yaml_config['channels'].get(channel_id):
        raise NoSuchEventChannel(channel_id)
    # Make the project's handler module importable, then load it by name.
    handler_module_name = yaml_config['globals']['handler_module']
    project_dir = common.load_config_var(yaml_config['globals']['project_dir'])
    sys.path.append(project_dir)
    handlers = __import__(handler_module_name)
    handler_function_name = yaml_config['channels'][channel_id]['handler_function']
    if not hasattr(handlers, handler_function_name):
        raise NoSuchEventHandler(handler_function_name, handler_module_name)
    handler_function = getattr(handlers, handler_function_name)
    # NOTE(review): handler_function is resolved but never invoked in the event
    # loop below — confirm whether events were meant to be dispatched to it.
    # NOTE(review): 'logger' is not defined in this function — presumably a
    # module-level logger; verify it exists at module scope.
    service_objects = common.ServiceObjectRegistry(
        snap.initialize_services(yaml_config, logger))
    pubsub.listen(channel_id)
    print('listening on channel "%s"...' % channel_id)
    for event in pubsub.events():
        print(event.payload)
def build(self):
    """Assemble a RecordTransformer (CSV-output variant) from this builder's map config.

    Field sources may be 'record', 'lookup*', or 'value'; unknown sources raise.
    """
    service_object_dict = snap.initialize_services(self._transform_config)
    so_registry = common.ServiceObjectRegistry(service_object_dict)
    datasource_name = self._transform_config['maps'][
        self._map_name]['lookup_source']
    datasource = self.load_datasource(datasource_name,
                                      self._transform_config,
                                      so_registry)
    transformer = RecordTransformer()
    # 'fields' is a list of single-key dicts: [{fieldname: field_config}, ...]
    for field_config in self._transform_config['maps'][
            self._map_name]['fields']:
        # NOTE: the inner loop rebinds field_config to the per-field config dict
        for fieldname, field_config in field_config.items():
            transformer.add_target_field(fieldname)
            if not field_config:
                # if there is no config for this field name, fill the output column with an empty value
                transformer.map_const_to_target_field(fieldname, '')
            elif field_config['source'] == 'record':
                # if no key is supplied, assume that the fieldname in the source is the same as the target fieldname
                transformer.map_source_to_target_field(
                    field_config.get('key', fieldname), fieldname)
            elif field_config['source'].startswith('lookup'):
                if field_config['source'] == (
                        'lookup'
                ):  # we infer the lookup function name as lookup_<field_name>(...)
                    transformer.register_datasource(fieldname, datasource)
                else:
                    # the source field gives an explicit function name starting with 'lookup_'
                    transformer.register_datasource_with_explicit_function(
                        fieldname, datasource, field_config['source'])
            elif field_config['source'] == 'value':
                transformer.map_const_to_target_field(
                    fieldname, field_config['value'])
            else:
                raise Exception(
                    'unrecognized source type "%s." Allowed types are record, lookup, and value.'
                    % field_config['source'])
    # The CSV header lists every target field, in map order.
    output_fields = []
    for field_config in self._transform_config['maps'][
            self._map_name]['fields']:
        for key, value in field_config.items():
            output_fields.append(key)
    transformer.set_csv_output_header(output_fields)
    return transformer
def main(args):
    """Throughput test: stream 1M serialized records into a Kafka topic via pykafka.

    Expects docopt-style args with '<config_file>' and '<topic>'.
    """
    configfile = args['<config_file>']
    yaml_config = common.read_config_file(configfile)
    services = common.ServiceObjectRegistry(
        snap.initialize_services(yaml_config))
    topic_name = args['<topic>']
    tkservice = services.lookup('telekast')
    topic = tkservice.get_topic(topic_name)
    hfactory = tkcore.PipelineRecordHeaderFactory('pipeline_name', 'record_type')
    rfactory = tkcore.PipelineRecordFactory(payload_field_name='data')
    # NOTE(review): this header/record pair is immediately overwritten inside
    # the loop below — appears to be dead code.
    header = hfactory.create(pipeline_name='cdm_test', record_type='cdm')
    record = rfactory.create(header, {'message': 'test'})
    msg_count = 1000000
    time_log = jrnl.TimeLog()
    # rdkafka-backed producer with large queues and a short linger, for throughput
    with topic.get_producer(use_rdkafka=True,
                            serializer=default_dict_serializer,
                            min_queued_messages=250000,
                            max_queued_messages=500000,
                            linger_ms=5) as producer:
        payload = uuid.uuid4()  # one UUID reused for every record
        with jrnl.stopwatch('ingest_records', time_log):
            for i in range(msg_count):
                header = hfactory.create(pipeline_name='test',
                                         record_type='test_record')
                record = rfactory.create(header, **{
                    'message': payload,
                    'tag': i
                })
                producer.produce(record)
                if not i % 100000:  # progress marker every 100k (fires at i == 0 too)
                    print('%d messages sent.' % i)
    print('%d messages sent to Kafka topic %s.' % (msg_count, topic_name))
    print(time_log.readout)
def main(args):
    """Drain the Redis transformed-record queue, removing each record from Couchbase.

    Pops keys until the queue yields a falsy value, deleting the matching
    Couchbase document for each and reporting progress every 100k records.
    """
    init_filename = args['<init_file>']
    yaml_config = common.read_config_file(init_filename)
    logger = tdx.init_logging(LOG_TAG, 'reset.log', logging.DEBUG)
    service_objects = snap.initialize_services(yaml_config, logger)
    so_registry = common.ServiceObjectRegistry(service_objects)
    redis_svc = so_registry.lookup('redis')
    couchbase_svc = so_registry.lookup('couchbase')
    transforms = redis_svc.transformed_record_queue
    num_records_cleared = 0
    while True:
        key = transforms.pop()
        if not key:  # empty queue is signalled by a falsy pop result
            break
        try:
            couchbase_svc.data_manager.bucket.remove(key)
            num_records_cleared += 1
            if not num_records_cleared % 100000:
                # fixed: was a Python 2 print statement
                print('%d records cleared.' % num_records_cleared)
        except Exception as err:  # fixed: was Python 2 "except Exception, err"
            # fixed: err.__class__.__name was an AttributeError (missing
            # trailing underscores); err.message does not exist in Python 3
            print('%s thrown while clearing record ID %s: %s'
                  % (err.__class__.__name__, key, err))
def main(args):
    """Create a named pipeline journal record in Couchbase.

    Exits 0 on success; exits 1 if a pipeline with that name already exists.
    """
    init_filename = args['<init_file>']
    pipeline_name = args['<pipeline_name>']
    yaml_config = common.read_config_file(init_filename)
    logger = tdx.init_logging('mkpipeline', 'jobstat.log', logging.DEBUG)
    service_objects = snap.initialize_services(yaml_config, logger)
    so_registry = common.ServiceObjectRegistry(service_objects)
    couchbase_svc = so_registry.lookup('couchbase')
    jrnl_mgr = couchbase_svc.journal_manager
    jrnl_mgr.register_keygen_function(const.RECTYPE_PIPELINE,
                                      generate_pipeline_key)
    new_pipeline = PipelineRecord(pipeline_name,
                                  'for populating orders star schema')
    try:
        # (return value of insert_record was previously bound but unused)
        jrnl_mgr.insert_record(new_pipeline)
        print('created pipeline "%s".' % new_pipeline.name)
        exit(0)
    except couchbase.exceptions.KeyExistsError:
        # fixed: was Python 2 "except X, err" syntax (err was unused)
        print('error: there is already a pipeline named "%s".' % pipeline_name)
        exit(1)
def build(self):
    """Assemble a RecordTransformer from this builder's map configuration.

    Each field's 'source' selects its mapping: 'record' copies from the
    input record, 'lookup' registers the datasource, 'value' maps a constant.
    """
    registry = common.ServiceObjectRegistry(
        snap.initialize_services(self._transform_config))
    map_config = self._transform_config['maps'][self._map_name]
    datasource = self.load_datasource(map_config['lookup_source'],
                                      self._transform_config,
                                      registry)
    transformer = RecordTransformer()
    for fieldname, field_config in map_config['fields'].items():
        transformer.add_target_field(fieldname)
        source_type = field_config['source']
        if source_type == 'record':
            # absent 'key' means source and target field names are the same
            transformer.map_source_to_target_field(
                field_config.get('key', fieldname), fieldname)
        elif source_type == 'lookup':
            transformer.register_datasource(fieldname, datasource)
        elif source_type == 'value':
            transformer.map_const_to_target_field(fieldname,
                                                  field_config['value'])
        else:
            raise Exception('unrecognized source type "%s." Allowed types are record, lookup, and value.' % source_type)
    return transformer
def main(args):
    """Cognito admin CLI: verify attributes, reset/login, create users, set initial passwords.

    Docopt flags select the operation; several modes read an access token or
    session string from stdin via read_stdin().
    """
    configfile = args['<configfile>']
    yaml_config = None
    with open(configfile) as f:
        # was yaml.load(f) with no Loader — deprecated and unsafe;
        # config files should not need arbitrary YAML object tags
        yaml_config = yaml.safe_load(f)
    services = common.ServiceObjectRegistry(
        snap.initialize_services(yaml_config))
    cognito_svc = services.lookup('cognito')
    result = None
    if args['--verify']:
        attr_name = args['<attribute>']
        code = args['<code>']
        line = read_stdin()
        if line:
            access_token = line
        # NOTE(review): if stdin is empty, access_token is unbound here (NameError)
        return cognito_svc.verify_named_attribute(attr_name, access_token, code)
    if args['--verification-code']:
        line = read_stdin()
        if line:
            access_token = line
        # NOTE(review): same unbound-name risk as above when stdin is empty
        result = cognito_svc.get_verification_code_for_named_attribute(
            args['<attribute>'], access_token)
        print(result)
    if args['--reset']:
        username = args['--username']
        result = cognito_svc.reset_password(username)
    if args['--login']:
        username = args['--username']
        password = args['--password']
        result = cognito_svc.user_login(username, password)
        if args['--save-token']:
            # print only the token (or challenge session) so it can be piped
            if result.get('ChallengeName'):
                print(result['Session'])
            elif result['ResponseMetadata']['HTTPStatusCode'] == 200:
                print(result['AuthenticationResult']['AccessToken'])
        else:
            print(json.dumps(result))
    if args['--newuser']:
        user_attrs = []
        username = args['<username>']
        user_attrs.append(
            awss.CognitoUserAttribute(name='email', value=username))
        result = cognito_svc.user_create(username, user_attrs)
        print('sent cognito request with result:', file=sys.stderr)
        print(result)
    if args['--initpw']:
        username = args['--username']
        new_password = args['--password']
        session = None
        line = read_stdin()
        if line:
            session = line
        result = cognito_svc.change_initial_password(username, new_password,
                                                     session)
        print('sent cognito request with result:', file=sys.stderr)
        print(json.dumps(result))
def register_service_objects(self, name, service_object_config):
    """Initialize service objects from config and cache them in a registry.

    The 'name' parameter is currently unused; it is retained for interface
    compatibility with existing callers.
    """
    initialized_services = snap.initialize_services(service_object_config)
    self._svc_registry = common.ServiceObjectRegistry(initialized_services)
def build(self):
    """Assemble a RecordTransformer supporting an optional default transform.

    Field sources may be 'record', 'lookup*', 'value', or 'lambda'; a
    'default_transform' entry on the map names a routine on the datasource.
    """
    service_object_dict = snap.initialize_services(self._transform_config)
    so_registry = common.ServiceObjectRegistry(service_object_dict)
    datasource_name = self._transform_config['maps'][
        self._map_name]['lookup_source']
    datasource = self.load_datasource(datasource_name,
                                      self._transform_config,
                                      so_registry)
    transformer = RecordTransformer()
    default_transform_funcname = self._transform_config['maps'][
        self._map_name].get('default_transform')
    if default_transform_funcname:
        if not hasattr(datasource, default_transform_funcname):
            datasource_class_name = self._transform_config['sources'][
                datasource_name]['class']
            raise Exception(
                'default transform routine "%s" designated, but not found in datasource %s.'
                % (default_transform_funcname, datasource_class_name))
        default_transform_func = getattr(datasource,
                                         default_transform_funcname)
        transformer.set_default_transform(default_transform_func)
    # 'fields' is a list of single-key dicts: [{fieldname: field_config}, ...]
    for field_config in self._transform_config['maps'][
            self._map_name]['fields']:
        # NOTE: the inner loop rebinds field_config to the per-field config dict
        for fieldname, field_config in field_config.items():
            transformer.add_target_field(fieldname)
            if not field_config:
                # if there is no config for this target field name, default to same field in source
                transformer.map_source_to_target_field(fieldname, fieldname)
            elif field_config['source'] == 'record':
                # if no key is supplied, assume that the fieldname in the source is the same as the target fieldname
                source_fieldname = field_config.get('key', fieldname)
                transformer.map_source_to_target_field(source_fieldname,
                                                       fieldname)
            elif field_config['source'].startswith('lookup'):
                if field_config['source'] == (
                        'lookup'
                ):  # we infer the lookup function name as lookup_<field_name>(...)
                    lookup_function_name = 'lookup_%s' % fieldname
                else:
                    # the source field gives an explicit function name starting with 'lookup_'
                    lookup_function_name = field_config['source']
                transformer.register_datasource_with_explicit_function(
                    fieldname, datasource, lookup_function_name)
            elif field_config['source'] == 'value':
                if 'value' not in field_config:
                    raise Exception(
                        'a mapped field with source = value must set the "value" field.'
                    )
                # NOTE(review): argument order here is (value, fieldname) —
                # the sibling build() implementations pass (fieldname, value);
                # confirm which order map_const_to_target_field expects.
                transformer.map_const_to_target_field(
                    field_config['value'], fieldname)
            elif field_config['source'] == 'lambda':
                source_fieldname = field_config.get('key', fieldname)
                lambda_string = field_config.get('expression')
                if not lambda_string:
                    raise Exception(
                        'a mapped field with source = lambda must set the "expression" field.'
                    )
                transformer.map_source_to_lambda(source_fieldname, fieldname,
                                                 lambda_string)
            else:
                raise Exception(
                    'unrecognized source type "%s." Allowed types are record, lookup, and value.'
                    % field_config['source'])
    # The CSV header lists every target field, in map order.
    output_fields = []
    for field_config in self._transform_config['maps'][
            self._map_name]['fields']:
        for key, value in field_config.items():
            output_fields.append(key)
    transformer.set_csv_output_header(output_fields)
    return transformer
def main(args):
    """Initialize services from the config file and look up the named event service."""
    yaml_config = common.read_config_file(args['<configfile>'])
    registry = common.ServiceObjectRegistry(
        snap.initialize_services(yaml_config))
    event_service = registry.lookup(args['<service>'])
def main(args):
    """Ingest CLI: stream stdin or a datafile into a named ingest target, or list config.

    Modes (docopt): --target with no <datafile> reads records from stdin;
    a <datafile> argument reads that file line-by-line; --list prints the
    targets/datastores/globals config sections. --preview echoes records
    instead of writing them; --limit caps the record count.
    """
    #print(common.jsonpretty(args))
    config_filename = args['<configfile>']
    yaml_config = common.read_config_file(config_filename)
    service_object_registry = common.ServiceObjectRegistry(
        snap.initialize_services(yaml_config))
    datastore_registry = DatastoreRegistry(
        initialize_datastores(yaml_config, service_object_registry))
    preview_mode = False
    if args['--preview']:
        preview_mode = True
    limit = -1  # -1 means unlimited: record_count never reaches it
    if args.get('--limit') is not None:
        limit = int(args['--limit'])
    list_mode = False
    stream_input_mode = False
    file_input_mode = False
    available_ingest_targets = load_ingest_targets(yaml_config,
                                                   datastore_registry)
    if args['--target'] == True and args['<datafile>'] is None:
        # stream mode: records arrive on stdin, one per line
        stream_input_mode = True
        ingest_target_name = args['<ingest_target>']
        ingest_target = lookup_ingest_target_by_name(ingest_target_name,
                                                     available_ingest_targets)
        buffer = initialize_record_buffer(ingest_target, datastore_registry)
        record_count = 0
        with checkpoint(buffer, interval=ingest_target.checkpoint_interval):
            while True:
                if record_count == limit:
                    break
                raw_line = sys.stdin.readline()
                line = raw_line.lstrip().rstrip()
                if not len(line):
                    break  # blank line / EOF terminates the stream
                if not preview_mode:
                    buffer.write(line)
                else:
                    print(line)
                record_count += 1
    elif args['<datafile>']:
        # file mode: read records line-by-line from the named file
        file_input_mode = True
        input_file = args['<datafile>']
        ingest_target_name = args['<ingest_target>']
        ingest_target = lookup_ingest_target_by_name(ingest_target_name,
                                                     available_ingest_targets)
        buffer = initialize_record_buffer(ingest_target, datastore_registry)
        record_count = 0
        with checkpoint(buffer, interval=ingest_target.checkpoint_interval):
            with open(input_file) as f:
                for line in f:
                    if record_count == limit:
                        break
                    if not preview_mode:
                        buffer.write(line)
                    else:
                        print(line)
                    record_count += 1
    elif args['--list'] == True:
        # list mode: dump the requested config section(s)
        if args['targets']:
            for target in yaml_config['ingest_targets']:
                print('::: Ingest target "%s": ' % target)
                print(common.jsonpretty(yaml_config['ingest_targets'][target]))
        if args['datastores']:
            for dstore in yaml_config['datastores']:
                print('::: Datastore alias "%s": ' % dstore)
                print(common.jsonpretty(yaml_config['datastores'][dstore]))
        if args['globals']:
            print('::: Global settings:')
            print(common.jsonpretty(yaml_config['globals']))
def main(args):
    """Poll an SQS queue indefinitely, handing each message to a configured handler.

    The handler function is loaded by name from the project's consumer module
    and run in a child Process per message; messages are deleted from the
    queue after the handler process is spawned.
    """
    if args['--version']:
        print(show_version())
        return
    verbose_mode = False
    if args['--verbose']:
        verbose_mode = True
    configfile = args['<configfile>']
    yaml_config = common.read_config_file(configfile)
    source_name = args['<source_name>']
    if not yaml_config['sources'].get(source_name):
        # fixed: the %s placeholder previously had no argument bound to it
        raise Exception(
            'No queue source "%s" defined. Please check your config file.'
            % source_name)
    service_tbl = snap.initialize_services(yaml_config)
    service_registry = common.ServiceObjectRegistry(service_tbl)
    source_config = yaml_config['sources'][source_name]
    # Create SQS client
    region = source_config['region']
    polling_interval = int(source_config['polling_interval_seconds'])
    sqs = boto3.client('sqs', region_name=region)
    queue_url = common.load_config_var(source_config['queue_url'])
    # Make the project's consumer module importable, then resolve the handler.
    msg_handler_name = source_config['handler']
    project_dir = common.load_config_var(
        yaml_config['globals']['project_home'])
    sys.path.append(project_dir)
    msg_handler_module = yaml_config['globals']['consumer_module']
    msg_handler_func = common.load_class(msg_handler_name, msg_handler_module)
    # NOTE(review): child_procs grows without bound; children are never joined
    child_procs = []
    print('### initiating polling loop.')
    # loop forever
    while True:
        current_time = datetime.datetime.now().isoformat()
        if verbose_mode:
            print('### checking SQS queue %s for messages at %s...'
                  % (queue_url, current_time),
                  file=sys.stderr)
        # Receive message from SQS queue.
        # VisibilityTimeout: seconds the received messages stay hidden from
        # subsequent retrieve requests after being retrieved.
        # WaitTimeSeconds: long-poll duration; the call returns sooner if a
        # message arrives, or successfully with an empty list when it expires.
        response = sqs.receive_message(
            QueueUrl=queue_url,
            AttributeNames=['SentTimestamp'],
            MaxNumberOfMessages=1,
            MessageAttributeNames=['All'],
            VisibilityTimeout=30,
            WaitTimeSeconds=3
        )
        inbound_msgs = response.get('Messages') or []
        if not len(inbound_msgs):
            if verbose_mode:
                print('### No messages pending, sleeping %d seconds before re-try...'
                      % polling_interval)
            time.sleep(polling_interval)
            continue
        for message in inbound_msgs:
            receipt_handle = message['ReceiptHandle']
            current_time = datetime.datetime.now().isoformat()
            print('### spawning message processor at %s...' % current_time,
                  file=sys.stderr)
            try:
                # TODO: can we pickle a ServiceObjectRegistry?
                p = Process(target=msg_handler_func,
                            args=(message, receipt_handle, service_registry))
                p.start()
                child_procs.append(p)
                print('### Queued message-handling subprocess with PID %s.'
                      % p.pid,
                      file=sys.stderr)
                # Delete received message from queue
                sqs.delete_message(QueueUrl=queue_url,
                                   ReceiptHandle=receipt_handle)
            except Exception as err:
                print('!!! Error processing message with receipt: %s'
                      % receipt_handle,
                      file=sys.stderr)
                print(err)