def load_snap_config(mode, app):
    config_file_path = None
    if mode == 'standalone':
        parser = argparse.ArgumentParser()
        parser.add_argument("--configfile",
                            metavar='<configfile>',
                            required=True,
                            nargs=1,
                            help='YAML config file for snap endpoints')
        args = parser.parse_args()
        config_file_path = common.full_path(args.configfile[0])
    elif mode == 'server':
        config_file_path = os.getenv('SNAP_CONFIG')
        filename = os.path.join(app.instance_path, 'application.cfg')
        print('generated config path is %s' % filename)
    else:
        print('valid setup modes are "standalone" and "server".')
        exit(1)

    # in server mode the path comes from the environment, so it can be unset
    if not config_file_path:
        print('please set the SNAP_CONFIG environment variable in the WSGI command string.')
        exit(1)

    return common.read_config_file(config_file_path)
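# A usage sketch with hypothetical call sites, showing the two supported modes:
# 'standalone' parses --configfile from the command line, while 'server' reads
# the SNAP_CONFIG environment variable and expects a Flask-style app object
# exposing an instance_path attribute.
#
#   config = load_snap_config('standalone', app=None)   # app is unused in this mode
#
#   app = Flask(__name__)
#   config = load_snap_config('server', app)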
def main(args):
    # send a test SMS through the registered 'sms' service object
    configfile = args['<configfile>']
    yaml_config = common.read_config_file(configfile)
    service_tbl = snap.initialize_services(yaml_config)
    registry = common.ServiceObjectRegistry(service_tbl)
    sms_svc = registry.lookup('sms')
    sid = sms_svc.send_sms('9174176968', 'hello NET')
    print(sid)
def main(args):
    configfile = args['<config_file>']
    yaml_config = common.read_config_file(configfile)
    services = common.ServiceObjectRegistry(snap.initialize_services(yaml_config))

    topic_name = args['<topic>']
    tkservice = services.lookup('telekast')
    topic = tkservice.get_topic(topic_name)

    hfactory = tkcore.PipelineRecordHeaderFactory('pipeline_name', 'record_type')
    rfactory = tkcore.PipelineRecordFactory(payload_field_name='data')

    msg_count = 2000000
    time_log = jrnl.TimeLog()

    prod_config = {
        "on_delivery": delivery_report,
        "bootstrap.servers": tkservice.connect_string,
        "group.id": "python_injector",
        "retry.backoff.ms": 3000,
        "retries": 5,
        "default.topic.config": {"request.required.acks": "1"},
        "max.in.flight.requests.per.connection": 1,
        "queue.buffering.max.messages": 100000,
        "batch.num.messages": 50000,
        "message.max.bytes": 2000000
    }
    producer = Producer(**prod_config)

    payload = uuid.uuid4()
    with jrnl.stopwatch('ingest_records', time_log):
        for i in range(msg_count):
            producer.poll(0)  # service pending delivery callbacks
            header = hfactory.create(pipeline_name='test', record_type='test_record')
            record = rfactory.create(header, **{'message': payload, 'tag': i})
            producer.produce(topic_name, pickle.dumps(record), callback=delivery_report)
            if not i % 100000:
                print('%d messages sent.' % i)
        producer.flush()

    print('%d messages sent to Kafka topic %s.' % (msg_count, topic_name))
    print(time_log.readout)

    if errors:
        print('!!! Errors sending messages:')
        print('\n'.join(errors))
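# The function above references delivery_report() and a module-level `errors`
# list without defining them; here is a minimal sketch, assuming the standard
# confluent-kafka on_delivery callback signature (err, msg).
errors = []

def delivery_report(err, msg):
    # invoked once per produced message, from producer.poll() or producer.flush()
    if err is not None:
        errors.append('delivery to topic %s failed: %s' % (msg.topic(), err))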
def main(args):
    init_filename = args['<init_file>']
    yaml_config = common.read_config_file(init_filename)
    pipeline_id = yaml_config['globals']['pipeline_id']
    logger = tdx.init_logging(LOG_TAG, 'reset.log', logging.DEBUG)

    service_objects = snap.initialize_services(yaml_config, logger)
    so_registry = common.ServiceObjectRegistry(service_objects)
    redis_svc = so_registry.lookup('redis')

    # delete the pipeline's Redis record queues and its generator-to-user map
    redis_server = redis_svc.redis_server.instance()
    redis_server.delete(redis_svc.get_transformed_record_queue(pipeline_id))
    redis_server.delete(redis_svc.get_raw_record_queue(pipeline_id))
    redis_server.delete(redis_svc.get_generator_to_user_map(pipeline_id))
def main(args):
    init_filename = args['<initfile>']
    yaml_config = common.read_config_file(init_filename)

    log_directory = yaml_config['globals']['log_directory']
    log_filename = 'forge.log'
    log = metl.init_logging('mx_forge',
                            os.path.join(log_directory, log_filename),
                            logging.DEBUG)

    services = snap.initialize_services(yaml_config, log)
    so_registry = common.ServiceObjectRegistry(services)
    forge_cli = ForgeCLI('forge', so_registry)
    forge_cli.cmdloop()
def main(args):
    yaml_config = common.read_config_file(args['<initfile>'])

    # with no channel requested, list the configured channels and exit
    if not args['--channel']:
        print('\n'.join(yaml_config['channels'].keys()))
        return 0

    event_channel = args['<event_channel>']
    if not yaml_config['channels'].get(event_channel):
        raise eavesdroppr.NoSuchEventChannel(event_channel)

    channel_config = yaml_config['channels'][event_channel]
    operation = channel_config['db_operation']
    if operation not in SUPPORTED_DB_OPS:
        raise eavesdroppr.UnsupportedDBOperation(operation)

    # derive proc and trigger names from the table and operation unless overridden
    table_name = channel_config['db_table_name']
    db_schema = channel_config.get('db_schema') or 'public'
    proc_name = channel_config.get('db_proc_name') or '%s_%s_notify' % (table_name, operation.lower())
    trigger_name = channel_config.get('db_trigger_name') or 'trg_%s_%s' % (table_name, operation.lower())
    source_fields = channel_config['payload_fields']

    j2env = jinja2.Environment()
    template_mgr = common.JinjaTemplateManager(j2env)
    json_func_template = j2env.from_string(JSON_BUILD_FUNC_TEMPLATE)
    json_func = json_func_template.render(payload_fields=source_fields)

    pk_field = channel_config['pk_field_name']
    pk_type = channel_config['pk_field_type']

    if args['--proc']:
        print(PROC_TEMPLATE.format(schema=db_schema,
                                   pk_field_name=pk_field,
                                   pk_field_type=pk_type,
                                   channel_name=event_channel,
                                   json_build_func=json_func))
    elif args['--trigger']:
        print(TRIGGER_TEMPLATE.format(schema=db_schema,
                                      table_name=table_name,
                                      trigger_name=trigger_name,
                                      db_proc_name=proc_name,
                                      db_op=operation))
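# A hypothetical channel entry illustrating the YAML keys this function reads;
# the names and values are illustrative, not taken from the original project.
#
# channels:
#   order_updates:
#     db_operation: UPDATE                  # must be one of SUPPORTED_DB_OPS
#     db_table_name: orders
#     db_schema: public                     # optional; defaults to 'public'
#     db_proc_name: orders_update_notify    # optional; derived if omitted
#     db_trigger_name: trg_orders_update    # optional; derived if omitted
#     pk_field_name: id
#     pk_field_type: bigint
#     payload_fields:
#       - id
#       - status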
def main(args):
    print(args)
    local_env = common.LocalEnvironment('PGSQL_USER', 'PGSQL_PASSWORD')
    local_env.init()
    pgsql_user = local_env.get_variable('PGSQL_USER')
    pgsql_password = local_env.get_variable('PGSQL_PASSWORD')

    yaml_config = common.read_config_file(args['<initfile>'])
    print(common.jsonpretty(yaml_config))

    db_host = yaml_config['globals']['database_host']
    db_name = yaml_config['globals']['database_name']
    pubsub = pgpubsub.connect(host=db_host,
                              user=pgsql_user,
                              password=pgsql_password,
                              database=db_name)

    channel_id = args['<channel>']
    if not yaml_config['channels'].get(channel_id):
        raise NoSuchEventChannel(channel_id)

    # resolve the configured handler function from the project's handler module
    handler_module_name = yaml_config['globals']['handler_module']
    project_dir = common.load_config_var(yaml_config['globals']['project_dir'])
    sys.path.append(project_dir)
    handlers = __import__(handler_module_name)

    handler_function_name = yaml_config['channels'][channel_id]['handler_function']
    if not hasattr(handlers, handler_function_name):
        raise NoSuchEventHandler(handler_function_name, handler_module_name)
    handler_function = getattr(handlers, handler_function_name)

    logger = logging.getLogger(__name__)  # stdlib logger; assumes logging is configured elsewhere
    service_objects = common.ServiceObjectRegistry(
        snap.initialize_services(yaml_config, logger))

    pubsub.listen(channel_id)
    print('listening on channel "%s"...' % channel_id)
    for event in pubsub.events():
        print(event.payload)
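# A hypothetical handler of the kind 'handler_function' might name in the
# channel config. The signature is an assumption; as written, the loop above
# resolves the handler but only prints payloads, so a dispatch like the one
# commented below would be one way to wire it in.
def order_update_handler(event, service_registry):
    # event.payload carries the NOTIFY payload emitted by the database
    print('handling event payload: %s' % event.payload)

#   for event in pubsub.events():
#       handler_function(event, service_objects)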
def main(args):
    configfile = args['<config_file>']
    yaml_config = common.read_config_file(configfile)
    services = common.ServiceObjectRegistry(snap.initialize_services(yaml_config))

    topic_name = args['<topic>']
    tkservice = services.lookup('telekast')
    topic = tkservice.get_topic(topic_name)

    hfactory = tkcore.PipelineRecordHeaderFactory('pipeline_name', 'record_type')
    rfactory = tkcore.PipelineRecordFactory(payload_field_name='data')

    msg_count = 1000000
    time_log = jrnl.TimeLog()

    with topic.get_producer(use_rdkafka=True,
                            serializer=default_dict_serializer,
                            min_queued_messages=250000,
                            max_queued_messages=500000,
                            linger_ms=5) as producer:
        payload = uuid.uuid4()
        with jrnl.stopwatch('ingest_records', time_log):
            for i in range(msg_count):
                header = hfactory.create(pipeline_name='test', record_type='test_record')
                record = rfactory.create(header, **{'message': payload, 'tag': i})
                producer.produce(record)
                if not i % 100000:
                    print('%d messages sent.' % i)

    print('%d messages sent to Kafka topic %s.' % (msg_count, topic_name))
    print(time_log.readout)
def main(args):
    init_filename = args['<init_file>']
    yaml_config = common.read_config_file(init_filename)
    logger = tdx.init_logging(LOG_TAG, 'reset.log', logging.DEBUG)

    service_objects = snap.initialize_services(yaml_config, logger)
    so_registry = common.ServiceObjectRegistry(service_objects)
    redis_svc = so_registry.lookup('redis')
    couchbase_svc = so_registry.lookup('couchbase')

    # drain the transformed-record queue, removing each record from Couchbase
    transforms = redis_svc.transformed_record_queue
    num_records_cleared = 0
    while True:
        key = transforms.pop()
        if not key:
            break
        try:
            couchbase_svc.data_manager.bucket.remove(key)
            num_records_cleared += 1
            if not num_records_cleared % 100000:
                print('%d records cleared.' % num_records_cleared)
        except Exception as err:
            print('%s thrown while clearing record ID %s: %s' %
                  (err.__class__.__name__, key, err))
def main(args):
    init_filename = args['<init_file>']
    pipeline_name = args['<pipeline_name>']
    yaml_config = common.read_config_file(init_filename)
    logger = tdx.init_logging('mkpipeline', 'jobstat.log', logging.DEBUG)

    service_objects = snap.initialize_services(yaml_config, logger)
    so_registry = common.ServiceObjectRegistry(service_objects)
    couchbase_svc = so_registry.lookup('couchbase')

    jrnl_mgr = couchbase_svc.journal_manager
    jrnl_mgr.register_keygen_function(const.RECTYPE_PIPELINE, generate_pipeline_key)

    new_pipeline = PipelineRecord(pipeline_name, 'for populating orders star schema')
    try:
        key = jrnl_mgr.insert_record(new_pipeline)
        print('created pipeline "%s".' % new_pipeline.name)
        exit(0)
    except couchbase.exceptions.KeyExistsError:
        print('error: there is already a pipeline named "%s".' % pipeline_name)
        exit(1)
def main(args):
    configfile = args['<configfile>']
    yaml_config = common.read_config_file(configfile)
    services = common.ServiceObjectRegistry(snap.initialize_services(yaml_config))
    event_service = services.lookup(args['<service>'])
def main(args):
    #print(common.jsonpretty(args))
    config_filename = args['<configfile>']
    yaml_config = common.read_config_file(config_filename)
    service_object_registry = common.ServiceObjectRegistry(
        snap.initialize_services(yaml_config))
    datastore_registry = DatastoreRegistry(
        initialize_datastores(yaml_config, service_object_registry))

    preview_mode = False
    if args['--preview']:
        preview_mode = True

    limit = -1
    if args.get('--limit') is not None:
        limit = int(args['--limit'])

    available_ingest_targets = load_ingest_targets(yaml_config, datastore_registry)

    if args['--target'] and args['<datafile>'] is None:
        # stream-input mode: read records from stdin
        ingest_target_name = args['<ingest_target>']
        ingest_target = lookup_ingest_target_by_name(ingest_target_name,
                                                     available_ingest_targets)
        buffer = initialize_record_buffer(ingest_target, datastore_registry)
        record_count = 0
        with checkpoint(buffer, interval=ingest_target.checkpoint_interval):
            while True:
                if record_count == limit:
                    break
                raw_line = sys.stdin.readline()
                line = raw_line.strip()
                if not len(line):
                    break
                if not preview_mode:
                    buffer.write(line)
                else:
                    print(line)
                record_count += 1

    elif args['<datafile>']:
        # file-input mode: read records from the named datafile
        input_file = args['<datafile>']
        ingest_target_name = args['<ingest_target>']
        ingest_target = lookup_ingest_target_by_name(ingest_target_name,
                                                     available_ingest_targets)
        buffer = initialize_record_buffer(ingest_target, datastore_registry)
        record_count = 0
        with checkpoint(buffer, interval=ingest_target.checkpoint_interval):
            with open(input_file) as f:
                for line in f:
                    if record_count == limit:
                        break
                    if not preview_mode:
                        buffer.write(line)
                    else:
                        print(line)
                    record_count += 1

    elif args['--list']:
        if args['targets']:
            for target in yaml_config['ingest_targets']:
                print('::: Ingest target "%s": ' % target)
                print(common.jsonpretty(yaml_config['ingest_targets'][target]))
        if args['datastores']:
            for dstore in yaml_config['datastores']:
                print('::: Datastore alias "%s": ' % dstore)
                print(common.jsonpretty(yaml_config['datastores'][dstore]))
        if args['globals']:
            print('::: Global settings:')
            print(common.jsonpretty(yaml_config['globals']))
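# A hypothetical docopt usage pattern consistent with the argument keys this
# function reads (--preview, --limit, --target, --list, <configfile>,
# <ingest_target>, <datafile>, and the targets/datastores/globals subcommands);
# the actual usage text in the original script may differ.
#
# Usage:
#     ingest <configfile> --target <ingest_target> [<datafile>] [--preview] [--limit=<n>]
#     ingest <configfile> --list [targets] [datastores] [globals]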
def main(args):
    if args['--version']:
        print(show_version())
        return

    verbose_mode = False
    if args['--verbose']:
        verbose_mode = True

    configfile = args['<configfile>']
    yaml_config = common.read_config_file(configfile)

    source_name = args['<source_name>']
    if not yaml_config['sources'].get(source_name):
        raise Exception('No queue source "%s" defined. Please check your config file.'
                        % source_name)

    service_tbl = snap.initialize_services(yaml_config)
    service_registry = common.ServiceObjectRegistry(service_tbl)

    # create the SQS client
    source_config = yaml_config['sources'][source_name]
    region = source_config['region']
    polling_interval = int(source_config['polling_interval_seconds'])
    sqs = boto3.client('sqs', region_name=region)
    queue_url = common.load_config_var(source_config['queue_url'])

    # resolve the message-handler function from the configured consumer module
    msg_handler_name = source_config['handler']
    project_dir = common.load_config_var(yaml_config['globals']['project_home'])
    sys.path.append(project_dir)
    msg_handler_module = yaml_config['globals']['consumer_module']
    msg_handler_func = common.load_class(msg_handler_name, msg_handler_module)

    child_procs = []
    print('### initiating polling loop.')

    # loop forever
    while True:
        current_time = datetime.datetime.now().isoformat()
        if verbose_mode:
            print('### checking SQS queue %s for messages at %s...' %
                  (queue_url, current_time), file=sys.stderr)

        # receive up to one message from the SQS queue; received messages are
        # hidden from other consumers for VisibilityTimeout seconds, and the
        # call long-polls up to WaitTimeSeconds before returning an empty list
        response = sqs.receive_message(QueueUrl=queue_url,
                                       AttributeNames=['SentTimestamp'],
                                       MaxNumberOfMessages=1,
                                       MessageAttributeNames=['All'],
                                       VisibilityTimeout=30,
                                       WaitTimeSeconds=3)

        inbound_msgs = response.get('Messages') or []
        if not len(inbound_msgs):
            if verbose_mode:
                print('### No messages pending, sleeping %d seconds before re-try...'
                      % polling_interval)
            time.sleep(polling_interval)
            continue

        for message in inbound_msgs:
            receipt_handle = message['ReceiptHandle']
            current_time = datetime.datetime.now().isoformat()
            print('### spawning message processor at %s...' % current_time,
                  file=sys.stderr)
            try:
                # TODO: can we pickle a ServiceObjectRegistry?
                p = Process(target=msg_handler_func,
                            args=(message, receipt_handle, service_registry))
                p.start()
                child_procs.append(p)
                print('### Queued message-handling subprocess with PID %s.' % p.pid,
                      file=sys.stderr)

                # delete the received message from the queue
                sqs.delete_message(QueueUrl=queue_url, ReceiptHandle=receipt_handle)
            except Exception as err:
                print('!!! Error processing message with receipt: %s' % receipt_handle,
                      file=sys.stderr)
                print(err)
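# A hypothetical message handler matching the signature the Process() call
# above passes to its target: (message, receipt_handle, service_registry).
# The name and body are illustrative, not from the original consumer module.
def handle_queue_message(message, receipt_handle, service_registry):
    # message is the raw SQS message dict; its 'Body' field carries the payload
    print('processing SQS message with receipt %s...' % receipt_handle)
    print(message['Body'])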