Example #1
def load_snap_config(mode, app):
    config_file_path = None
    if mode == 'standalone':
        parser = argparse.ArgumentParser()
        parser.add_argument("--configfile",
                            metavar='<configfile>',
                            required=True,
                            nargs=1,
                            help='YAML config file for snap endpoints')

        args = parser.parse_args()
        config_file_path = common.full_path(args.configfile[0])
    elif mode == 'server':
        config_file_path = os.getenv('SNAP_CONFIG')
        filename = os.path.join(app.instance_path, 'application.cfg')
        print('generated config path is %s' % filename)

    else:
        print('valid setup modes are "standalone" and "server".')
        exit(1)
        
    if not config_file_path:
        print('please set the SNAP_CONFIG environment variable in the WSGI command string.')
        exit(1)
        
    return common.read_config_file(config_file_path)
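A minimal usage sketch (not part of the source) may help: in 'server' mode the config path comes from the SNAP_CONFIG environment variable set in the WSGI command string, while 'standalone' mode parses it from --configfile. The Flask app object and import layout below are assumptions.
import os
from flask import Flask

app = Flask(__name__)

# Assumed: load_snap_config is defined in (or importable from) the module above.
# Server mode: export SNAP_CONFIG in the WSGI command string, e.g.
#   SNAP_CONFIG=/path/to/config.yaml gunicorn 'myapp:app'
config = load_snap_config('server', app)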
Example #3
def main(args):

    configfile = args['<configfile>']
    yaml_config = common.read_config_file(configfile)
    service_tbl = snap.initialize_services(yaml_config)

    registry = common.ServiceObjectRegistry(service_tbl)

    sms_svc = registry.lookup('sms')
    sid = sms_svc.send_sms('9174176968', 'hello NET')
    print(sid)
Example #4
def main(args):
    configfile = args['<config_file>']
    yaml_config = common.read_config_file(configfile)
    services = common.ServiceObjectRegistry(snap.initialize_services(yaml_config))


    topic_name = args['<topic>']
    tkservice = services.lookup('telekast')
    topic = tkservice.get_topic(topic_name)

    hfactory = tkcore.PipelineRecordHeaderFactory('pipeline_name', 'record_type')
    rfactory = tkcore.PipelineRecordFactory(payload_field_name='data')

    msg_count = 2000000
    time_log = jrnl.TimeLog()

    prod_config = {
        "on_delivery": delivery_report,
        "bootstrap.servers": tkservice.connect_string,
        "group.id": "python_injector",
        "retry.backoff.ms": 3000,
        "retries": 5,
        "default.topic.config": {"request.required.acks": "1"},
        "max.in.flight.requests.per.connection": 1,
        "queue.buffering.max.messages": 100000,
        "batch.num.messages": 50000,
        "message.max.bytes": 2000000
    }

    
    producer = Producer(**prod_config)

    payload = uuid.uuid4()
    with jrnl.stopwatch('ingest_records', time_log):
        for i in range(msg_count):
            producer.poll(0)
            header = hfactory.create(pipeline_name='test',
                                     record_type='test_record')
            record = rfactory.create(header, **{'message': payload, 'tag': i})
            producer.produce(topic_name, pickle.dumps(record), callback=delivery_report)
            if not i % 100000:
                print('%d messages sent.' % i)

        producer.flush()

    print('%d messages sent to Kafka topic %s.' % (msg_count, topic_name))
    print(time_log.readout)
    if len(errors):
        print('!!! Errors sending messages:')
        print('\n'.join(errors))
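The example above references a delivery_report callback and an errors list that are defined elsewhere in the source. A minimal sketch consistent with confluent-kafka's on_delivery contract (the message wording here is an assumption):
errors = []

def delivery_report(err, msg):
    # confluent-kafka invokes this once per produced message: err is a
    # KafkaError (or None on success), msg is the Message instance.
    if err is not None:
        errors.append('delivery failed for record %s: %s' % (msg.key(), err))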
Example #5
def main(args):
    init_filename = args['<init_file>']
    yaml_config = common.read_config_file(init_filename)
    pipeline_id = yaml_config['globals']['pipeline_id']
    logger = tdx.init_logging(LOG_TAG, 'reset.log', logging.DEBUG)
    service_objects = snap.initialize_services(yaml_config, logger)
    so_registry = common.ServiceObjectRegistry(service_objects)
    redis_svc = so_registry.lookup('redis')

    redis_svc.redis_server.instance().delete(redis_svc.get_transformed_record_queue(pipeline_id))
    redis_svc.redis_server.instance().delete(redis_svc.get_raw_record_queue(pipeline_id))
    redis_svc.redis_server.instance().delete(redis_svc.get_generator_to_user_map(pipeline_id))
Example #6
def main(args):
    init_filename = args['<initfile>']
    yaml_config = common.read_config_file(init_filename)
    log_directory = yaml_config['globals']['log_directory']
    log_filename = 'forge.log'
    log = metl.init_logging('mx_forge',
                            os.path.join(log_directory, log_filename),
                            logging.DEBUG)

    services = snap.initialize_services(yaml_config, log)
    so_registry = common.ServiceObjectRegistry(services)
    forge_cli = ForgeCLI('forge', so_registry)
    forge_cli.cmdloop()
Example #7
def main(args):
    yaml_config = common.read_config_file(args['<initfile>'])
    if args['--channel'] is False:
        print('\n'.join(yaml_config['channels'].keys()))
        return 0

    event_channel = args['<event_channel>']
    if not yaml_config['channels'].get(event_channel):
        raise eavesdroppr.NoSuchEventChannel(event_channel)

    channel_config = yaml_config['channels'][event_channel]

    operation = channel_config['db_operation']
    if operation not in SUPPORTED_DB_OPS:
        raise eavesdroppr.UnsupportedDBOperation(operation)

    table_name = channel_config['db_table_name']
    db_schema = channel_config.get('db_schema') or 'public'
    proc_name = channel_config.get(
        'db_proc_name') or '%s_%s_notify' % (table_name, operation.lower())
    trigger_name = channel_config.get(
        'db_trigger_name') or 'trg_%s_%s' % (table_name, operation.lower())
    source_fields = channel_config['payload_fields']

    j2env = jinja2.Environment()
    template_mgr = common.JinjaTemplateManager(j2env)
    json_func_template = j2env.from_string(JSON_BUILD_FUNC_TEMPLATE)
    json_func = json_func_template.render(payload_fields=source_fields)

    pk_field = channel_config['pk_field_name']
    pk_type = channel_config['pk_field_type']

    if args['--proc']:
        print(
            PROC_TEMPLATE.format(schema=db_schema,
                                 pk_field_name=pk_field,
                                 pk_field_type=pk_type,
                                 channel_name=event_channel,
                                 json_build_func=json_func))

    elif args['--trigger']:
        print(
            TRIGGER_TEMPLATE.format(schema=db_schema,
                                    table_name=table_name,
                                    trigger_name=trigger_name,
                                    db_proc_name=proc_name,
                                    db_op=operation))
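PROC_TEMPLATE, TRIGGER_TEMPLATE, and JSON_BUILD_FUNC_TEMPLATE are module-level constants not shown in this listing. Judging only from the .format() fields used above, a hypothetical PROC_TEMPLATE might follow the standard pg_notify trigger-procedure shape; this is a sketch of the structure, not the actual eavesdroppr template:
# Hypothetical stand-in for PROC_TEMPLATE; str.format() silently accepts the
# extra pk_field_name/pk_field_type kwargs even if a template omits them.
PROC_TEMPLATE = '''
CREATE OR REPLACE FUNCTION "{schema}".{channel_name}_notify()
RETURNS trigger AS $$
DECLARE
    payload json;
BEGIN
    payload := {json_build_func};
    PERFORM pg_notify('{channel_name}', payload::text);
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;
'''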
Example #8
def main(args):
    print(args)

    local_env = common.LocalEnvironment('PGSQL_USER', 'PGSQL_PASSWORD')
    local_env.init()

    pgsql_user = local_env.get_variable('PGSQL_USER')
    pgsql_password = local_env.get_variable('PGSQL_PASSWORD')

    yaml_config = common.read_config_file(args['<initfile>'])

    print(common.jsonpretty(yaml_config))

    db_host = yaml_config['globals']['database_host']
    db_name = yaml_config['globals']['database_name']

    pubsub = pgpubsub.connect(host=db_host,
                              user=pgsql_user,
                              password=pgsql_password,
                              database=db_name)

    channel_id = args['<channel>']

    if not yaml_config['channels'].get(channel_id):
        raise NoSuchEventChannel(channel_id)

    handler_module_name = yaml_config['globals']['handler_module']

    project_dir = common.load_config_var(yaml_config['globals']['project_dir'])
    sys.path.append(project_dir)
    handlers = __import__(handler_module_name)
    handler_function_name = yaml_config['channels'][channel_id][
        'handler_function']

    if not hasattr(handlers, handler_function_name):
        raise NoSuchEventHandler(handler_function_name, handler_module_name)

    handler_function = getattr(handlers, handler_function_name)
    service_objects = common.ServiceObjectRegistry(
        snap.initialize_services(yaml_config, logger))

    pubsub.listen(channel_id)
    print('listening on channel "%s"...' % channel_id)
    for event in pubsub.events():
        print(event.payload)
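Note that handler_function is resolved via getattr but the event loop above only prints payloads. A hypothetical dispatch loop (the handler signature is an assumption; the source never shows a call):
for event in pubsub.events():
    # Hypothetical: pass each NOTIFY event to the configured handler along
    # with the service registry; the real handler signature is not shown.
    handler_function(event, service_objects)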
Example #9
def main(args):
    configfile = args['<config_file>']
    yaml_config = common.read_config_file(configfile)
    services = common.ServiceObjectRegistry(
        snap.initialize_services(yaml_config))

    topic_name = args['<topic>']
    tkservice = services.lookup('telekast')
    topic = tkservice.get_topic(topic_name)

    hfactory = tkcore.PipelineRecordHeaderFactory('pipeline_name',
                                                  'record_type')
    rfactory = tkcore.PipelineRecordFactory(payload_field_name='data')

    header = hfactory.create(pipeline_name='cdm_test', record_type='cdm')
    record = rfactory.create(header, {'message': 'test'})

    msg_count = 1000000
    time_log = jrnl.TimeLog()

    with topic.get_producer(use_rdkafka=True,
                            serializer=default_dict_serializer,
                            min_queued_messages=250000,
                            max_queued_messages=500000,
                            linger_ms=5) as producer:

        payload = uuid.uuid4()
        with jrnl.stopwatch('ingest_records', time_log):
            for i in range(msg_count):
                header = hfactory.create(pipeline_name='test',
                                         record_type='test_record')
                record = rfactory.create(header, **{
                    'message': payload,
                    'tag': i
                })
                producer.produce(record)
                if not i % 100000:
                    print('%d messages sent.' % i)

    print('%d messages sent to Kafka topic %s.' % (msg_count, topic_name))
    print(time_log.readout)
Example #10
def main(args):
    init_filename = args['<init_file>']
    yaml_config = common.read_config_file(init_filename)
    logger = tdx.init_logging(LOG_TAG, 'reset.log', logging.DEBUG)
    service_objects = snap.initialize_services(yaml_config, logger)
    so_registry = common.ServiceObjectRegistry(service_objects)
    redis_svc = so_registry.lookup('redis')
    couchbase_svc = so_registry.lookup('couchbase')

    transforms = redis_svc.transformed_record_queue
    num_records_cleared = 0
    while True:
        key = transforms.pop()
        if not key:
            break
        try:
            couchbase_svc.data_manager.bucket.remove(key)
            num_records_cleared += 1
            if not num_records_cleared % 100000:
                print('%d records cleared.' % num_records_cleared)
        except Exception as err:
            print('%s thrown while clearing record ID %s: %s' %
                  (err.__class__.__name__, key, err))
Example #11
def main(args):
    init_filename = args['<init_file>']
    pipeline_name = args['<pipeline_name>']

    yaml_config = common.read_config_file(init_filename)
    logger = tdx.init_logging('mkpipeline', 'jobstat.log', logging.DEBUG)
    service_objects = snap.initialize_services(yaml_config, logger)
    so_registry = common.ServiceObjectRegistry(service_objects)
    couchbase_svc = so_registry.lookup('couchbase')
    jrnl_mgr = couchbase_svc.journal_manager

    jrnl_mgr.register_keygen_function(const.RECTYPE_PIPELINE,
                                      generate_pipeline_key)
    new_pipeline = PipelineRecord(pipeline_name,
                                  'for populating orders star schema')

    try:
        key = jrnl_mgr.insert_record(new_pipeline)
        print('created pipeline "%s".' % new_pipeline.name)
        exit(0)
    except couchbase.exceptions.KeyExistsError as err:
        print('error: there is already a pipeline named "%s".' % pipeline_name)
        exit(1)
Example #12
def main(args):
    configfile = args['<configfile>']
    yaml_config = common.read_config_file(configfile)
    services = common.ServiceObjectRegistry(
        snap.initialize_services(yaml_config))
    event_service = services.lookup(args['<service>'])
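These main(args) entry points all index args with keys like '<configfile>' and '--preview', which matches docopt's conventions. A hypothetical driver, assuming docopt and a usage string in the module docstring (neither is shown in the source):
from docopt import docopt

if __name__ == '__main__':
    # Hypothetical: parse the module docstring's usage pattern into the
    # args dict these main() functions expect.
    args = docopt(__doc__)
    main(args)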
Example #13
def main(args):
    #print(common.jsonpretty(args))
    config_filename = args['<configfile>']
    yaml_config = common.read_config_file(config_filename)
    service_object_registry = common.ServiceObjectRegistry(
        snap.initialize_services(yaml_config))
    datastore_registry = DatastoreRegistry(
        initialize_datastores(yaml_config, service_object_registry))

    preview_mode = False
    if args['--preview']:
        preview_mode = True

    limit = -1
    if args.get('--limit') is not None:
        limit = int(args['--limit'])
    list_mode = False
    stream_input_mode = False
    file_input_mode = False

    available_ingest_targets = load_ingest_targets(yaml_config,
                                                   datastore_registry)

    if args['--target'] and args['<datafile>'] is None:
        stream_input_mode = True
        ingest_target_name = args['<ingest_target>']
        ingest_target = lookup_ingest_target_by_name(ingest_target_name,
                                                     available_ingest_targets)
        buffer = initialize_record_buffer(ingest_target, datastore_registry)

        record_count = 0
        with checkpoint(buffer, interval=ingest_target.checkpoint_interval):
            while True:
                if record_count == limit:
                    break
                raw_line = sys.stdin.readline()
                line = raw_line.strip()
                if not len(line):
                    break
                if not preview_mode:
                    buffer.write(line)
                else:
                    print(line)
                record_count += 1

    elif args['<datafile>']:
        file_input_mode = True
        input_file = args['<datafile>']
        ingest_target_name = args['<ingest_target>']
        ingest_target = lookup_ingest_target_by_name(ingest_target_name,
                                                     available_ingest_targets)
        buffer = initialize_record_buffer(ingest_target, datastore_registry)

        record_count = 0
        with checkpoint(buffer, interval=ingest_target.checkpoint_interval):
            with open(input_file) as f:
                for line in f:
                    if record_count == limit:
                        break
                    if not preview_mode:
                        buffer.write(line)
                    else:
                        print(line)
                    record_count += 1

    elif args['--list']:
        if args['targets']:
            for target in yaml_config['ingest_targets']:
                print('::: Ingest target "%s": ' % target)
                print(common.jsonpretty(yaml_config['ingest_targets'][target]))

        if args['datastores']:
            for dstore in yaml_config['datastores']:
                print('::: Datastore alias "%s": ' % dstore)
                print(common.jsonpretty(yaml_config['datastores'][dstore]))

        if args['globals']:
            print('::: Global settings:')
            print(common.jsonpretty(yaml_config['globals']))
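checkpoint is imported from elsewhere in the source. A minimal sketch of a compatible context manager, assuming flush-on-exit semantics (interval-based flushing would be coordinated with the record buffer itself):
from contextlib import contextmanager

@contextmanager
def checkpoint(record_buffer, interval):
    # Hypothetical sketch, not the source implementation: guarantee a final
    # flush when the block exits, even on error; periodic flushing every
    # <interval> writes is assumed to happen in cooperation with the buffer.
    try:
        yield record_buffer
    finally:
        record_buffer.flush()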
Example #14
def main(args):
    if args['--version']:
        print(show_version())
        return

    verbose_mode = False
    if args['--verbose']:
        verbose_mode = True

    configfile = args['<configfile>']
    yaml_config = common.read_config_file(configfile)

    source_name = args['<source_name>']
    if not yaml_config['sources'].get(source_name):
        raise Exception(
            'No queue source "%s" defined. Please check your config file.' %
            source_name)

    service_tbl = snap.initialize_services(yaml_config)
    service_registry = common.ServiceObjectRegistry(service_tbl)
    source_config = yaml_config['sources'][source_name]

    # Create SQS client
    region = source_config['region']
    polling_interval = int(source_config['polling_interval_seconds'])

    sqs = boto3.client('sqs', region_name=region)
    queue_url = common.load_config_var(source_config['queue_url'])
    msg_handler_name = source_config['handler']
    project_dir = common.load_config_var(
        yaml_config['globals']['project_home'])
    sys.path.append(project_dir)

    msg_handler_module = yaml_config['globals']['consumer_module']
    msg_handler_func = common.load_class(msg_handler_name, msg_handler_module)

    child_procs = []

    print('### initiating polling loop.')

    # loop forever
    while True:
        current_time = datetime.datetime.now().isoformat()
        if verbose_mode:
            print('### checking SQS queue %s for messages at %s...' %
                  (queue_url, current_time),
                  file=sys.stderr)

        # Receive message from SQS queue
        response = sqs.receive_message(
            QueueUrl=queue_url,
            AttributeNames=['SentTimestamp'],
            MaxNumberOfMessages=1,
            MessageAttributeNames=['All'],
            VisibilityTimeout=30,
            # VisibilityTimeout (integer) -- The duration (in seconds) that the received messages
            # are hidden from subsequent retrieve requests after being retrieved by a ReceiveMessage request.
            WaitTimeSeconds=3
            # WaitTimeSeconds (integer) -- The duration (in seconds) for which the call waits for a message
            # to arrive in the queue before returning.
            # If a message is available, the call returns sooner than WaitTimeSeconds . If no messages are available
            # and the wait time expires, the call returns successfully with an empty list of messages.
        )

        inbound_msgs = response.get('Messages') or []
        if not len(inbound_msgs):
            if verbose_mode:
                print(
                    '### No messages pending, sleeping %d seconds before re-try...'
                    % polling_interval)

            time.sleep(polling_interval)
            continue

        for message in inbound_msgs:
            receipt_handle = message['ReceiptHandle']
            current_time = datetime.datetime.now().isoformat()
            print('### spawning message processor at %s...' % current_time,
                  file=sys.stderr)

            try:
                # TODO: can we pickle a ServiceObjectRegistry?
                p = Process(target=msg_handler_func,
                            args=(message, receipt_handle, service_registry))
                p.start()
                child_procs.append(p)
                print('### Queued message-handling subprocess with PID %s.' %
                      p.pid,
                      file=sys.stderr)

                # Delete received message from queue
                sqs.delete_message(QueueUrl=queue_url,
                                   ReceiptHandle=receipt_handle)

            except Exception as err:
                print('!!! Error processing message with receipt: %s' %
                      receipt_handle,
                      file=sys.stderr)
                print(err)
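One caveat in this polling loop: child_procs grows without bound and finished handler processes are never joined. A hypothetical housekeeping pass (not in the source) could reap them on each iteration:
# Hypothetical cleanup: join() handlers that have exited so the child list
# does not accumulate zombie processes.
for p in [proc for proc in child_procs if not proc.is_alive()]:
    p.join()
    child_procs.remove(p)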