def main(args):
    configfile = args['<config_file>']
    yaml_config = common.read_config_file(configfile)
    services = common.ServiceObjectRegistry(snap.initialize_services(yaml_config))

    topic_name = args['<topic>']
    tkservice = services.lookup('telekast')
    topic = tkservice.get_topic(topic_name)

    hfactory = tkcore.PipelineRecordHeaderFactory('pipeline_name', 'record_type')
    rfactory = tkcore.PipelineRecordFactory(payload_field_name='data')

    msg_count = 2000000
    time_log = jrnl.TimeLog()

    prod_config = {
        "on_delivery": delivery_report,  # default per-message delivery callback
        "bootstrap.servers": tkservice.connect_string,
        "group.id": "python_injector",
        "retry.backoff.ms": 3000,
        "retries": 5,
        "default.topic.config": {"request.required.acks": "1"},
        "max.in.flight.requests.per.connection": 1,  # preserve ordering across retries
        "queue.buffering.max.messages": 100000,
        "batch.num.messages": 50000,
        "message.max.bytes": 2000000
    }

    producer = Producer(prod_config)  # confluent_kafka takes the config as a single dict

    payload = uuid.uuid4()
    with jrnl.stopwatch('ingest_records', time_log):
        for i in range(msg_count):
            producer.poll(0)  # serve pending delivery callbacks
            header = hfactory.create(pipeline_name='test',
                                     record_type='test_record')
            record = rfactory.create(header, message=payload, tag=i)
            producer.produce(topic_name, pickle.dumps(record), callback=delivery_report)
            if not i % 100000:
                print('%d messages sent.' % i)

        producer.flush()

    print('%d messages sent to Kafka topic %s.' % (msg_count, topic_name))
    print(time_log.readout)
    if errors:  # module-level list populated by the delivery_report callback
        print('!!! Errors sending messages:')
        print('\n'.join(errors))
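
The snippet references delivery_report and errors without defining them; both presumably live at module level. A minimal sketch of what they might look like, given confluent_kafka's (err, msg) delivery-callback signature (the names and error format here are assumptions, not the original code):

errors = []  # assumed module-level error collector

def delivery_report(err, msg):
    # Invoked once per message by confluent_kafka, from poll() or flush().
    if err is not None:
        errors.append('failed delivery to topic %s: %s' % (msg.topic(), err))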
Example #2
def __init__(self):
    self.target_record_fields = set()
    self.datasources = {}
    self.explicit_datasource_lookup_functions = {}
    self.field_map = {}
    self.value_map = FieldValueMap()
    self.output_header = []
    self.event_handlers = {}
    self.error_handlers = {}
    self.count_log = jrnl.CountLog()

    # "processing_time" will read zero unless the process() method is called:
    # we do not record time stats for individual calls to transform();
    # "processing_time" is the time spent in process(), which invokes transform()
    # once per inbound record. Initialize the elapsed time to zero by default.
    self.time_log = jrnl.TimeLog()
    current_time = datetime.datetime.now()
    self.time_log.record_elapsed_time('processing_time', current_time, current_time)
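
Per the comment above, "processing_time" only accumulates inside process(). A hypothetical sketch of such a method, reusing the jrnl.stopwatch context manager seen in the producer examples (the transform() call and the records argument are assumptions about the surrounding class, not original code):

def process(self, records):
    # Hypothetical: time the whole batch under "processing_time",
    # invoking transform() once per inbound record as the comment describes.
    output = []
    with jrnl.stopwatch('processing_time', self.time_log):
        for record in records:
            output.append(self.transform(record))
    return output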
Example #3
def main(args):
    configfile = args['<config_file>']
    yaml_config = common.read_config_file(configfile)
    services = common.ServiceObjectRegistry(
        snap.initialize_services(yaml_config))

    topic_name = args['<topic>']
    tkservice = services.lookup('telekast')
    topic = tkservice.get_topic(topic_name)

    hfactory = tkcore.PipelineRecordHeaderFactory('pipeline_name',
                                                  'record_type')
    rfactory = tkcore.PipelineRecordFactory(payload_field_name='data')

    header = hfactory.create(pipeline_name='cdm_test', record_type='cdm')
    record = rfactory.create(header, {'message': 'test'})

    msg_count = 1000000
    time_log = jrnl.TimeLog()

    with topic.get_producer(use_rdkafka=True,
                            serializer=default_dict_serializer,
                            min_queued_messages=250000,
                            max_queued_messages=500000,
                            linger_ms=5) as producer:

        payload = uuid.uuid4()
        with jrnl.stopwatch('ingest_records', time_log):
            for i in range(msg_count):
                header = hfactory.create(pipeline_name='test',
                                         record_type='test_record')
                record = rfactory.create(header, message=payload, tag=i)
                producer.produce(record)
                if not i % 100000:
                    print('%d messages sent.' % i)

    print('%d messages sent to Kafka topic %s.' % (msg_count, topic_name))
    print(time_log.readout)
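
default_dict_serializer is imported from elsewhere and not shown. Assuming pykafka's serializer hook, which receives (value, partition_key) and returns the pair serialized to bytes, a pickle-based sketch might look like this (the real telekast implementation may differ):

import pickle

def default_dict_serializer(value, partition_key):
    # Assumed contract: return (serialized_value, serialized_partition_key).
    return pickle.dumps(value), partition_key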
Example #4
def reset_logs(self):
    self.time_log = jrnl.TimeLog()
    self.count_log = jrnl.CountLog()