def main(args):
    """Entry point: flood-test a Kafka topic with pickled pipeline records.

    Reads the YAML config named by args['<config_file>'], looks up the
    'telekast' service from the registry, and produces `msg_count` records
    (one shared UUID payload, per-record 'tag') to args['<topic>'] through a
    confluent-kafka Producer, timing the whole ingest with jrnl.TimeLog.
    Delivery failures collected by `delivery_report` into the module-level
    `errors` list are printed at the end.
    """
    configfile = args['<config_file>']
    yaml_config = common.read_config_file(configfile)
    services = common.ServiceObjectRegistry(snap.initialize_services(yaml_config))

    topic_name = args['<topic>']
    tkservice = services.lookup('telekast')
    # Resolve the topic up front so a bad topic name fails before we start producing.
    topic = tkservice.get_topic(topic_name)

    hfactory = tkcore.PipelineRecordHeaderFactory('pipeline_name', 'record_type')
    rfactory = tkcore.PipelineRecordFactory(payload_field_name='data')

    msg_count = 2000000
    time_log = jrnl.TimeLog()

    prod_config = {
        # NOTE(review): on_delivery here is redundant — produce() below also
        # passes callback=delivery_report, which takes precedence per message.
        "on_delivery": delivery_report,
        "bootstrap.servers": tkservice.connect_string,
        "group.id": "python_injector",
        "retry.backoff.ms": 3000,
        "retries": 5,
        "default.topic.config": {"request.required.acks": "1"},
        # Cap in-flight requests at 1 so retries cannot reorder messages.
        "max.in.flight.requests.per.connection": 1,
        "queue.buffering.max.messages": 100000,
        "batch.num.messages": 50000,
        "message.max.bytes": 2000000
    }
    producer = Producer(**prod_config)

    payload = uuid.uuid4()  # one shared payload; only the 'tag' field varies
    with jrnl.stopwatch('ingest_records', time_log):
        for i in range(msg_count):
            # Non-blocking poll services delivery callbacks queued by earlier
            # produce() calls; without it the local queue eventually fills up.
            producer.poll(0)
            header = hfactory.create(pipeline_name='test', record_type='test_record')
            record = rfactory.create(header, **{'message': payload, 'tag': i})
            producer.produce(topic_name, pickle.dumps(record), callback=delivery_report)
            if not i % 100000:
                print('%d messages sent.' % i)

    # Block until every buffered message has been delivered (or failed).
    producer.flush()
    print('%d messages sent to Kafka topic %s.' % (msg_count, topic_name))
    print(time_log.readout)

    if errors:  # idiomatic truthiness check instead of len() (PEP 8)
        print('!!! Errors sending messages:')
        print('\n'.join(errors))
def __init__(self):
    """Set up empty transform state plus count/time stat logs."""
    # Record-shape bookkeeping.
    self.target_record_fields = set()
    self.field_map = {}
    self.output_header = []

    # Datasource lookup machinery.
    self.datasources = {}
    self.explicit_datasource_lookup_functions = {}
    self.value_map = FieldValueMap()

    # Event / error hooks.
    self.event_handlers = {}
    self.error_handlers = {}

    # This stat stays at zero until the process() method runs.
    self.count_log = jrnl.CountLog()

    # We only time the process() method (which invokes transform() once per
    # inbound record), never individual transform() calls. Seed the elapsed
    # "processing_time" stat to zero by recording a zero-length interval.
    self.time_log = jrnl.TimeLog()
    now = datetime.datetime.now()
    self.time_log.record_elapsed_time('processing_time', now, now)
def main(args):
    """Entry point: throughput-test a Kafka topic via a pykafka producer.

    Reads the YAML config named by args['<config_file>'], looks up the
    'telekast' service, and streams `msg_count` records (one shared UUID
    payload, per-record 'tag') to args['<topic>'] through the topic's
    rdkafka-backed producer, timing the ingest with jrnl.TimeLog.
    """
    configfile = args['<config_file>']
    yaml_config = common.read_config_file(configfile)
    services = common.ServiceObjectRegistry(snap.initialize_services(yaml_config))

    topic_name = args['<topic>']
    tkservice = services.lookup('telekast')
    topic = tkservice.get_topic(topic_name)

    hfactory = tkcore.PipelineRecordHeaderFactory('pipeline_name', 'record_type')
    rfactory = tkcore.PipelineRecordFactory(payload_field_name='data')
    # (Removed two dead pre-loop statements that built a 'cdm_test' header and
    # record: both variables were immediately reassigned inside the loop and
    # the throwaway record was never produced.)

    msg_count = 1000000
    time_log = jrnl.TimeLog()

    with topic.get_producer(use_rdkafka=True,
                            serializer=default_dict_serializer,
                            min_queued_messages=250000,
                            max_queued_messages=500000,
                            linger_ms=5) as producer:
        payload = uuid.uuid4()  # one shared payload; only the 'tag' field varies
        with jrnl.stopwatch('ingest_records', time_log):
            for i in range(msg_count):
                header = hfactory.create(pipeline_name='test', record_type='test_record')
                record = rfactory.create(header, **{'message': payload, 'tag': i})
                producer.produce(record)
                if not i % 100000:
                    print('%d messages sent.' % i)

    print('%d messages sent to Kafka topic %s.' % (msg_count, topic_name))
    print(time_log.readout)
def reset_logs(self):
    """Discard accumulated stats by swapping in fresh count and time logs."""
    self.count_log = jrnl.CountLog()
    self.time_log = jrnl.TimeLog()