def produce(ctx, message, topic, key_name):
    """Produce an Avro-serialized message."""
    settings = {
        'bootstrap.servers': get_broker_url(ctx.parent),
        'schema.registry.url': get_registry_url(ctx.parent),
        'error_cb': error_cb,
        'api.version.request': True,
    }
    p = avro.AvroProducer(settings)
    key = {'name': key_name}
    value = {'content': message}
    try:
        p.produce(topic=topic, key=key, value=value,
                  key_schema=key_schema, value_schema=value_schema)
    except KeyboardInterrupt:
        pass
    # flush() returns the number of messages still queued after the timeout
    if p.flush(30):
        print('Error: shutting down after flush timeout, '
              'but there are unsent messages.')
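# A minimal sketch of the module-level key_schema/value_schema the function
# above assumes. The record names here are hypothetical; only the field
# names ('name', 'content') are implied by the dicts passed to produce().
from confluent_kafka import avro

key_schema = avro.loads("""
{"type": "record", "name": "MessageKey",
 "fields": [{"name": "name", "type": "string"}]}
""")
value_schema = avro.loads("""
{"type": "record", "name": "MessageValue",
 "fields": [{"name": "content", "type": "string"}]}
""")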
def connect(self):
    logger = logging.getLogger()
    if self.type == "None":
        return
    if self.type == "Kafka":
        logger.debug("brokers: {}, topic: {}".format(
            self.brokers, self.topic))
        self.prod = Producer({
            'bootstrap.servers': self.brokers,
            'default.topic.config': {
                'message.timeout.ms': 30000,
                # 'auto.offset.reset': 'smallest',
            }
        })
    elif self.type == "CFKafka":
        src_file = os.path.join(self.tc_id, self.schema_file)
        logger.debug("Loading source Avro: '{}'".format(src_file))
        val_schema = avro.load(src_file)
        logger.debug("brokers: {}, schema_reg: {}, topic: {}".format(
            self.brokers, self.schema_reg, self.topic))
        self.prod = avro.AvroProducer(
            {
                'bootstrap.servers': self.brokers,
                'schema.registry.url': self.schema_reg,
                'default.topic.config': {
                    'message.timeout.ms': 30000,
                    # 'auto.offset.reset': 'smallest',
                }
            },
            default_value_schema=val_schema)
    elif self.type == "REST":
        logger.debug("rest_url_base: {}".format(self.rest_url_base))
def __init__(self, **kwargs) -> None:
    self.config = utils.retrieve(kwargs['--path_to_config'],
                                 kwargs['--section'])
    self.topic_name = kwargs['--topic']
    self.num_partitions = kwargs['--partitions']
    self.key = kwargs['--key']
    self.producer = avro.AvroProducer(self.config)
    self.source_file = kwargs['--source_file']
    self.key_schema = kwargs['--key_schema']
    self.value_schema = kwargs['--value_schema']
    self.file_extension = kwargs['--file_extension']
def plain_avro_producer(
        running_cluster_config: Dict[str, str],
        topic_and_partitions: Tuple[str, int]) -> confluent_avro.AvroProducer:
    """
    Creates a plain `confluent_kafka.avro.AvroProducer` that can be used
    to publish messages.
    """
    topic_id, partitions = topic_and_partitions
    producer_config = {
        "bootstrap.servers": running_cluster_config["broker"],
        "schema.registry.url": running_cluster_config["schema-registry"],
    }
    producer = confluent_avro.AvroProducer(producer_config)
    # Bind the topic so callers only supply key/value (and their schemas)
    producer.produce = partial(producer.produce, topic=topic_id)
    return producer
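# The signature above suggests a pytest fixture (cluster config and topic
# injected as fixtures). A hedged usage sketch -- the test name, schema, and
# payload here are illustrative, not from the original source:
from confluent_kafka import avro

def test_publish_roundtrip(plain_avro_producer):
    value_schema = avro.loads(
        '{"type": "record", "name": "Example",'
        ' "fields": [{"name": "id", "type": "int"}]}')
    # topic= is already bound by the fixture's functools.partial
    plain_avro_producer.produce(value={"id": 1}, value_schema=value_schema)
    plain_avro_producer.flush()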
def fly_avro_drones(bootstrap_servers, schema_registry_url, nmessages,
                    default_value_schema_str=drone_schema_str,
                    producer_dict_kwargs=None, topic_name="drones_raw",
                    time_delay=0, drones=None):
    """
    A simple example of sending structured messages from drones to a
    message broker.

    Args:
        bootstrap_servers (str): Comma separated string of Kafka servers
        schema_registry_url (str): Schema registry URL
        nmessages (int): Number of messages to send
        default_value_schema_str (str): String Avro schema compatible with
            drone messages
        producer_dict_kwargs (dict): Optional keyword arguments for producer
        topic_name (str): Topic name to which drone messages will be sent
        time_delay (int): Delay time between cycles when producing messages
        drones (iterable): Iterable of drones from which to generate messages

    Tip:
        Schemas should match the messages sent by drones.
    """
    # No trailing comma here -- it would turn the dict into a one-element
    # tuple and break the update() call below.
    pdk = {
        'bootstrap.servers': bootstrap_servers,
        'schema.registry.url': schema_registry_url
    }
    if isinstance(producer_dict_kwargs, dict):
        pdk.update(producer_dict_kwargs)
    producer = avro.AvroProducer(
        pdk, default_value_schema=avro.loads(default_value_schema_str))
    z = len(str(nmessages))  # Pretty-print cycle number for logging
    for i in range(nmessages):
        print("====MESSAGE SET {}====".format(str(i).zfill(z)))
        for drone in drones:
            msg = drone.message()
            print(msg)
            producer.produce(topic=topic_name,
                             value={k: getattr(msg, k) for k in msg._fields})
        time.sleep(time_delay)
    producer.flush()
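# The msg._fields lookup above implies each drone message is a namedtuple.
# A minimal stand-in drone, with hypothetical field names that would need a
# matching Avro schema string:
import random
from collections import namedtuple

DroneMessage = namedtuple('DroneMessage', ['drone_id', 'lat', 'lon'])

class ToyDrone:
    def __init__(self, drone_id):
        self.drone_id = drone_id

    def message(self):
        # Random jitter around a fixed point, purely for illustration
        return DroneMessage(drone_id=self.drone_id,
                            lat=37.0 + random.random(),
                            lon=-122.0 + random.random())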
def verify_avro():
    from confluent_kafka import avro
    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir,
                            'tests', 'avro')

    # Producer config
    conf = {
        'bootstrap.servers': bootstrap_servers,
        'error_cb': error_cb,
        'api.version.request': api_version_request,
        'default.topic.config': {
            'produce.offset.report': True
        }
    }

    # Create producer
    if schema_registry_url:
        conf['schema.registry.url'] = schema_registry_url
        p = avro.AvroProducer(conf)
    else:
        p = avro.AvroProducer(conf, schema_registry=InMemorySchemaRegistry())

    prim_float = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
    prim_string = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
    basic = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
    str_value = 'abc'
    float_value = 32.0

    combinations = [
        dict(key=float_value, key_schema=prim_float),
        dict(value=float_value, value_schema=prim_float),
        dict(key={'name': 'abc'}, key_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic,
             key=float_value, key_schema=prim_float),
        dict(value={'name': 'abc'}, value_schema=basic,
             key=str_value, key_schema=prim_string),
        dict(value=float_value, value_schema=prim_float,
             key={'name': 'abc'}, key_schema=basic),
        dict(value=float_value, value_schema=prim_float,
             key=str_value, key_schema=prim_string),
        dict(value=str_value, value_schema=prim_string,
             key={'name': 'abc'}, key_schema=basic),
        dict(value=str_value, value_schema=prim_string,
             key=float_value, key_schema=prim_float),
    ]

    # Consumer config
    cons_conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': 'test.py',
        'session.timeout.ms': 6000,
        'enable.auto.commit': False,
        'api.version.request': api_version_request,
        'on_commit': print_commit_result,
        'error_cb': error_cb,
        'default.topic.config': {
            'auto.offset.reset': 'earliest'
        }
    }

    for i, combo in enumerate(combinations):
        combo['topic'] = str(uuid.uuid4())
        p.produce(**combo)
        p.poll(0)
        p.flush()

        # Create consumer
        conf = copy(cons_conf)
        if schema_registry_url:
            conf['schema.registry.url'] = schema_registry_url
            c = avro.AvroConsumer(conf)
        else:
            c = avro.AvroConsumer(conf,
                                  schema_registry=InMemorySchemaRegistry())
        c.subscribe([combo['topic']])

        while True:
            msg = c.poll(0)
            if msg is None:
                continue
            if msg.error():
                if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                    break
                else:
                    continue

            tstype, timestamp = msg.timestamp()
            print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
                  (msg.topic(), msg.partition(), msg.offset(),
                   msg.key(), msg.value(), tstype, timestamp))

            # 'async' is a reserved word in Python 3.7+; the keyword
            # argument is 'asynchronous' in current confluent-kafka releases
            c.commit(msg, asynchronous=False)

        # Close consumer
        c.close()
def verify_avro_https():
    from confluent_kafka import avro
    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir,
                            'tests', 'avro')

    # Producer config
    conf = {
        'bootstrap.servers': bootstrap_servers,
        'error_cb': error_cb,
        'api.version.request': api_version_request
    }
    conf.update(testconf.get('schema_registry_https', {}))

    p = avro.AvroProducer(conf)

    prim_float = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
    prim_string = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
    basic = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
    str_value = 'abc'
    float_value = 32.0

    combinations = [
        dict(key=float_value, key_schema=prim_float),
        dict(value=float_value, value_schema=prim_float),
        dict(key={'name': 'abc'}, key_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic,
             key=float_value, key_schema=prim_float),
        dict(value={'name': 'abc'}, value_schema=basic,
             key=str_value, key_schema=prim_string),
        dict(value=float_value, value_schema=prim_float,
             key={'name': 'abc'}, key_schema=basic),
        dict(value=float_value, value_schema=prim_float,
             key=str_value, key_schema=prim_string),
        dict(value=str_value, value_schema=prim_string,
             key={'name': 'abc'}, key_schema=basic),
        dict(value=str_value, value_schema=prim_string,
             key=float_value, key_schema=prim_float),
        # Verify identity check allows falsy object values (e.g. 0, empty
        # string) to be handled properly (issue #342)
        dict(value='', value_schema=prim_string,
             key=0.0, key_schema=prim_float),
        dict(value=0.0, value_schema=prim_float,
             key='', key_schema=prim_string),
    ]

    for i, combo in enumerate(combinations):
        combo['topic'] = str(uuid.uuid4())
        combo['headers'] = [('index', str(i))]
        p.produce(**combo)
    p.flush()

    conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': generate_group_id(),
        'session.timeout.ms': 6000,
        'enable.auto.commit': False,
        'api.version.request': api_version_request,
        'on_commit': print_commit_result,
        'error_cb': error_cb,
        'auto.offset.reset': 'earliest'
    }
    conf.update(testconf.get('schema_registry_https', {}))

    c = avro.AvroConsumer(conf)
    c.subscribe([t['topic'] for t in combinations])

    msgcount = 0
    while msgcount < len(combinations):
        msg = c.poll(0)
        if msg is None or msg.error():
            continue

        tstype, timestamp = msg.timestamp()
        print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
              (msg.topic(), msg.partition(), msg.offset(),
               msg.key(), msg.value(), tstype, timestamp))

        # Omit empty Avro fields from payload for comparison
        record_key = msg.key()
        record_value = msg.value()
        index = int(dict(msg.headers())['index'])
        if isinstance(msg.key(), dict):
            record_key = {k: v for k, v in msg.key().items()
                          if v is not None}
        if isinstance(msg.value(), dict):
            record_value = {k: v for k, v in msg.value().items()
                            if v is not None}

        assert combinations[index].get('key') == record_key
        assert combinations[index].get('value') == record_value

        c.commit()
        msgcount += 1

    # Close consumer
    c.close()
def verify_avro():
    from confluent_kafka import avro
    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir,
                            'tests', 'avro')

    # Producer config
    conf = {
        'bootstrap.servers': bootstrap_servers,
        'error_cb': error_cb,
        'api.version.request': api_version_request
    }

    # Create producer
    if schema_registry_url:
        conf['schema.registry.url'] = schema_registry_url
        p = avro.AvroProducer(conf)
    else:
        p = avro.AvroProducer(conf, schema_registry=InMemorySchemaRegistry())

    prim_float = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
    prim_string = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
    basic = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
    str_value = 'abc'
    float_value = 32.0

    combinations = [
        dict(key=float_value, key_schema=prim_float),
        dict(value=float_value, value_schema=prim_float),
        dict(key={'name': 'abc'}, key_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic,
             key=float_value, key_schema=prim_float),
        dict(value={'name': 'abc'}, value_schema=basic,
             key=str_value, key_schema=prim_string),
        dict(value=float_value, value_schema=prim_float,
             key={'name': 'abc'}, key_schema=basic),
        dict(value=float_value, value_schema=prim_float,
             key=str_value, key_schema=prim_string),
        dict(value=str_value, value_schema=prim_string,
             key={'name': 'abc'}, key_schema=basic),
        dict(value=str_value, value_schema=prim_string,
             key=float_value, key_schema=prim_float),
        # Verify identity check allows falsy object values (e.g. 0, empty
        # string) to be handled properly (issue #342)
        dict(value='', value_schema=prim_string,
             key=0.0, key_schema=prim_float),
        dict(value=0.0, value_schema=prim_float,
             key='', key_schema=prim_string),
    ]

    # Consumer config
    cons_conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': 'test.py',
        'session.timeout.ms': 6000,
        'enable.auto.commit': False,
        'api.version.request': api_version_request,
        'on_commit': print_commit_result,
        'error_cb': error_cb,
        'auto.offset.reset': 'earliest'
    }

    for i, combo in enumerate(combinations):
        combo['topic'] = str(uuid.uuid4())
        p.produce(**combo)
        p.flush()

        # Create consumer
        conf = copy(cons_conf)
        if schema_registry_url:
            conf['schema.registry.url'] = schema_registry_url
            c = avro.AvroConsumer(conf)
        else:
            c = avro.AvroConsumer(conf,
                                  schema_registry=InMemorySchemaRegistry())
        c.subscribe([combo['topic']])

        while True:
            msg = c.poll(0)
            if msg is None:
                continue
            if msg.error():
                if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                    break
                else:
                    continue

            tstype, timestamp = msg.timestamp()
            print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
                  (msg.topic(), msg.partition(), msg.offset(),
                   msg.key(), msg.value(), tstype, timestamp))

            # Omit empty Avro fields from payload for comparison
            record_key = msg.key()
            record_value = msg.value()
            if isinstance(msg.key(), dict):
                record_key = {k: v for k, v in msg.key().items()
                              if v is not None}
            if isinstance(msg.value(), dict):
                record_value = {k: v for k, v in msg.value().items()
                                if v is not None}

            assert combo.get('key') == record_key
            assert combo.get('value') == record_value

            c.commit(msg, asynchronous=False)

        # Close consumer
        c.close()
def verify_avro_explicit_read_schema():
    """Verify that reading Avro with an explicit reader schema works."""
    from confluent_kafka import avro
    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir,
                            'tests', 'avro')

    # Producer config
    conf = {
        'bootstrap.servers': bootstrap_servers,
        'error_cb': error_cb,
        'api.version.request': api_version_request,
        'default.topic.config': {
            'produce.offset.report': True
        }
    }

    # Create producer
    if schema_registry_url:
        conf['schema.registry.url'] = schema_registry_url
        p = avro.AvroProducer(conf)
    else:
        p = avro.AvroProducer(conf, schema_registry=InMemorySchemaRegistry())

    key_schema = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
    schema1 = avro.load(os.path.join(avsc_dir, "user_v1.avsc"))
    schema2 = avro.load(os.path.join(avsc_dir, "user_v2.avsc"))
    float_value = 32.0
    val = {
        "name": "abc",
        "favorite_number": 42,
        "favorite_color": "orange"  # fixed typo: was "favorite_colo"
    }
    val1 = {"name": "abc"}

    combinations = [
        dict(value=val, value_schema=schema2,
             key=float_value, key_schema=key_schema,
             reader_value_schema=schema1, reader_key_schema=key_schema),
        dict(value=val1, value_schema=schema1,
             key=float_value, key_schema=key_schema,
             reader_value_schema=schema2, reader_key_schema=key_schema),
    ]

    # Consumer config
    cons_conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': 'test.py',
        'session.timeout.ms': 6000,
        'enable.auto.commit': False,
        'api.version.request': api_version_request,
        'on_commit': print_commit_result,
        'error_cb': error_cb,
        'default.topic.config': {
            'auto.offset.reset': 'earliest'
        }
    }

    for i, combo in enumerate(combinations):
        reader_key_schema = combo.pop("reader_key_schema")
        reader_value_schema = combo.pop("reader_value_schema")
        combo['topic'] = str(uuid.uuid4())
        p.produce(**combo)
        p.poll(0)
        p.flush()

        # Create consumer
        conf = copy(cons_conf)
        if schema_registry_url:
            conf['schema.registry.url'] = schema_registry_url
            c = avro.AvroConsumer(conf,
                                  reader_key_schema=reader_key_schema,
                                  reader_value_schema=reader_value_schema)
        else:
            c = avro.AvroConsumer(conf,
                                  schema_registry=InMemorySchemaRegistry(),
                                  reader_key_schema=reader_key_schema,
                                  reader_value_schema=reader_value_schema)
        c.subscribe([combo['topic']])

        while True:
            msg = c.poll(0)
            if msg is None:
                continue
            if msg.error():
                if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                    break
                else:
                    continue

            tstype, timestamp = msg.timestamp()
            print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
                  (msg.topic(), msg.partition(), msg.offset(),
                   msg.key(), msg.value(), tstype, timestamp))

            # Omit empty Avro fields from payload for comparison
            record_key = msg.key()
            record_value = msg.value()
            if isinstance(msg.key(), dict):
                record_key = {k: v for k, v in msg.key().items()
                              if v is not None}
            if isinstance(msg.value(), dict):
                record_value = {k: v for k, v in msg.value().items()
                                if v is not None}

            assert combo.get('key') == record_key
            assert combo.get('value')['name'] == record_value['name']
            c.commit(msg, asynchronous=False)

        # Close consumer
        c.close()
"security.protocol": "SASL_SSL", "sasl.username": os.getenv('CCLOUDKEY'), "sasl.password": os.getenv('CCLOUDSECRET') } sources = [ FileSource(file_path=dct[key].get('path'), schema_path=f'ingestd/avro/schemas/raw/{key}.avsc', key_fields=dct[key].get('key_fields'), value_fields=dct[key].get('value_fields'), parser=parseDelimited) for key in dct.keys() ] producers = [ avro.AvroProducer(**CONFIG, default_key_schema=key_schema, default_value_schema=value_schema, schema_registry=os.getenv('SCHEMAREGISTRYURL')) for key_schema, value_schema in zip(key_schemas, value_schemas) ] for src, producer, topic in zip(sources, producers, dct.keys()): for specific_record in src.produce_payload(specific_flag=True): producer.produce(topic=topic, key=dict({ key_name: key_value for key_name, key_value in zip( specific_record.get('record_key_fields'), specific_record.get('record_key')) }), value=dict({ field_name: field_value
def io_stream(file_path):
    # Assumed helper: stream (record_type, record) pairs from a source file;
    # the original fragment had no usable body.
    yield from utils.generate_payload(file_path)


def produce(producer: "Producer", record: "AbstractRecord") -> None:
    # Assumed intent: publish one record through an already-configured
    # Producer; the original fragment conflated construction with sending.
    producer.producer.produce(topic=producer.topic_name,
                              key=producer.key,
                              value={**record})


class Producer:
    def __init__(self, **kwargs) -> None:
        self.config = utils.retrieve(kwargs['--path_to_config'],
                                     kwargs['--section'])
        self.topic_name = kwargs['--topic']
        self.num_partitions = kwargs['--partitions']
        self.key = kwargs['--key']
        self.producer = avro.AvroProducer(self.config)
        self.source_file = kwargs['--source_file']
        self.key_schema = kwargs['--key_schema']
        self.value_schema = kwargs['--value_schema']
        self.file_extension = kwargs['--file_extension']

    def produce(self, **kwargs):
        # Load the schemas once; the loop body only needs the records
        path_to_key_schema = Path('ingestd/kafka/schemas') / self.key_schema
        path_to_value_schema = Path('ingestd/kafka/schemas') / self.value_schema
        key_schema = avro.load(path_to_key_schema.as_posix())
        value_schema = avro.load(path_to_value_schema.as_posix())
        for rec_type, record_value in utils.generate_payload(self.source_file):
            # Pass the record as the value and the loaded schemas as the
            # *_schema arguments; the original swapped these.
            self.producer.produce(topic=self.topic_name,
                                  key=self.key,
                                  value=record_value,
                                  key_schema=key_schema,
                                  value_schema=value_schema)


p = avro.AvroProducer(config=utils.retrieve(file_path='confs/finwire.yaml',
                                            section='producer'))

# Route message production based on doc type
for file_path in Path('/data/').glob('FINWIRE*[1234]'):
    for rec_type, record_value in utils.generate_payload(str(file_path)):
        topic_subject = "finwire{0}".format(rec_type)
        record_schema = avro.load(
            (schema_dir / 'finwire{0}.avsc'.format(rec_type.lower())).as_posix())
        try:
            p.produce(topic=topic_subject, value=record_value,
                      value_schema=record_schema, callback=utils.ackback)
            p.poll(.25)
        except RuntimeError as e:
            print("Runtime Error: {}".format(e))
    print("Completed file {}".format(file_path.as_posix()))
p.flush()
def run_avro_loop(producer_conf, consumer_conf):
    from confluent_kafka import avro
    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'avro')

    p = avro.AvroProducer(producer_conf)

    prim_float = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
    prim_string = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
    basic = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
    str_value = 'abc'
    float_value = 32.0

    combinations = [
        dict(key=float_value, key_schema=prim_float),
        dict(value=float_value, value_schema=prim_float),
        dict(key={'name': 'abc'}, key_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic,
             key=float_value, key_schema=prim_float),
        dict(value={'name': 'abc'}, value_schema=basic,
             key=str_value, key_schema=prim_string),
        dict(value=float_value, value_schema=prim_float,
             key={'name': 'abc'}, key_schema=basic),
        dict(value=float_value, value_schema=prim_float,
             key=str_value, key_schema=prim_string),
        dict(value=str_value, value_schema=prim_string,
             key={'name': 'abc'}, key_schema=basic),
        dict(value=str_value, value_schema=prim_string,
             key=float_value, key_schema=prim_float),
        # Verify identity check allows falsy object values (e.g. 0, empty
        # string) to be handled properly (issue #342)
        dict(value='', value_schema=prim_string,
             key=0.0, key_schema=prim_float),
        dict(value=0.0, value_schema=prim_float,
             key='', key_schema=prim_string),
    ]

    for i, combo in enumerate(combinations):
        combo['topic'] = str(uuid.uuid4())
        combo['headers'] = [('index', str(i))]
        p.produce(**combo)
    p.flush()

    c = avro.AvroConsumer(consumer_conf)
    c.subscribe([t['topic'] for t in combinations])

    msgcount = 0
    while msgcount < len(combinations):
        msg = c.poll(1)
        if msg is None:
            continue
        if msg.error():
            print(msg.error())
            continue

        tstype, timestamp = msg.timestamp()
        print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
              (msg.topic(), msg.partition(), msg.offset(),
               msg.key(), msg.value(), tstype, timestamp))

        # Omit empty Avro fields from payload for comparison
        record_key = msg.key()
        record_value = msg.value()
        index = int(dict(msg.headers())['index'])
        if isinstance(msg.key(), dict):
            record_key = {k: v for k, v in msg.key().items()
                          if v is not None}
        if isinstance(msg.value(), dict):
            record_value = {k: v for k, v in msg.value().items()
                            if v is not None}

        assert combinations[index].get('key') == record_key
        assert combinations[index].get('value') == record_value

        c.commit()
        msgcount += 1

    # Close consumer
    c.close()
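# A hedged sketch of the two config dicts run_avro_loop() expects; the
# broker and registry addresses are placeholders, not values from the
# original source:
producer_conf = {
    'bootstrap.servers': 'localhost:9092',
    'schema.registry.url': 'http://localhost:8081',
}
consumer_conf = dict(producer_conf, **{
    'group.id': 'avro-loop-test',
    'session.timeout.ms': 6000,
    'enable.auto.commit': False,
    'auto.offset.reset': 'earliest',
})
run_avro_loop(producer_conf, consumer_conf)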
def verify_avro_explicit_read_schema():
    """Verify that reading Avro with an explicit reader schema works."""
    from confluent_kafka import avro

    base_conf = {
        'bootstrap.servers': bootstrap_servers,
        'error_cb': error_cb,
        'schema.registry.url': schema_registry_url
    }

    # base_conf already carries schema.registry.url, so it is not repeated
    consumer_conf = dict(base_conf, **{
        'group.id': 'test.py',
        'session.timeout.ms': 6000,
        'enable.auto.commit': False,
        'on_commit': print_commit_result,
        'auto.offset.reset': 'earliest'
    })

    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'avro')
    writer_schema = avro.load(os.path.join(avsc_dir, "user_v1.avsc"))
    reader_schema = avro.load(os.path.join(avsc_dir, "user_v2.avsc"))

    user_value1 = {"name": " Rogers Nelson"}
    user_value2 = {"name": "Kenny Loggins"}

    combinations = [
        dict(key=user_value1, key_schema=writer_schema,
             value=user_value2, value_schema=writer_schema),
        dict(key=user_value2, key_schema=writer_schema,
             value=user_value1, value_schema=writer_schema)
    ]
    avro_topic = topic + str(uuid.uuid4())

    p = avro.AvroProducer(base_conf)
    for combo in combinations:
        p.produce(topic=avro_topic, **combo)
    p.flush()

    c = avro.AvroConsumer(consumer_conf,
                          reader_key_schema=reader_schema,
                          reader_value_schema=reader_schema)
    c.subscribe([avro_topic])

    msgcount = 0
    while msgcount < len(combinations):
        msg = c.poll(1)
        if msg is None:
            continue
        if msg.error():
            print("Consumer error {}".format(msg.error()))
            continue

        msgcount += 1
        # Avro schema projection should supply the two fields not present
        # in the writer schema
        try:
            assert msg.key().get('favorite_number') == 42
            assert msg.key().get('favorite_color') == "purple"
            assert msg.value().get('favorite_number') == 42
            assert msg.value().get('favorite_color') == "purple"
            print("success: schema projection worked for explicit reader schema")
        except KeyError:
            raise confluent_kafka.avro.SerializerError(
                "Schema projection failed when setting reader schema.")
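# For reference, a plausible reconstruction of the user_v1/user_v2 schemas
# loaded above -- the actual .avsc files may differ. The reader-schema
# defaults are what projection fills in, which is why the assertions expect
# favorite_number == 42 and favorite_color == "purple":
from confluent_kafka import avro

user_v1 = avro.loads("""
{"type": "record", "name": "User", "namespace": "example.avro",
 "fields": [{"name": "name", "type": "string"}]}
""")
user_v2 = avro.loads("""
{"type": "record", "name": "User", "namespace": "example.avro",
 "fields": [
    {"name": "name", "type": "string"},
    {"name": "favorite_number", "type": ["int", "null"], "default": 42},
    {"name": "favorite_color", "type": ["string", "null"], "default": "purple"}
 ]}
""")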