def produce(topic, conf):
    """
        Produce User records
    """
    from confluent_kafka.avro import AvroProducer

    producer = AvroProducer(conf, default_value_schema=record_schema)

    print("Producing user records to topic {}. ^c to exit.".format(topic))

    while True:
        # Instantiate new User, populate fields, produce record, execute callbacks.
        record = User()
        try:
            record.name = input("Enter name: ")
            record.favorite_number = int(input("Enter favorite number: "))
            record.favorite_color = input("Enter favorite color: ")

            # The message passed to the delivery callback will already be serialized.
            # To aid in debugging we provide the original object to the delivery callback.
            producer.produce(topic=topic, value=record.to_dict(),
                             callback=lambda err, msg, obj=record: on_delivery(err, msg, obj))

            # Serve on_delivery callbacks from previous asynchronous produce()
            producer.poll(0)
        except KeyboardInterrupt:
            break
        except ValueError:
            print("Invalid input, discarding record...")
            continue

    print("\nFlushing records...")
    producer.flush()
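# The snippet above assumes module-level record_schema, User, and on_delivery
# definitions. A minimal sketch of those supporting pieces (field names are
# taken from the produce() body; everything else is an assumption):
from confluent_kafka import avro

record_schema = avro.loads("""
{
  "type": "record",
  "name": "User",
  "fields": [
    {"name": "name", "type": "string"},
    {"name": "favorite_number", "type": "int"},
    {"name": "favorite_color", "type": "string"}
  ]
}
""")

class User(object):
    def __init__(self):
        self.name = None
        self.favorite_number = None
        self.favorite_color = None

    def to_dict(self):
        return {"name": self.name,
                "favorite_number": self.favorite_number,
                "favorite_color": self.favorite_color}

def on_delivery(err, msg, obj):
    # Report per-record delivery results, using the original object for context.
    if err is not None:
        print("Delivery failed for User record {}: {}".format(obj.name, err))
    else:
        print("User record {} delivered to {} [{}] @ offset {}".format(
            obj.name, msg.topic(), msg.partition(), msg.offset()))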
def produce(conf, data_file, schema_record):
    """
        Produce MetadataChangeEvent records
    """
    # NOTE: `topic` is expected to be defined at module scope.
    producer = AvroProducer(conf, default_value_schema=avro.load(schema_record))

    print("Producing MetadataChangeEvent records to topic {}. ^c to exit.".format(topic))

    with open(data_file) as fp:
        cnt = 0
        while True:
            sample = fp.readline()
            cnt += 1
            if not sample:
                break
            try:
                content = ast.literal_eval(sample.strip())
                producer.produce(topic=topic, value=content)
                producer.poll(0)
                print("  MCE{}: {}".format(cnt, sample))
            except KeyboardInterrupt:
                break
            except ValueError as e:
                print("Message serialization failed {}".format(e))
                break

    print("Flushing records...")
    producer.flush()
def confluent_kafka_producer_performance(args):
    value_schema = avro.loads(value_schema_str)
    key_schema = avro.loads(key_schema_str)

    avroProducer = AvroProducer({
        'bootstrap.servers': args.bootstrap_servers,
        'schema.registry.url': args.schema_registry
    }, default_key_schema=key_schema, default_value_schema=value_schema)

    for i in range(int(args.msg_count)):
        value = {"data": random.choice(simple_messages)}
        key = {"key": str(uuid.uuid4())}
        try:
            avroProducer.produce(topic=args.topic, value=value, key=key)
        except BufferError:
            # Local producer queue is full: serve delivery callbacks to free
            # space, then retry the failed message once.
            avroProducer.poll(0.5)
            avroProducer.produce(topic=args.topic, value=value, key=key)

    avroProducer.flush()
class AvroProducerFacade:
    def __init__(self, name, emit_datum, broker, schema_registry_url):
        self.name = name
        self.emit_datum = emit_datum
        schema = avro.loads(get_schema_def())
        self.producer = AvroProducer(
            {
                'bootstrap.servers': broker,
                'schema.registry.url': schema_registry_url,
                **get_sr_config_from_environment(),
                **get_kafka_config_from_environment(),
            },
            default_key_schema=schema,
            default_value_schema=schema)

    def delivery_callback(self, err, msg):
        if err:
            log.debug("Failed to send from '%s': %s", self.name, err)
            datum = Datum(bad_count=1)
        else:
            datum = Datum(good_count=1)
        self.emit_datum(datum)

    def produce(self, topic, poll_wait=0):
        value = {'name': 'foo'}
        self.producer.produce(topic=topic,
                              callback=self.delivery_callback,
                              key=value,
                              value=value)
        self.producer.poll(poll_wait)

    def close(self):
        self.producer.flush()
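# A minimal usage sketch for AvroProducerFacade above. The broker and schema
# registry addresses are placeholders, and the emit_datum callable here just
# prints the Datum it receives; neither is part of the original snippet.
def print_datum(datum):
    print("delivery outcome:", datum)

facade = AvroProducerFacade(name="demo",
                            emit_datum=print_datum,
                            broker="localhost:9092",
                            schema_registry_url="http://localhost:8081")
facade.produce(topic="demo-topic", poll_wait=1)
facade.close()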
def send_to_kafka():
    # Re-arm the timer so this function runs again in 10 seconds.
    Timer(10.0, send_to_kafka).start()
    try:
        print("running")
        avro_producer = AvroProducer(
            {
                'bootstrap.servers': 'up01:9092,up02:9092,up03:9092',
                'schema.registry.url': 'http://up04:8081'
            },
            default_key_schema=key_schema,
            default_value_schema=value_schema)
        value = read_from_sense_hat()
        print(value)
        avro_producer.poll(0)
        avro_producer.produce(topic='test_avro_2',
                              value=value,
                              key=key,
                              callback=delivery_report)
        avro_producer.flush()
    except Exception:
        logging.error(traceback.format_exc())
def produce(self, data):
    print("Start Producer")
    avro_producer = AvroProducer(get_settings())
    avro_producer.poll(0)
    avro_producer.produce(topic=TEST_TOPIC,
                          value=data,
                          value_schema=self._schema)
    avro_producer.flush()
    print('Produced message: {}'.format(data))
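# A sketch of the settings helper the produce() method above relies on; the
# TEST_TOPIC name and the connection values are assumptions:
TEST_TOPIC = 'test-topic'

def get_settings():
    return {
        'bootstrap.servers': 'localhost:9092',
        'schema.registry.url': 'http://localhost:8081',
    }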
class KafkaProducer:
    def __init__(
        self,
        key_schema_str,
        value_schema_str,
        raw_key_model,
        raw_value_model,
        topic_name,
        header_index,
    ):
        self.topic_name = topic_name
        self.header_index = header_index
        self.key_schema = avro.loads(key_schema_str)
        self.value_schema = avro.loads(value_schema_str)
        self.avro_producer = AvroProducer(
            {
                "bootstrap.servers": os.environ["BOOTSTRAP_SERVERS"],
                "schema.registry.url": os.environ["SCHEMA_REGISTRY_URL"],
                # Safe producer settings
                # 'enable.idempotence': True,
                # High throughput
                # 'compression.type': 'snappy',
                # 'linger.ms': 20,
                # 'batch.size': 32768
            },
            default_key_schema=self.key_schema,
            default_value_schema=self.value_schema,
        )
        self.raw_key_model = raw_key_model
        self.raw_value_model = raw_value_model

    def preprocessing(self, data):
        # Map a flat data row into key/value dicts using the header index.
        backed_key_obj = dict()
        backed_value_obj = dict()
        for key_model in self.raw_key_model:
            key_index = self.header_index[key_model]
            backed_key_obj[key_model] = data[key_index]
        for value_model in self.raw_value_model:
            value_index = self.header_index[value_model]
            backed_value_obj[value_model] = data[value_index]
        return backed_key_obj, backed_value_obj

    def produce_event(self, data, pre_process=True):
        if pre_process:
            key, value = self.preprocessing(data)
        else:
            key, value = data
        self.avro_producer.produce(topic=self.topic_name, key=key, value=value)
        self.avro_producer.poll(0.1)
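# A hedged usage sketch for the KafkaProducer class above. The schemas, field
# models, and row layout are illustrative assumptions; BOOTSTRAP_SERVERS and
# SCHEMA_REGISTRY_URL must be present in the environment.
key_schema_str = """
{"type": "record", "name": "OrderKey",
 "fields": [{"name": "order_id", "type": "string"}]}
"""
value_schema_str = """
{"type": "record", "name": "OrderValue",
 "fields": [{"name": "amount", "type": "string"}]}
"""

producer = KafkaProducer(
    key_schema_str=key_schema_str,
    value_schema_str=value_schema_str,
    raw_key_model=["order_id"],
    raw_value_model=["amount"],
    topic_name="orders",
    header_index={"order_id": 0, "amount": 1},
)
# Each row is a flat list; preprocessing() maps it to key/value dicts.
producer.produce_event(["order-1", "9.99"])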
def produce_mysql_dataset_mce(mce):
    """
    Produce MetadataChangeEvent records.
    """
    conf = {'bootstrap.servers': BOOTSTRAP,
            'schema.registry.url': SCHEMAREGISTRY}
    record_schema = avro.load(AVROLOADPATH)
    producer = AvroProducer(conf, default_value_schema=record_schema)

    try:
        producer.produce(topic=KAFKATOPIC, value=mce)
        producer.poll(0)
        sys.stdout.write('\n%s has been successfully produced!\n' % mce)
    except ValueError as e:
        sys.stdout.write('Message serialization failed %s' % e)
    producer.flush()
def produce_dataset_mce(mce, kafka_config):
    """
    Produces a MetadataChangeEvent to Kafka
    """
    conf = {
        'bootstrap.servers': kafka_config.bootstrap_server,
        'schema.registry.url': kafka_config.schema_registry
    }
    record_schema = avro.load(kafka_config.avsc_path)
    producer = AvroProducer(conf, default_value_schema=record_schema)

    try:
        producer.produce(topic=kafka_config.kafka_topic, value=mce)
        producer.poll(0)
        print('\n%s has been successfully produced!\n' % mce)
    except ValueError as e:
        print('Message serialization failed %s' % e)
    producer.flush()
def produce(schema_json, data):
    print('schema:\n')
    pprint.pprint(schema_json)
    print('\n')
    print('message:\n')
    pprint.pprint(data)
    print('\n')

    schema_avro = avro.loads(json.dumps(schema_json))
    producer = AvroProducer({'bootstrap.servers': broker},
                            default_value_schema=schema_avro,
                            schema_registry=schema_registry)
    producer.poll(0)
    producer.produce(topic=topic, value=data)
    producer.flush()
class BaseProducer(object):
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s",
        handlers=[logging.FileHandler("example.log"),
                  logging.StreamHandler()])

    def __init__(self,
                 avro_schema_path,
                 bootstrap_servers='localhost:29092',
                 schema_registry='http://localhost:8081',
                 topic_subscribe='quickstart-elastic-news'):
        # Remember: if you are running this code outside Docker, set
        # bootstrap_servers='localhost:29092' and schema_registry='http://localhost:8081';
        # otherwise set bootstrap_servers='kafka:9092' and schema_registry='http://schema-registry:8081'.
        # "debug": "all",
        self.bootstrap_servers = bootstrap_servers
        self.conf = {
            'schema.registry.url': schema_registry,
            'bootstrap.servers': bootstrap_servers
        }
        self.SCHEMA = avro.loads(open(avro_schema_path, "r").read())
        self.topic_subscribe = topic_subscribe
        self.logger = logging.getLogger(__name__)

    def _on_delivery(self, err, msg, obj):
        """
        Handle delivery reports served from producer.poll.
        This callback takes an extra argument, obj.
        This allows the original contents to be included for debugging purposes.
        """
        if err is not None:
            self.logger.error(
                'Message {} delivery failed for user with error {}'.format(obj, err))

    def __enter__(self):
        self.avroProducer = AvroProducer(self.conf,
                                         default_value_schema=self.SCHEMA)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.avroProducer.flush()

    def send_message(self, record):
        try:
            self.avroProducer.produce(topic=self.topic_subscribe,
                                      value=record,
                                      callback=lambda err, msg, obj=record:
                                      self._on_delivery(err, msg, obj))
            self.avroProducer.poll(30)
        except KeyboardInterrupt as e:
            raise KafkaKeyboardInterrupt(e)
        except ValueError as e:
            raise KafkaValueError("Invalid input {}".format(e))
        except Exception as e:
            # raise GenericKafkaError(e)
            raise Exception(e)

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.bootstrap_servers)

    @staticmethod
    def test():
        from faker import Faker

        avro_schema_path = "../avro/sensor_reading.avsc"
        topic_subscribe = "test-elasticsearch-sink"
        with BaseProducer(avro_schema_path=avro_schema_path,
                          topic_subscribe=topic_subscribe) as prod:
            gen = Faker()
            for i in range(100):
                message = {
                    "id": gen.uuid4(),
                    "lat": float(gen.latitude()),
                    "lon": float(gen.longitude()),
                    "val": gen.pyint()
                }
                prod.send_message(message)
        print("Messages correctly sent to kafka broker")
        print("topic: {0}, schema_path: {1}".format(topic_subscribe, avro_schema_path))
# Optional per-message on_delivery handler (triggered by poll() or flush())
# when a message has been successfully delivered or
# permanently failed delivery (after retries).
def acked(err, msg):
    """Delivery report handler called on successful or failed delivery of message
    """
    if err is not None:
        print("Failed to deliver message: {}".format(err))
    else:
        print("Produced record to topic {} partition [{}] @ offset {}"
              .format(msg.topic(), msg.partition(), msg.offset()))

# `p` is an AvroProducer instance created earlier in the script.
for n in range(10):
    name_object = ccloud_lib.Name()
    name_object.name = "alice"
    record_key = name_object.to_dict()

    count_object = ccloud_lib.Count()
    count_object.count = n
    record_value = count_object.to_dict()

    print("Producing Avro record: {}\t{}".format(name_object.name,
                                                 count_object.count))
    p.produce(topic=topic, key=record_key, value=record_value, on_delivery=acked)
    # p.poll() serves delivery reports (on_delivery)
    # from previous produce() calls.
    p.poll(0)

p.flush(10)

print("10 messages were produced to topic {}!".format(topic))
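# ccloud_lib in Confluent's examples supplies the Name and Count record
# classes used above; a rough sketch of their shape (the real module also
# carries the matching Avro schemas):
class Name(object):
    """Record class for the Kafka message key."""
    def __init__(self):
        self.name = None

    def to_dict(self):
        return {"name": self.name}

class Count(object):
    """Record class for the Kafka message value."""
    def __init__(self):
        self.count = None

    def to_dict(self):
        return {"count": self.count}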
# Initialize key and values
lat = 40.043152
lng = -75.18071
bus_id = 1
key = {"bus_id": 1}

# Produce events simulating bus movements, forever
count = 1
while True:
    value = {"bus_id": bus_id, "lat": lat, "lng": lng}
    avroProducer.produce(topic=TOPIC_NAME, value=value, key=key)
    print("EVENT COUNT: {} key: {} lat: {}, lng: {}".format(
        count, key, lat, lng))

    # Polls the producer for events and calls the corresponding callbacks
    # (if registered)
    #
    # `timeout` refers to the maximum time to block waiting for events
    #
    # Since produce() is an asynchronous API this poll() call will most
    # likely not serve the delivery callback for the last produce()d message.
    avroProducer.poll(timeout=0)

    time.sleep(0.3)
    lat += 0.000001
    lng += 0.000001
    count += 1

# Cleanup step: wait for all messages to be delivered before exiting.
avroProducer.flush()
# producer = Producer({'bootstrap.servers': BOOTSTRAP_SERVERS})
print("Successfully connected to the broker")

def delivery_report(err, msg):
    """ Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush(). """
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))

while True:
    # Trigger any available delivery report callbacks from previous produce() calls
    avroProducer.poll(0)

    # Asynchronously produce a message; the delivery report callback
    # will be triggered from poll() above, or flush() below, when the message has
    # been successfully delivered or failed permanently.
    value = {"name": str(np.random.randn())}
    key = {"name": "mykey"}
    print(value)
    avroProducer.produce(topic='avro-connect', value=value, key=key)

    # Wait for any outstanding messages to be delivered and delivery report
    # callbacks to be triggered.
    avroProducer.flush()
    time.sleep(10)
def exit_handler():
    # Wait for any outstanding messages to be delivered and delivery report
    # callbacks to be triggered.
    producer.flush()

atexit.register(exit_handler)

with open(os.path.join(DRIVER_FILE_PREFIX, DRIVER_ID + ".csv")) as f:
    lines = f.readlines()
    pos = 0

    # Loop forever over the driver CSV file..
    while True:
        line = lines[pos]

        # Trigger any available delivery report callbacks from previous produce() calls
        producer.poll(0)

        key = {"key": DRIVER_ID}
        latitude = line.split(",")[0].strip()
        longitude = line.split(",")[1].strip()
        value = {"latitude": float(latitude), "longitude": float(longitude)}

        # ..and write the lat/long position to a Kafka topic.
        # Bind key/value as lambda defaults so the callback reports the message
        # it belongs to, not whatever the loop variables hold when it fires.
        producer.produce(topic=KAFKA_TOPIC,
                         value=value,
                         key=key,
                         callback=lambda err, msg, key=key, value=value:
                             print("Sent Key:{} Value:{}".format(key, value)
                                   if err is None else err))
        sleep(1)
        pos = (pos + 1) % len(lines)

# Confirm the topic is being written to with kafka-avro-console-consumer
def avro_producer(self, broker, schema_registry, topic, gen_dt_rows):
    # avro schema [key] & [value]
    key_schema_str = config.key_schema_str
    value_schema_str = config.value_schema_str

    # load avro definition
    key_schema = avro.loads(key_schema_str)
    value_schema = avro.loads(value_schema_str)

    # get data to insert
    get_data = read_files.CSV().csv_reader(gen_dt_rows)

    # init producer using key & value schema
    producer = AvroProducer(
        {
            # client id
            "client.id": 'sr-py-yelp-stream-app',
            # kafka broker server
            "bootstrap.servers": broker,
            # schema registry url
            "schema.registry.url": schema_registry,
            # eos = exactly once semantics [options]
            "enable.idempotence": "true",
            "max.in.flight.requests.per.connection": 1,
            "retries": 100,
            "acks": "all",
            # max number of messages batched in one message set
            "batch.num.messages": 1000,
            # delay in ms to wait for messages in queue
            "queue.buffering.max.ms": 100,
            # max number of messages on queue
            "queue.buffering.max.messages": 1000,
            # wait messages in queue before send to brokers (batch)
            "linger.ms": 100
        },
        default_key_schema=key_schema,
        default_value_schema=value_schema)

    # loop to insert data
    inserts = 0
    while inserts < len(get_data):
        # instantiate new records, execute callbacks
        record = Kafka()
        try:
            # map columns and access using dict values
            record.review_id = get_data[inserts]['review_id']
            record.business_id = get_data[inserts]['business_id']
            record.user_id = get_data[inserts]['user_id']
            record.stars = get_data[inserts]['stars']
            record.useful = get_data[inserts]['useful']
            record.date = get_data[inserts]['date']
            # print(record.to_dict())

            # serve on_delivery callbacks from previous asynchronous produce()
            producer.poll(0)

            # the message passed to the delivery callback will already be serialized;
            # to aid in debugging we provide the original object to the delivery callback.
            producer.produce(topic=topic,
                             key={'review_id': record.review_id},
                             value=record.to_dict(),
                             callback=lambda err, msg, obj=record:
                                 self.on_delivery(err, msg, obj))
        except BufferError:
            print("buffer full")
            producer.poll(0.1)
        except ValueError:
            print("invalid input")
            raise
        except KeyboardInterrupt:
            raise

        # increment values
        inserts += 1

    print("flushing records...")
    # wait for all buffered messages to be delivered
    producer.flush()
# The record below begins mid-snippet; the 'timestamp' key is assumed, and
# int() stands in for Python 2's long().
try:
    click_value = {
        'timestamp': int(float(datetime.utcnow().strftime('%s.%f')) * 1000),
        'user_agent': str(random.choice(agents)),
        'ip': str('.'.join([str(random.randint(0, 255)) for x in range(4)])),
        'referrer': str(random.choice(referrers)),
        'cost': random.uniform(0.05, 1.00)
    }
    avro_producer.produce(topic=topic,
                          key=click_key,
                          value=click_value,
                          callback=delivery_callback)
except BufferError:
    sys.stderr.write(
        '%% Local producer queue is full (%d messages awaiting delivery): try again\n'
        % len(avro_producer))

# Serve delivery callback queue.
# NOTE: Since produce() is an asynchronous API this poll() call
# will most likely not serve the delivery callback for the
# last produce()d message.
avro_producer.poll(0)

# Wait until all messages have been delivered
sys.stderr.write('%% Waiting for %d deliveries\n' % len(avro_producer))
avro_producer.flush()
class KafkaProducer(StreamWriter):
    """
    Class KafkaProducer is initiated from Extractor
    """

    def __init__(self, broker, topic, schema_registry_url, value_schema_filename):
        """Construct a KafkaProducer

        :param str broker: The name of the Kafka broker server and port
        :param str topic: The Kafka topic name
        :param str schema_registry_url: The Kafka Schema Registry server URL
        :param str value_schema_filename: The Kafka Avro schema file name
        """
        self.broker = broker
        self.topic = topic
        self.schema_registry_url = schema_registry_url
        self.value_schema_filename = value_schema_filename
        self._logger = logging.getLogger(__name__)

        self.config = {
            'bootstrap.servers': self.broker,
            'schema.registry.url': self.schema_registry_url,
            'queue.buffering.max.ms': const.KAFKA_MAX_BUFFERED_MS,
            'queue.buffering.max.messages': const.KAFKA_MAX_BUFFERED_MSG,
        }

        try:
            # read the avro schema file
            self.value_schema = avro.load(self.value_schema_filename)
        except Exception:
            self._logger.exception(
                "Extractor: exception in opening the avro schema file: {file}"
                .format(file=self.value_schema_filename))
            raise

        try:
            self._producer = AvroProducer(
                self.config, default_value_schema=self.value_schema)
            self._logger.info("Opened stream output for writing: {}/{}, "
                              "schemaregistry: {}, schemafile: {}"
                              .format(self.broker, self.topic,
                                      self.schema_registry_url,
                                      self.value_schema_filename))
        except Exception:
            self._logger.exception(
                "Extractor: exception in initializing Avro Producer. "
                "Broker: {broker} Schema Registry URL: {schema_reg_url} "
                "Schema File: {schema_file}"
                .format(broker=self.broker,
                        schema_reg_url=self.schema_registry_url,
                        schema_file=self.value_schema_filename))
            raise

    def write(self, message):
        """
        Sends a message to the Kafka topic

        :return: True if the message was handed to the producer, otherwise False
        :rtype: bool
        """
        try:
            # send message to kafka topic
            self._producer.produce(topic=self.topic, value=message)
            # As per the recommendation:
            # https://github.com/confluentinc/confluent-kafka-python/issues/16
            self._producer.poll(0)
        except Exception:
            self._logger.exception(
                "Extractor: exception in writing into Kafka topic: {topic}"
                .format(topic=self.topic))
            return False
        self._logger.debug("Message sent to topic: {topic}"
                           .format(topic=self.topic))
        return True

    def flush(self):
        self._logger.info("Flushing kafka producer queue")
        self._producer.flush()
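# A minimal sketch of driving the StreamWriter subclass above; the addresses
# and file names are placeholders, and the `const` module must supply the
# buffering limits referenced in __init__:
writer = KafkaProducer(broker="localhost:9092",
                       topic="extracted-records",
                       schema_registry_url="http://localhost:8081",
                       value_schema_filename="record.avsc")
if writer.write({"field": "value"}):
    writer.flush()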
class AvroProducerApi(ABCMbApi):
    """
    This class implements the interface for a Kafka producer carrying Avro
    messages. Users are expected to extend this class and override the
    delivery_report function.
    """

    def __init__(self, *, producer_conf: dict, key_schema_location: str,
                 value_schema_location: str, logger: logging.Logger = None,
                 retries: int = 3):
        """
        Initialize the Producer API
        :param producer_conf: configuration e.g:
                    {'bootstrap.servers': 'localhost:9092',
                     'schema.registry.url': 'http://localhost:8083'}
        :param key_schema_location: AVRO schema location for the key
        :param value_schema_location: AVRO schema location for the value
        """
        super(AvroProducerApi, self).__init__(logger=logger)
        self.lock = threading.Lock()
        self.key_schema = self.load_schema(schema_file=key_schema_location)
        self.value_schema = self.load_schema(schema_file=value_schema_location)
        self.producer = AvroProducer(producer_conf,
                                     default_key_schema=self.key_schema,
                                     default_value_schema=self.value_schema)
        self.retry_attempts = retries

    def load_schema(self, schema_file: str):
        try:
            from confluent_kafka import avro
            with open(schema_file, "r") as file:
                schema_bytes = file.read()
            return avro.loads(schema_bytes)
        except Exception as e:
            self.logger.error(
                f"Exception occurred while loading the schema: {schema_file}: {e}")
            self.logger.error(traceback.format_exc())

    def set_logger(self, logger):
        """
        Set logger
        :param logger: logger
        """
        self.logger = logger

    def delivery_report(self, err, msg, obj: AbcMessageAvro):
        """
        Handle delivery reports served from producer.poll.
        This callback takes an extra argument, obj.
        This allows the original contents to be included for debugging purposes.
        """
        if err is not None:
            self.logger.error(
                f"KAFKA: Message Delivery Failure! Error [{err}] MsgId: [{obj.id}] "
                f"Msg Name: [{obj.name}]")
            obj.set_kafka_error(kafka_error=err)
        else:
            self.logger.debug(
                f"KAFKA: Message Delivery Successful! MsgId: [{obj.id}] Msg Name: [{obj.name}] "
                f"Topic: [{msg.topic()}] Partition [{msg.partition()}] Offset [{msg.offset()}]")

    def produce(self, topic, record: AbcMessageAvro) -> bool:
        """
        Produce records for a specific topic
        :param topic: topic to which messages are written to
        :param record: record/message to be written
        :return: True if the record was queued for delivery, otherwise False
        """
        try:
            self.logger.debug(f"KAFKA: Record type={type(record)}")
            self.logger.debug(f"KAFKA: Producing key {record.get_id()} to topic {topic}.")
            self.logger.debug(f"KAFKA: Producing record {record.to_dict()} to topic {topic}.")

            self.lock.acquire()
            # Pass the message asynchronously
            self.producer.produce(topic=topic,
                                  key=record.get_id(),
                                  value=record.to_dict(),
                                  callback=lambda err, msg, obj=record:
                                      self.delivery_report(err, msg, obj))
            return True
        except ValueError as ex:
            self.logger.error("KAFKA: Invalid input, discarding record...")
            self.logger.error(f"KAFKA: Exception occurred {ex}")
            self.logger.error(traceback.format_exc())
        except Exception as ex:
            self.logger.error(f"KAFKA: Exception occurred {ex}")
            self.logger.error(traceback.format_exc())
        finally:
            if self.lock.locked():
                self.lock.release()
        return False

    def poll(self, timeout: float = 0.0):
        """
        Poll for delivery callbacks
        :param timeout: timeout
        """
        try:
            self.lock.acquire()
            return self.producer.poll(timeout=timeout)
        finally:
            self.lock.release()

    def flush(self, timeout: float = None):
        """
        Flush all pending writes
        :param timeout: timeout
        """
        try:
            self.lock.acquire()
            if timeout is not None:
                self.producer.flush(timeout)
            else:
                self.producer.flush()
        finally:
            self.lock.release()
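# A hedged sketch of wiring up AvroProducerApi above. The schema paths are
# assumptions, and `record` must be an AbcMessageAvro instance supplied by the
# surrounding codebase.
class DemoProducer(AvroProducerApi):
    pass

api = DemoProducer(producer_conf={'bootstrap.servers': 'localhost:9092',
                                  'schema.registry.url': 'http://localhost:8083'},
                   key_schema_location='key.avsc',
                   value_schema_location='value.avsc')
if api.produce('demo-topic', record=record):
    api.poll(0)   # serve delivery_report callbacks
    api.flush()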