def test_produce_primitive_string_key(self):
    value_schema = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
    key_schema = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
    producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'})
    with self.assertRaises(ConnectionError):  # Nonexistent schema registry
        producer.produce(topic='test', value={"name": 'abc"'},
                         value_schema=value_schema, key='mykey',
                         key_schema=key_schema)
def test_produce_with_empty_key_no_schema(self):
    value_schema = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
    schema_registry = MockSchemaRegistryClient()
    producer = AvroProducer({}, schema_registry=schema_registry,
                            default_value_schema=value_schema)
    with self.assertRaises(KeySerializerError):
        producer.produce(topic='test', value=0.0, key='')
def test_produce_with_custom_registry(self):
    schema_registry = MockSchemaRegistryClient()
    value_schema = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
    key_schema = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
    producer = AvroProducer({}, schema_registry=schema_registry)
    producer.produce(topic='test', value={"name": 'abc"'},
                     value_schema=value_schema, key='mykey',
                     key_schema=key_schema)
def test_produce_with_empty_key_value_with_schema(self):
    key_schema = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
    value_schema = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
    schema_registry = MockSchemaRegistryClient()
    producer = AvroProducer({}, schema_registry=schema_registry,
                            default_key_schema=key_schema,
                            default_value_schema=value_schema)
    producer.produce(topic='test', value=0.0, key='')
def __init__(self, kafkaAddress, schemaRegistryAddress, kafkaConnectAddress,
             credentialPath, testVersion):
    self.testVersion = testVersion
    self.credentialPath = credentialPath
    with open(self.credentialPath) as f:
        credentialJson = json.load(f)
        testHost = credentialJson["host"]
        testUser = credentialJson["user"]
        testDatabase = credentialJson["database"]
        testSchema = credentialJson["schema"]
        testWarehouse = credentialJson["warehouse"]
        pk = credentialJson["encrypted_private_key"]
        pk_passphrase = credentialJson["private_key_passphrase"]

    self.TEST_DATA_FOLDER = "./test_data/"
    self.httpHeader = {
        'Content-type': 'application/json',
        'Accept': 'application/json'
    }

    self.SEND_INTERVAL = 0.01          # send a record every 10 ms
    self.VERIFY_INTERVAL = 60          # verify every 60 secs
    self.MAX_RETRY = 120               # max wait time 120 mins
    self.MAX_FLUSH_BUFFER_SIZE = 5000  # flush the buffer once 5000 records are queued

    self.kafkaConnectAddress = kafkaConnectAddress
    self.schemaRegistryAddress = schemaRegistryAddress
    self.kafkaAddress = kafkaAddress
    self.adminClient = AdminClient({"bootstrap.servers": kafkaAddress})
    self.producer = Producer({'bootstrap.servers': kafkaAddress})
    self.avroProducer = AvroProducer({
        'bootstrap.servers': kafkaAddress,
        'schema.registry.url': schemaRegistryAddress
    })

    reg = r"[^/]*snowflakecomputing"  # find the account name
    account = re.findall(reg, testHost)
    if len(account) != 1 or len(account[0]) < 20:
        print(
            datetime.now().strftime("%H:%M:%S "),
            "Format error in 'host' field at profile.json, expecting account.snowflakecomputing.com:443"
        )

    pkb = parsePrivateKey(pk, pk_passphrase)
    self.snowflake_conn = snowflake.connector.connect(
        user=testUser,
        private_key=pkb,
        account=account[0][:-19],
        warehouse=testWarehouse,
        database=testDatabase,
        schema=testSchema)
def __init__(self, args):
    self.args = args
    self.avro_producer = AvroProducer(
        {
            'bootstrap.servers': self.args.brokers,
            'schema.registry.url': self.args.registry
        },
        default_key_schema=avro.loads(self.args.keyschema),  # key schema
        default_value_schema=avro.loads(self.args.schema)    # value schema
    )
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        self.broker_properties = {
            "bootstrap.servers": BOOTSTRAP_SERVER_URL,
            "schema.registry.url": SCHEMA_REGISTRY_URL,
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        self.producer = AvroProducer(self.broker_properties,
                                     default_key_schema=key_schema,
                                     default_value_schema=value_schema)

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        admin_client = AdminClient({'bootstrap.servers': BOOTSTRAP_SERVER_URL})
        admin_client.create_topics([
            NewTopic(topic=self.topic_name,
                     num_partitions=self.num_partitions,
                     replication_factor=self.num_replicas)
        ])

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        if self.producer:
            self.producer.flush(timeout=10.0)

    @staticmethod
    def time_millis():
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))
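# A minimal usage sketch for the Producer class above, assuming the
# BOOTSTRAP_SERVER_URL and SCHEMA_REGISTRY_URL constants are defined at
# module level. The topic name, schema paths, and payloads here are
# hypothetical illustrations, not part of the original snippet.
from confluent_kafka import avro

key_schema = avro.load("schemas/arrival_key.avsc")      # hypothetical path
value_schema = avro.load("schemas/arrival_value.avsc")  # hypothetical path

producer = Producer("station.arrivals", key_schema, value_schema=value_schema)
producer.producer.produce(
    topic=producer.topic_name,
    key={"timestamp": Producer.time_millis()},
    value={"station_id": 1, "train_id": "BL42"},  # must match the value schema
)
producer.close()  # flushes any queued messages before exit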
def __init__(
    self,
    topic_name,
    key_schema,
    value_schema=None,
    num_partitions=6,
    num_replicas=2,
):
    """Initializes a Producer object with basic settings"""
    self.topic_name = topic_name
    self.key_schema = key_schema
    self.value_schema = value_schema
    self.num_partitions = num_partitions
    self.num_replicas = num_replicas

    # Broker and Schema Registry host URLs (see the project README)
    self.broker_properties = {
        "bootstrap.servers": "PLAINTEXT://localhost:9092",
        "schema.registry.url": "http://localhost:8081"
    }

    self.client = AdminClient({"bootstrap.servers": "localhost:9092"})

    # If the topic does not already exist, try to create it
    if self.topic_name not in Producer.existing_topics:
        self.create_topic()
        Producer.existing_topics.add(self.topic_name)

    # Configure the AvroProducer with the default key and value schemas
    self.producer = AvroProducer(
        config=self.broker_properties,
        default_key_schema=self.key_schema,
        default_value_schema=self.value_schema
    )
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        self.broker_properties = {
            'bootstrap.servers': 'PLAINTEXT://localhost:9092',
            'schema.registry.url': 'http://localhost:8081'
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # Configure the AvroProducer
        self.producer = AvroProducer(self.broker_properties,
                                     default_key_schema=self.key_schema,
                                     default_value_schema=self.value_schema)

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        client = AdminClient(
            {'bootstrap.servers': self.broker_properties['bootstrap.servers']})
        new_topic = NewTopic(self.topic_name,
                             num_partitions=self.num_partitions,
                             replication_factor=self.num_replicas)
        client.create_topics([new_topic])

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        if self.producer is not None:
            self.producer.flush()

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        self.broker_properties = {
            "id": 1,
            "host": "localhost",
            "port": 29092
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # bootstrap.servers expects a "host:port" string, not the raw
        # properties dict, so build the address from the broker properties
        bootstrap_servers = "{}:{}".format(self.broker_properties["host"],
                                           self.broker_properties["port"])
        self.producer = AvroProducer({
            'bootstrap.servers': bootstrap_servers,
            'schema.registry.url': 'http://localhost:8091'
        }, default_key_schema=key_schema, default_value_schema=value_schema)

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        client = AdminClient({
            "bootstrap.servers": "{}:{}".format(self.broker_properties["host"],
                                                self.broker_properties["port"])
        })
        # NewTopic alone does nothing; submit it to the cluster
        client.create_topics(
            [NewTopic(self.topic_name, self.num_partitions, self.num_replicas)])
        logger.info("topic creation kafka integration complete")

    def close(self):
        self.producer.flush()
        logger.info("producer flushed and closed")

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))
def __init__(
    self,
    topic_name,
    key_schema,
    value_schema=None,
    num_partitions=1,
    num_replicas=1,
):
    """Initializes a Producer object with basic settings"""
    self.topic_name = topic_name
    self.key_schema = key_schema
    self.value_schema = value_schema
    self.num_partitions = num_partitions
    self.num_replicas = num_replicas
    self._client = None

    # Broker and Schema Registry host URLs (see the project README).
    # Note: cleanup.policy, compression.type, delete.retention.ms and
    # file.delete.delay.ms are topic-level settings meant for topic creation;
    # only the producer-level keys are passed to the AvroProducer below.
    self.broker_properties = {
        "schema.registry.url": "http://localhost:8081",
        "bootstrap.servers": "PLAINTEXT://localhost:9092",
        "cleanup.policy": "delete",
        "compression.type": "lz4",
        "delete.retention.ms": "2000",
        "file.delete.delay.ms": "2000",
        "linger.ms": 1000,
        "batch.num.messages": 100,
        "on_delivery": delivery_report
    }

    # If the topic does not already exist, try to create it
    if self.topic_name not in Producer.existing_topics:
        self.create_topic()
        Producer.existing_topics.add(self.topic_name)

    # Configure the AvroProducer
    self.producer = AvroProducer(
        {
            "bootstrap.servers": "PLAINTEXT://localhost:9092",
            "schema.registry.url": "http://localhost:8081"
        },
        default_key_schema=key_schema,
        default_value_schema=value_schema)
class KafkaProducer:

    def __init__(self, kafka_env='LOCAL', kafka_brokers="", kafka_apikey="",
                 schema_registry_url=""):
        self.kafka_env = kafka_env
        self.kafka_brokers = kafka_brokers
        self.kafka_apikey = kafka_apikey
        self.schema_registry_url = schema_registry_url

    def prepareProducer(self, groupID="pythonproducers", key_schema="",
                        value_schema=""):
        options = {
            'bootstrap.servers': self.kafka_brokers,
            'schema.registry.url': self.schema_registry_url,
            'group.id': groupID  # consumer-side setting; ignored by the producer
        }
        # We need this test as local kafka does not expect SSL protocol.
        if self.kafka_env != 'LOCAL':
            options['security.protocol'] = 'SASL_SSL'
            options['sasl.mechanisms'] = 'PLAIN'
            options['sasl.username'] = '******'
            options['sasl.password'] = self.kafka_apikey
        if self.kafka_env == 'ICP':
            options['ssl.ca.location'] = os.environ['PEM_CERT']
            options['schema.registry.ssl.ca.location'] = os.environ['PEM_CERT']
        print("--- This is the configuration for the producer: ---")
        print(options)
        print("---------------------------------------------------")
        self.producer = AvroProducer(options,
                                     default_key_schema=key_schema,
                                     default_value_schema=value_schema)

    def delivery_report(self, err, msg):
        """Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush()."""
        if err is not None:
            print('[ERROR] - Message delivery failed: {}'.format(err))
        else:
            print('Message delivered to {} [{}]'.format(msg.topic(),
                                                        msg.partition()))

    def publishEvent(self, topicName, value, key):
        # Important: value does not arrive as parsed JSON from
        # ContainerAvroProducer.py, so it must be deserialized first.
        self.producer.produce(topic=topicName,
                              value=json.loads(value),
                              key=json.loads(value)[key],
                              callback=self.delivery_report)
        self.producer.flush()
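# A hedged usage sketch for the KafkaProducer wrapper above. The broker
# address, registry URL, schemas, topic, and payload are illustrative
# assumptions, not values from the original snippet.
from confluent_kafka import avro

key_schema = avro.loads('{"type": "string"}')
value_schema = avro.loads(
    '{"type": "record", "name": "Container", '
    '"fields": [{"name": "containerID", "type": "string"}]}')

kp = KafkaProducer(kafka_env='LOCAL',
                   kafka_brokers='localhost:9092',
                   schema_registry_url='http://localhost:8081')
kp.prepareProducer(key_schema=key_schema, value_schema=value_schema)
# publishEvent expects a JSON string and the name of the field to use as key
kp.publishEvent('containers', '{"containerID": "c01"}', 'containerID')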
def __init__(
    self,
    topic_name,
    key_schema,
    value_schema=None,
    num_partitions=1,
    num_replicas=1,
):
    """Initializes a Producer object with basic settings"""
    self.topic_name = topic_name
    self.key_schema = key_schema
    self.value_schema = value_schema
    self.num_partitions = num_partitions
    self.num_replicas = num_replicas

    # Broker Properties
    # Documentation for the Producer API is available at
    # https://docs.confluent.io/current/clients/python.html
    # The avro producer is just a wrapper on top of the standard confluent
    # python producer. The supported configuration values are dictated by the
    # underlying librdkafka, written in C. The documentation of all available
    # properties is at
    # https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
    self.broker_properties = {
        # Default Kafka configurations
        'bootstrap.servers': 'PLAINTEXT://localhost:9092',
        'client.id': 'simulation_producer',
        # Avro schema
        'schema.registry.url': 'http://localhost:8081',
        # Batching configurations
        'linger.ms': 0.5,            # ms to wait to accumulate messages to send
        'batch.num.messages': 100,   # messages to accumulate before sending
        'queue.buffering.max.kbytes': 2097151  # ~2 GB; librdkafka's default is 1 GB
    }

    # Configure admin client
    self.admin_client = AdminClient(
        {'bootstrap.servers': self.broker_properties['bootstrap.servers']}
    )

    # If the topic does not already exist, try to create it
    if self.topic_name not in Producer.existing_topics:
        self.create_topic()
        Producer.existing_topics.add(self.topic_name)

    # Configure the AvroProducer, registering the default schemas so
    # produce() calls do not need to pass them explicitly
    self.producer = AvroProducer(
        config=self.broker_properties,
        default_key_schema=self.key_schema,
        default_value_schema=self.value_schema
    )
class Producer:
    """Defines and provides common functionality amongst Producers"""

    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        self.broker_properties = {"bootstrap.servers": KAFKA_BROKER_URL}

        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        schema_registry = CachedSchemaRegistryClient("http://localhost:8081")
        self.producer = AvroProducer(self.broker_properties,
                                     default_key_schema=self.key_schema,
                                     default_value_schema=self.value_schema,
                                     schema_registry=schema_registry)

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        client = AdminClient(self.broker_properties)
        topic = NewTopic(self.topic_name,
                         num_partitions=self.num_partitions,
                         replication_factor=self.num_replicas)
        client.create_topics([topic])

    def close(self):
        Producer.existing_topics = set([])
        # AvroProducer has no close(); flush pending messages instead
        self.producer.flush()

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))
def __init__(self):
    config = self.load_config(self.CONFIG_FILE)
    sc = CachedSchemaRegistryClient(url=config['kafkaSchemaRegistryUrl'])
    self.topic = config['kafkaTopics'][0]
    key_schema = sc.get_latest_schema(self.topic + "-key")[1]
    val_schema = sc.get_latest_schema(self.topic + "-value")[1]
    self.producer = AvroProducer(
        {
            'bootstrap.servers': config['kafkaBootstrapServers'],
            'schema.registry.url': config['kafkaSchemaRegistryUrl']
        },
        default_key_schema=key_schema,
        default_value_schema=val_schema)
def __init__(self, name, emit_datum, broker, schema_registry_url):
    self.name = name
    self.emit_datum = emit_datum
    schema = avro.loads(get_schema_def())
    self.producer = AvroProducer(
        {
            'bootstrap.servers': broker,
            'schema.registry.url': schema_registry_url,
            **get_sr_config_from_environment(),
            **get_kafka_config_from_environment(),
        },
        default_key_schema=schema,
        default_value_schema=schema)
def __init__(self):
    url = "http://localhost:8088"
    self.api_client = KSQLAPI(url)
    self.topic = "test08"
    self.bootstrap_servers = "localhost:9092"
    if utils.check_kafka_available(self.bootstrap_servers):
        value_schema_str = """
        {
            "type": "record",
            "namespace": "com.example",
            "name": "value",
            "fields": [
                {"name": "LOCATION", "type": "string"},
                {"name": "DATETIME", "type": "string"},
                {"name": "SENTIMENT", "type": "string"},
                {"name": "TEXT", "type": "string"}
            ]
        }
        """
        key_schema_str = """
        {
            "type": "record",
            "namespace": "com.example",
            "name": "key",
            "fields": [
                {"name": "LOCATION", "type": "string"},
                {"name": "DATETIME", "type": "string"},
                {"name": "SENTIMENT", "type": "string"},
                {"name": "TEXT", "type": "string"}
            ]
        }
        """
        value_schema = avro.loads(value_schema_str)
        key_schema = avro.loads(key_schema_str)
        self.key = {
            "LOCATION": "LOCATION",
            "DATETIME": "DATETIME",
            "SENTIMENT": "SENTIMENT",
            "TEXT": "TEXT"
        }
        self.producer = AvroProducer(
            {
                'bootstrap.servers': self.bootstrap_servers,
                'on_delivery': delivery_report,
                'schema.registry.url': 'http://localhost:8081'
            },
            default_key_schema=None,  # note: the parsed key_schema is not registered as default
            default_value_schema=value_schema)
    else:
        print("Could not connect to Kafka")
        exit(-1)
class PeriodicProducer(object):

    def __init__(self, bootstrap_servers, schema_registry_url, topic):
        value_schema = avro.load('resources/workshop.avsc')
        config = {
            'bootstrap.servers': bootstrap_servers,
            'schema.registry.url': 'http://{0}'.format(schema_registry_url)
        }
        self.topic = topic
        self.stopped = True
        self.end_time = 0
        self.producer = AvroProducer(config, default_value_schema=value_schema)

    def __get_props(self):
        if random() > 0.5:
            p = random_movie()
            return {'title': p[0], 'properties': {'release_year': p[1]}}
        else:
            p = random_series()
            return {'title': p[0], 'properties': {'seasons': p[1]}}

    def __loop__(self):
        now = int(time())
        props = self.__get_props()
        document = {
            'timestamp': now,
            'user': random_user(),
            'title': props['title'],
            'tags': random_tags(),
            'comment': random_sentence(),
            'rating': randint(0, 9),
            'properties': props['properties']
        }
        print('Sending {0} to kafka.'.format(document))
        self.producer.produce(topic=self.topic, value=document)
        if not self.stopped and now < self.end_time:
            Timer(1, self.__loop__).start()

    def run(self, period):
        self.stopped = False
        self.end_time = int(time()) + period
        self.__loop__()

    def is_stopped(self):
        return self.stopped

    def is_running(self):
        return not self.stopped

    def stop(self):
        self.stopped = True
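# A short usage sketch for PeriodicProducer above; the broker and registry
# addresses, topic name, and 30-second period are illustrative assumptions.
# Note the registry address is passed without a scheme, since __init__
# prepends 'http://' itself.
from time import sleep

producer = PeriodicProducer('localhost:9092', 'localhost:8081', 'workshop-ratings')
producer.run(30)   # emit roughly one random document per second for 30 seconds
sleep(35)          # __loop__ reschedules itself on a Timer, so wait past the period
producer.stop()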
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        BROKER_URL = "PLAINTEXT://localhost:9092"
        SCHEMA_REGISTRY = "http://localhost:8081"
        self.broker_properties = {"bootstrap.servers": BROKER_URL}

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        self.producer = AvroProducer(
            self.broker_properties,
            schema_registry=CachedSchemaRegistryClient(SCHEMA_REGISTRY),
            default_key_schema=self.key_schema,
            default_value_schema=self.value_schema)

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        AdminClient(self.broker_properties).create_topics(
            [NewTopic(self.topic_name,
                      num_partitions=self.num_partitions,
                      replication_factor=self.num_replicas)])

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        if self.producer is not None:
            self.producer.flush()

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))
def __init__(self,
             value_schema_loc: str = None,
             key_name: str = 'ex-key',
             topic: str = 'test'):
    if not value_schema_loc:
        raise NoAvroSchemaFileException()
    self.value_schema = avro.load(value_schema_loc)
    self.avro_producer = AvroProducer(
        {
            'bootstrap.servers': 'localhost:9092',
            'schema.registry.url': 'http://127.0.0.1:8081'
        },
        default_value_schema=self.value_schema)
    self.topic = topic
def __init__(self, schema_name, topic):
    kafka_cfg = parse_kafka_config()
    key_schema, value_schema = load_avro_schema_from_registry(
        schema_name, kafka_cfg['schema-registry-url'])

    producer_config = {
        "bootstrap.servers": kafka_cfg['bootstrap-servers'],
        "schema.registry.url": kafka_cfg['schema-registry-url']
    }

    self.topic = topic
    self.producer = AvroProducer(producer_config,
                                 default_key_schema=key_schema,
                                 default_value_schema=value_schema)
def produce(conf, data_file, schema_record):
    """Produce MetadataChangeEvent records."""
    producer = AvroProducer(conf,
                            default_value_schema=avro.load(schema_record))

    print("Producing MetadataChangeEvent records to topic {}. ^c to exit."
          .format(topic))
    with open(data_file) as fp:
        cnt = 0
        while True:
            sample = fp.readline()
            cnt += 1
            if not sample:
                break
            try:
                content = ast.literal_eval(sample.strip())
                producer.produce(topic=topic, value=content)
                producer.poll(0)
                print(" MCE{}: {}".format(cnt, sample))
            except KeyboardInterrupt:
                break
            except ValueError as e:
                print("Message serialization failed {}".format(e))
                break

    print("Flushing records...")
    producer.flush()
class Demonstrator:

    def __init__(self, broker_urls, registry_url, topic):
        self.broker_urls = broker_urls
        self.registry_url = registry_url
        if topic == "login":
            self.value_schema = schema_login
            self.key_schema = schema_login
        if topic == "message":
            self.value_schema = schema_message
            self.key_schema = schema_message
        if topic == "mouse":
            self.value_schema = schema_mouse
            self.key_schema = schema_mouse
        self.avroProducer = AvroProducer({
            'bootstrap.servers': self.broker_urls,
            'on_delivery': delivery_report,
            'schema.registry.url': self.registry_url,
        }, default_key_schema=self.key_schema,
           default_value_schema=self.value_schema)

    def checkLocation(self, loc, port):
        a_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        location = (loc, port)
        result_of_check = a_socket.connect_ex(location)
        if result_of_check == 0:
            print("Port is open")
        else:
            print("Port is not open")
        a_socket.close()

    def produceMessage(self, topic, value, key):
        self.avroProducer.produce(topic=topic, value=value, key=key)
        self.avroProducer.flush()

    def createTopic(self, *topics, partitions, replication):
        a = AdminClient({'bootstrap.servers': self.broker_urls})
        new_topics = [NewTopic(topic,
                               num_partitions=int(partitions),
                               replication_factor=int(replication))
                      for topic in topics]
        fs = a.create_topics(new_topics)
        for topic, f in fs.items():
            try:
                f.result()
                print("Topic {} created".format(topic))
            except Exception as e:
                print("Failed to create topic {}: {}".format(topic, e))
def send_to_kafka():
    Timer(10.0, send_to_kafka).start()
    try:
        print("running")
        avro_producer = AvroProducer(
            {
                'bootstrap.servers': 'up01:9092,up02:9092,up03:9092',
                'schema.registry.url': 'http://up04:8081'
            },
            default_key_schema=key_schema,
            default_value_schema=value_schema)
        value = read_from_sense_hat()
        print(value)
        avro_producer.poll(0)
        avro_producer.produce(topic='test_avro_2',
                              value=value,
                              key=key,
                              callback=delivery_report)
        avro_producer.flush()
    except Exception:
        logging.error(traceback.format_exc())
def __init__(self,
             path_to_scheme,
             broker_url=os.environ.get('KAFKA_BROKER_URL'),
             topic=os.environ.get('PRODUCER_TOPIC'),
             schema_registry_url='http://schema_registry:8081'):
    self.value_scheme = avro.load(path_to_scheme)
    self.p = AvroProducer(
        {
            'bootstrap.servers': broker_url,
            'schema.registry.url': schema_registry_url
        },
        default_value_schema=self.value_scheme)
    self.topic = topic
async def produce(topic_name):
    """Produces data into the Kafka Topic"""
    # See: https://github.com/confluentinc/confluent-kafka-python/blob/master/confluent_kafka/avro/cached_schema_registry_client.py#L47
    schema_registry = CachedSchemaRegistryClient({"url": SCHEMA_REGISTRY_URL})

    # See: https://docs.confluent.io/current/clients/confluent-kafka-python/index.html?highlight=loads#confluent_kafka.avro.AvroProducer
    p = AvroProducer({"bootstrap.servers": BROKER_URL},
                     schema_registry=schema_registry)
    while True:
        # See: https://docs.confluent.io/current/clients/confluent-kafka-python/index.html?highlight=loads#confluent_kafka.avro.AvroProducer
        p.produce(topic=topic_name,
                  value=asdict(ClickEvent()),
                  value_schema=ClickEvent.schema)
        await asyncio.sleep(1.0)
def produce(topic, conf):
    """Produce User records."""
    from confluent_kafka.avro import AvroProducer

    producer = AvroProducer(conf, default_value_schema=record_schema)

    print("Producing user records to topic {}. ^c to exit.".format(topic))
    while True:
        # Instantiate new User, populate fields, produce record, execute callbacks.
        record = User()
        try:
            record.name = input("Enter name: ")
            record.favorite_number = int(input("Enter favorite number: "))
            record.favorite_color = input("Enter favorite color: ")

            # The message passed to the delivery callback will already be serialized.
            # To aid in debugging we provide the original object to the delivery callback.
            producer.produce(topic=topic, value=record.to_dict(),
                             callback=lambda err, msg, obj=record: on_delivery(err, msg, obj))
            # Serve on_delivery callbacks from previous asynchronous produce()
            producer.poll(0)
        except KeyboardInterrupt:
            break
        except ValueError:
            print("Invalid input, discarding record...")
            continue

    print("\nFlushing records...")
    producer.flush()
class vroducer():

    def __init__(self, avro_schema, BOOTSTRAP_SERVERS, SCHEMA_REGISTRY_PATH):
        self.avroProducer = AvroProducer(
            {
                'bootstrap.servers': BOOTSTRAP_SERVERS,
                'on_delivery': self.delivery_report,
                'schema.registry.url': SCHEMA_REGISTRY_PATH
            },
            default_value_schema=avro_schema)
        self.logger = logging.getLogger("VRODUCER")

    def produce_message(self, topic_name, message):
        self.avroProducer.produce(topic=topic_name, value=message)
        self.avroProducer.flush()

    def produce_message_bulk(self, topic_name, message_list):
        for message in message_list:
            self.avroProducer.produce(topic=topic_name, value=message)
        self.avroProducer.flush()

    def delivery_report(self, err, msg):
        """Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush()."""
        if err is not None:
            self.logger.error('Message delivery failed: {}'.format(err))
        else:
            self.logger.info('Message delivered to {} [{}]'.format(
                msg.topic(), msg.partition()))
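# A brief usage sketch for the vroducer class above; the schema, broker
# address, registry URL, topic, and payloads are illustrative assumptions.
from confluent_kafka import avro

schema = avro.loads('{"type": "record", "name": "Reading", '
                    '"fields": [{"name": "value", "type": "float"}]}')

vp = vroducer(schema, 'localhost:9092', 'http://localhost:8081')
vp.produce_message('readings', {'value': 3.14})
vp.produce_message_bulk('readings', [{'value': 1.0}, {'value': 2.0}])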
async def produce(topic_name):
    """Produces data into the Kafka Topic"""
    p = AvroProducer({
        "bootstrap.servers": "PLAINTEXT://localhost:9092",
        "schema.registry.url": "http://localhost:8081",
    })
    try:
        while True:
            p.produce(topic=topic_name,
                      value=asdict(Purchase()),
                      value_schema=Purchase.schema)
            await asyncio.sleep(0.1)
    except:
        raise
class KafkaProducer:

    def __init__(
        self,
        key_schema_str,
        value_schema_str,
        raw_key_model,
        raw_value_model,
        topic_name,
        header_index,
    ):
        self.topic_name = topic_name
        self.header_index = header_index
        self.key_schema = avro.loads(key_schema_str)
        self.value_schema = avro.loads(value_schema_str)
        self.avro_producer = AvroProducer(
            {
                "bootstrap.servers": os.environ["BOOTSTRAP_SERVERS"],
                "schema.registry.url": os.environ["SCHEMA_REGISTRY_URL"],
                # Safe producer settings
                # 'enable.idempotence': True,
                # High throughput
                # 'compression.type': 'snappy',
                # 'linger.ms': 20,
                # 'batch.size': 32768
            },
            default_key_schema=self.key_schema,
            default_value_schema=self.value_schema,
        )
        self.raw_key_model = raw_key_model
        self.raw_value_model = raw_value_model

    def preprocessing(self, data):
        backed_key_obj = dict()
        backed_value_obj = dict()
        for key_model in self.raw_key_model:
            key_index = self.header_index[key_model]
            backed_key_obj[key_model] = data[key_index]
        for value_model in self.raw_value_model:
            value_index = self.header_index[value_model]
            backed_value_obj[value_model] = data[value_index]
        return backed_key_obj, backed_value_obj

    def produce_event(self, data, pre_process=True):
        if pre_process:
            key, value = self.preprocessing(data)
        else:
            key, value = data
        self.avro_producer.produce(topic=self.topic_name, key=key, value=value)
        self.avro_producer.poll(0.1)
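# A sketch of how the row-driven KafkaProducer above might be used with a
# CSV-like record. The column names, schemas, topic, and environment values
# are hypothetical assumptions made for illustration only.
import os
os.environ.setdefault("BOOTSTRAP_SERVERS", "localhost:9092")
os.environ.setdefault("SCHEMA_REGISTRY_URL", "http://localhost:8081")

key_schema_str = ('{"type": "record", "name": "k", '
                  '"fields": [{"name": "id", "type": "string"}]}')
value_schema_str = ('{"type": "record", "name": "v", "fields": ['
                    '{"name": "id", "type": "string"},'
                    '{"name": "amount", "type": "string"}]}')

producer = KafkaProducer(
    key_schema_str,
    value_schema_str,
    raw_key_model=["id"],                  # columns that form the key
    raw_value_model=["id", "amount"],      # columns that form the value
    topic_name="transactions",
    header_index={"id": 0, "amount": 1},   # column name -> position in the row
)
producer.produce_event(["tx-001", "19.99"])  # preprocessing maps the row to key/value dicts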
async def produce(topic_name):
    p = AvroProducer({
        "bootstrap.servers": "PLAINTEXT://localhost:9092",
        "schema.registry.url": "http://localhost:8081",
    })
    try:
        while True:
            p.produce(
                topic=topic_name,
                value=asdict(ClickEvent()),
                value_schema=ClickEvent.schema,
            )
            await asyncio.sleep(0.1)
    except:
        raise
def __init__(self):
    os.chdir(os.path.dirname(__file__))
    pwd = os.getcwd()
    self._producer = AvroProducer(
        {
            'bootstrap.servers': KAFKA_SERVER,
            'schema.registry.url': SCHEMA_REGISTRY_URL,
            'security.protocol': 'ssl',
            'ssl.ca.location': pwd + CAFILE,
            'ssl.certificate.location': pwd + CERTFILE,
            'ssl.key.location': pwd + KEYFILE
        },
        default_key_schema=key_schema,
        default_value_schema=value_schema)
def __init__(self, topic, value_schema_path, key_schema_path=None, config=None):
    schema = {'default_value_schema': avro.load(value_schema_path)}
    if key_schema_path is not None:
        schema['default_key_schema'] = avro.load(key_schema_path)
    self.producer = AvroProducer(
        {
            **load_producer_config(),
            **(config or {})
        },
        **schema)
    self.topic = topic
def test_produce_with_empty_value_no_schema(self):
    schema_registry = MockSchemaRegistryClient()
    producer = AvroProducer({}, schema_registry=schema_registry)
    with self.assertRaises(ValueSerializerError):
        producer.produce(topic='test', value='', key='not empty')
from confluent_kafka import avro
from confluent_kafka.avro import AvroProducer
from lipsum import generate_words
import os
import random

SCHEMA_REGISTRY_URL = 'http://172.17.0.5:8081'
BOOTSTRAP_SERVERS = '172.17.0.4'
AVSC_DIR = os.path.dirname(os.path.realpath(__file__))
KEY_SCHEMA = avro.load(os.path.join(AVSC_DIR, 'primitive_string.avsc'))
VALUE_SCHEMA = avro.load(os.path.join(AVSC_DIR, 'basic_schema.avsc'))
TOPIC = 'avrotopic'
KEY = "mykey"

avroProducer = AvroProducer({'bootstrap.servers': BOOTSTRAP_SERVERS,
                             'schema.registry.url': SCHEMA_REGISTRY_URL},
                            default_key_schema=KEY_SCHEMA,
                            default_value_schema=VALUE_SCHEMA)

for i in range(100):
    value = {"name": generate_words(count=1),
             "surname": generate_words(count=2),
             "number": random.randint(0, 100)}
    print(str(value))
    avroProducer.produce(topic=TOPIC, value=value, key=KEY)

avroProducer.flush()  # make sure queued messages are delivered before exit
def test_produce_no_value_schema(self):
    producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'})
    with self.assertRaises(ValueSerializerError):
        # Producer should not accept a value with no schema
        producer.produce(topic='test', value={"name": 'abc"'})
def test_produce_no_key_schema(self):
    producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'})
    with self.assertRaises(KeySerializerError):
        # If the key is provided as a dict an avro schema must also be provided
        producer.produce(topic='test', key={"name": 'abc"'})
def test_produce_value_and_key_schemas(self):
    value_schema = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
    producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'},
                            default_value_schema=value_schema,
                            default_key_schema=value_schema)
    with self.assertRaises(ConnectionError):  # Nonexistent schema registry
        producer.produce(topic='test', value={"name": 'abc"'},
                         key={"name": 'abc"'})