def produce_tweet():
    if 'username' in request.cookies:
        id = request.form['id']
        content = request.form['content']
        location = request.form['location']

        # extract tags and mentions :)
        tags = [h for h in content.split() if h.startswith('#')]
        mentions = [h for h in content.split() if h.startswith('@')]

        value = {
            "author": f"{id}",
            "content": f"{content}",
            "timestamp": f"{time.time()}",
            "location": f"{location}",
            "tags": tags,
            "mentions": mentions
        }
        key = {"name": f"{id}"}

        p = AvroProducer(
            {
                'bootstrap.servers': BOOTSTRAP_SERVERS,
                'enable.idempotence': 'true',  # for EOS: ensures each tweet is sent only once
                'schema.registry.url': SCHEMA_REGISTRY_URL
            },
            default_key_schema=KEY_SCHEMA,
            default_value_schema=VALUE_SCHEMA)
        p.produce(topic=TOPIC, value=value, key=key)
        p.flush()
        return 'Tweet published!'
    else:
        return 'Oops, you are not logged in...'
def produce(topic, conf):
    """
        Produce User records
    """
    from confluent_kafka.avro import AvroProducer

    producer = AvroProducer(conf, default_value_schema=record_schema)

    print("Producing user records to topic {}. ^c to exit.".format(topic))

    while True:
        # Instantiate new User, populate fields, produce record, execute callbacks.
        record = User()
        try:
            record.name = input("Enter name: ")
            record.favorite_number = int(input("Enter favorite number: "))
            record.favorite_color = input("Enter favorite color: ")

            # The message passed to the delivery callback will already be serialized.
            # To aid in debugging we provide the original object to the delivery callback.
            producer.produce(topic=topic, value=record.to_dict(),
                             callback=lambda err, msg, obj=record: on_delivery(err, msg, obj))

            # Serve on_delivery callbacks from previous asynchronous produce()
            producer.poll(0)
        except KeyboardInterrupt:
            break
        except ValueError:
            print("Invalid input, discarding record...")
            continue

    print("\nFlushing records...")
    producer.flush()
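# A minimal, hedged sketch of invoking produce() above; the broker and
# registry addresses are assumptions, and record_schema / User / on_delivery
# are expected to be defined at module level as the function implies.
if __name__ == "__main__":
    conf = {
        'bootstrap.servers': 'localhost:9092',           # assumed broker
        'schema.registry.url': 'http://localhost:8081'   # assumed registry
    }
    produce('users', conf)  # 'users' is an illustrative topic name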
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        self.broker_properties = {
            'bootstrap.servers': config.BROKER_URL,
            'schema.registry.url': config.SCHEMA_REGISTRY_URL
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        self.producer = AvroProducer(self.broker_properties,
                                     default_key_schema=self.key_schema,
                                     default_value_schema=self.value_schema)

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        client = AdminClient({
            "bootstrap.servers": self.broker_properties.get("bootstrap.servers")
        })
        topic_metadata = client.list_topics(timeout=5)
        if self.topic_name not in topic_metadata.topics:
            client.create_topics([
                NewTopic(topic=self.topic_name,
                         num_partitions=self.num_partitions,
                         replication_factor=self.num_replicas)
            ])
            logger.info(f"topic {self.topic_name} created")

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        if self.producer is not None:
            self.producer.flush()

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))
def send_to_kafka():
    Timer(10.0, send_to_kafka).start()
    try:
        print("running")
        avro_producer = AvroProducer(
            {
                'bootstrap.servers': 'up01:9092,up02:9092,up03:9092',
                'schema.registry.url': 'http://up04:8081'
            },
            default_key_schema=key_schema,
            default_value_schema=value_schema)
        value = read_from_sense_hat()
        print(value)
        avro_producer.poll(0)
        avro_producer.produce(topic='test_avro_2',
                              value=value,
                              key=key,
                              callback=delivery_report)
        avro_producer.flush()
    except Exception as e:
        logging.error(traceback.format_exc())
class QRSProducer(object):
    def __init__(self, **kwargs):
        self.TOPIC = kwargs.get("TOPIC", "db")
        self.schema_registry_url = kwargs.get("SCHEMA_REGISTRY_URL")
        self.logger = kwargs.get("logger", logging.getLogger())
        self.Q = kwargs.get("Q")
        self.bootstrap_servers = kwargs.get("BOOTSTRAP_SERVERS")
        self.producer = AvroProducer(
            {
                'bootstrap.servers': self.bootstrap_servers,
                'on_delivery': self.delivery_report,
                'schema.registry.url': self.schema_registry_url
            },
            default_key_schema=key_schema,
            default_value_schema=value_schema)

    def delivery_report(self, error, message):
        """ Called once for each message produced to indicate delivery result.
            Triggered by poll() or flush(). """
        if error is not None:
            print('Message delivery failed: {}'.format(error))
        else:
            print('Message delivered to {} [{}]'.format(
                message.topic(), message.partition()))

    def produce(self, key, value):
        self.producer.produce(topic=self.TOPIC, value=value, key=key)
        self.producer.flush()
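# A hedged usage sketch for QRSProducer; the endpoints are placeholders, and
# the key/value dicts must match the module-level key_schema / value_schema
# the class relies on (their fields are assumptions here).
qrs = QRSProducer(
    TOPIC="db",
    BOOTSTRAP_SERVERS="localhost:9092",           # assumed
    SCHEMA_REGISTRY_URL="http://localhost:8081",  # assumed
)
qrs.produce(key={"id": "qrs-1"}, value={"signal": "..."})  # hypothetical fields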
class KafkaAvroMessageProducer(object):
    def __init__(self, args):
        self.args = args
        self.avro_producer = AvroProducer(
            {
                'bootstrap.servers': self.args.brokers,
                'schema.registry.url': self.args.registry
            },
            default_key_schema=avro.loads(self.args.keyschema),  # key schema
            default_value_schema=avro.loads(self.args.schema)    # value schema
        )

    def produce(self):
        if self.args.input is None:  # interactive
            for line in sys.stdin:
                clean_line = line.strip()
                if not clean_line:
                    break
                self.produce_one(clean_line, flush=True)
        else:
            with open(self.args.input, 'r') as f:
                for line in f:
                    self.produce_one(line)
            self.avro_producer.flush()

    def produce_one(self, line, flush=False):
        key, value = line.split(self.args.separator)
        self.avro_producer.produce(topic=self.args.topic,
                                   key=json.loads(key),
                                   value=json.loads(value))
        if flush:
            self.avro_producer.flush()
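# Each input line the class above consumes is "<key-json><separator><value-json>",
# e.g. with separator "|":  {"id": "1"}|{"name": "alice"}
# A hedged wiring sketch with a hand-built args object; the attribute names
# mirror what the class reads, but the endpoints and schemas are illustrative.
from types import SimpleNamespace

args = SimpleNamespace(
    brokers="localhost:9092",          # assumed broker
    registry="http://localhost:8081",  # assumed registry
    keyschema='{"type": "record", "name": "k", "fields": [{"name": "id", "type": "string"}]}',
    schema='{"type": "record", "name": "v", "fields": [{"name": "name", "type": "string"}]}',
    topic="demo",
    separator="|",
    input=None,                        # None -> read lines from stdin
)
KafkaAvroMessageProducer(args).produce()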
def send_record(args):
    key_schema, value_schema = load_avro_schema_from_file(
        args.key_schema_file, args.value_schema_file)

    producer_config = {
        "bootstrap.servers": args.bootstrap_servers,
        "schema.registry.url": args.schema_registry,
    }

    producer = AvroProducer(
        producer_config,
        default_key_schema=key_schema,
        default_value_schema=value_schema,
    )

    key = json.loads(args.record_key) if args.record_key else str(uuid.uuid4())
    value = json.loads(args.record_value)

    try:
        producer.produce(topic=args.topic, key=key, value=value)
    except Exception as e:
        print(
            f"Exception while producing record value - {value} to topic - {args.topic}: {e}"
        )
    else:
        print(
            f"Successfully produced record value - {value} to topic - {args.topic}"
        )

    producer.flush()
def confluent_kafka_producer_performance(args):
    value_schema = avro.loads(value_schema_str)
    key_schema = avro.loads(key_schema_str)

    avroProducer = AvroProducer(
        {
            'bootstrap.servers': args.bootstrap_servers,
            'schema.registry.url': args.schema_registry
        },
        default_key_schema=key_schema,
        default_value_schema=value_schema)

    messages_to_retry = []
    for i in range(int(args.msg_count)):
        value = {"data": random.choice(simple_messages)}
        key = {"key": str(uuid.uuid4())}
        try:
            avroProducer.produce(topic=args.topic, value=value, key=key)
        except BufferError:
            # Local queue is full; remember this message so the retry loop
            # below resends it (rather than resending only the last one).
            messages_to_retry.append((key, value))

    for key, value in messages_to_retry:
        avroProducer.poll(0)
        try:
            avroProducer.produce(topic=args.topic, value=value, key=key)
        except BufferError:
            # Give the queue more time to drain before the final attempt.
            avroProducer.poll(0)
            avroProducer.produce(topic=args.topic, value=value, key=key)

    avroProducer.flush()
def send_record():
    key_schema, value_schema = load_avro_schema_from_file()

    producer_config = {
        "bootstrap.servers": 'kafka.qa-aws.intranet..:9092',
        "schema.registry.url": 'http://schema-registry.qa-aws.intranet..:8081'
    }

    producer = AvroProducer(producer_config,
                            default_key_schema=key_schema,
                            default_value_schema=value_schema)

    key = str(uuid.uuid4())
    # value = json.loads(data)

    try:
        producer.produce(topic='fct.dsr.financialservices.loan.Limites',
                         key=key,
                         value=data)
    except Exception as e:
        print(f"Exception while producing record value - {data}: {e}")
    else:
        print("Successfully produced record value")

    producer.flush()
def produce():
    value_schema = avro.loads(value_schema_str)
    key_schema = avro.loads(key_schema_str)

    def delivery_report(err, msg):
        """ Called once for each message produced to indicate delivery result.
            Triggered by poll() or flush(). """
        if err is not None:
            print('Message delivery failed: {}'.format(err))
        else:
            print('Message delivered to {} [{}]'.format(
                msg.topic(), msg.partition()))

    avro_producer = AvroProducer(
        {
            'bootstrap.servers': config.BOOTSTRAP_SERVERS,
            'on_delivery': delivery_report,
            'schema.registry.url': config.SCHEMA_REGISTRY_URL
        },
        default_key_schema=key_schema,
        default_value_schema=value_schema)

    cluster_metadata = avro_producer.list_topics()
    if TOPIC_NAME not in cluster_metadata.topics.keys():
        for name in most_common_names_usa:
            value = {"rank": name[0], "name": name[1], "data": name[2]}
            key = {"rank": name[0]}
            avro_producer.produce(topic=TOPIC_NAME, value=value, key=key)
        avro_producer.flush()
    else:
        print(f"{TOPIC_NAME} exists, do nothing")
def write_tweets(tweets, filename):
    '''
    Function that publishes tweets to a Kafka topic.
    '''
    value_schema = avro.load('ValueSchema.avsc')
    key_schema = avro.load('KeySchema.avsc')

    avroProducer = AvroProducer(
        {'bootstrap.servers': '172.27.146.20:9092',
         'schema.registry.url': 'http://172.27.146.20:8081'},
        default_key_schema=key_schema,
        default_value_schema=value_schema)

    for tweet in tweets:
        x = json.dumps(tweet._json)
        jsonObj = json.loads(x)

        created_at = jsonObj['created_at']
        id_str = jsonObj['id_str']
        name = jsonObj['user']['name']
        screen_name = jsonObj['user']['screen_name']
        text = jsonObj['text']

        key = {"id_str": id_str}
        value = {"id_str": id_str,
                 "created_at": created_at,
                 "name": name,
                 "screen_name": screen_name,
                 "text": text}

        avroProducer.produce(topic='bigData1',
                             value=value,
                             key=key,
                             key_schema=key_schema,
                             value_schema=value_schema)
        print(value)
        sleep(0.01)

    avroProducer.flush(10)
class KafkaClient:
    """ client for publishing vectorization results to kafka """

    def __init__(self,
                 schema_registry='http://127.0.0.1:8081',
                 bootstrap_servers='localhost:9092',
                 topic='paintings'):
        self.painting_schema = avro.load('../avro/painting.avsc')
        self.painting_key_schema = avro.load('../avro/painting.key.avsc')
        self.topic = topic
        self.avro_producer = AvroProducer(
            {
                'bootstrap.servers': bootstrap_servers,
                'schema.registry.url': schema_registry,
                'default.topic.config': {
                    'acks': 'all'
                }
            },
            default_value_schema=self.painting_schema,
            default_key_schema=self.painting_key_schema)

    def submit(self, vectorized_img):
        if isinstance(vectorized_img, VectorizedImage):
            value = vectorized_img.to_dict()
            self.avro_producer.produce(
                topic=self.topic,
                key={'filename': vectorized_img.filename},
                value=value)
        else:
            raise TypeError("vectorized image must be an instance of " +
                            VectorizedImage.__name__)

    def flush(self):
        self.avro_producer.flush()
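# A hedged usage sketch for KafkaClient; it assumes the avro schema files
# exist at the relative paths the constructor loads, and that VectorizedImage
# exposes .filename and .to_dict() as submit() requires.
client = KafkaClient(topic='paintings')
# img = ...  # a VectorizedImage produced by the caller's vectorization step
# client.submit(img)
client.flush()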
def produce_test_messages_with_avro(
        avro_producer: AvroProducer,
        topic: Tuple[str, int]) -> Iterable[KafkaMessage]:
    topic_name, num_partitions = topic
    with open("tests/test_samples/key_schema.avsc", "r") as file:
        key_schema = load_schema(file.read())
    with open("tests/test_samples/value_schema.avsc", "r") as file:
        value_schema = load_schema(file.read())

    messages = []
    for i in range(10):
        partition = random.randrange(0, num_partitions)
        key = {"id": str(i)}
        value = {"first": "Firstname", "last": "Lastname"}
        messages.append(
            KafkaMessage(json.dumps(key), json.dumps(value), partition,
                         key_schema, value_schema))
        avro_producer.produce(
            topic=topic_name,
            key=key,
            value=value,
            key_schema=key_schema,
            value_schema=value_schema,
            partition=partition,
        )
    avro_producer.flush()
    return messages
class Collector:
    def __init__(self):
        self._logger = logging.getLogger('gunicorn.error')
        value_schema = avro.loads(value_schema_str)
        key_schema = avro.loads(key_schema_str)
        self._producer = AvroProducer(
            {
                'bootstrap.servers': f'{os.getenv("BROKER_HOST")}:9092',
                'schema.registry.url': f'http://{os.getenv("SCHEMA_REGISTRY_HOST")}:8081',
                'on_delivery': self._delivery_report
            },
            default_key_schema=key_schema,
            default_value_schema=value_schema)

    def collect_phrase(self, phrase):
        # Remove the pipe character, which is treated as a special character in this system
        phrase = phrase.lower().translate({ord(i): None for i in '|'})
        self._producer.produce(topic='phrases',
                               value={"phrase": phrase},
                               key={"phrase": phrase})
        self._producer.flush()

    def _delivery_report(self, err, msg):
        """ Called once for each message produced to indicate delivery result.
            Triggered by poll() or flush(). """
        if err is not None:
            self._logger.error('Message delivery to broker failed: {}'.format(err))
        else:
            self._logger.info('Message delivered to broker on {} [{}]'.format(
                msg.topic(), msg.partition()))
def produce(conf, data_file, schema_record):
    """
        Produce MetadataChangeEvent records
    """
    producer = AvroProducer(conf, default_value_schema=avro.load(schema_record))

    print("Producing MetadataChangeEvent records to topic {}. ^c to exit.".
          format(topic))

    with open(data_file) as fp:
        cnt = 0
        while True:
            sample = fp.readline()
            cnt += 1
            if not sample:
                break
            try:
                content = ast.literal_eval(sample.strip())
                producer.produce(topic=topic, value=content)
                producer.poll(0)
                print("  MCE{}: {}".format(cnt, sample))
            except KeyboardInterrupt:
                break
            except ValueError as e:
                print("Message serialization failed {}".format(e))
                break

    print("Flushing records...")
    producer.flush()
def send_record(args):
    if args.record_value is None:
        raise AttributeError("--record-value is not provided.")
    if args.schema_file is None:
        raise AttributeError("--schema-file is not provided.")

    key_schema, value_schema = load_avro_schema_from_file(args.schema_file)

    producer_config = {
        "bootstrap.servers": args.bootstrap_servers,
        "schema.registry.url": args.schema_registry
    }

    producer = AvroProducer(producer_config,
                            default_key_schema=key_schema,
                            default_value_schema=value_schema)

    key = args.record_key if args.record_key else str(uuid.uuid4())
    value = json.loads(args.record_value)

    try:
        producer.produce(topic=args.topic, key=key, value=value)
    except Exception as e:
        print(
            f"Exception while producing record value - {value} to topic - {args.topic}: {e}"
        )
    else:
        print(
            f"Successfully produced record value - {value} to topic - {args.topic}"
        )

    producer.flush()
class MyAvroProducer():
    def __init__(self, schema_name, topic):
        kafka_cfg = parse_kafka_config()
        key_schema, value_schema = load_avro_schema_from_registry(
            schema_name, kafka_cfg['schema-registry-url'])

        producer_config = {
            "bootstrap.servers": kafka_cfg['bootstrap-servers'],
            "schema.registry.url": kafka_cfg['schema-registry-url']
        }

        self.topic = topic
        self.producer = AvroProducer(producer_config,
                                     default_key_schema=key_schema,
                                     default_value_schema=value_schema)

    def send_record(self, record_value, record_key=None):
        key = record_key if record_key else str(uuid.uuid4())
        value = json.loads(record_value)

        try:
            self.producer.produce(topic=self.topic, key=key, value=value)
        except Exception as e:
            print(
                f"Exception while producing record value - {value} to topic - {self.topic}: {e}"
            )
        else:
            print(
                f"Successfully produced record value - {value} to topic - {self.topic}"
            )

        self.producer.flush()
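# A hedged usage sketch for MyAvroProducer, assuming parse_kafka_config() and
# load_avro_schema_from_registry() are available and a schema named
# "user-value" is registered; all names here are illustrative.
producer = MyAvroProducer(schema_name="user-value", topic="users")
producer.send_record('{"name": "alice"}')                       # UUID key generated
producer.send_record('{"name": "bob"}', record_key="user-bob")  # explicit key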
class vroducer():
    def __init__(self, avro_schema, BOOTSTRAP_SERVERS, SCHEMA_REGISTRY_PATH):
        self.avroProducer = AvroProducer(
            {
                'bootstrap.servers': BOOTSTRAP_SERVERS,
                'on_delivery': self.delivery_report,
                'schema.registry.url': SCHEMA_REGISTRY_PATH
            },
            default_value_schema=avro_schema)
        self.logger = logging.getLogger("VRODUCER")

    def produce_message(self, topic_name, message):
        self.avroProducer.produce(topic=topic_name, value=message)
        self.avroProducer.flush()

    def produce_message_bulk(self, topic_name, message_list):
        # Use the topic_name parameter (not a global TOPIC_NAME) for each message.
        for message in message_list:
            self.avroProducer.produce(topic=topic_name, value=message)
        self.avroProducer.flush()

    def delivery_report(self, err, msg):
        """ Called once for each message produced to indicate delivery result.
            Triggered by poll() or flush(). """
        if err is not None:
            self.logger.error('Message delivery failed: {}'.format(err))
        else:
            self.logger.info('Message delivered to {} [{}]'.format(
                msg.topic(), msg.partition()))
class AvroProducerFacade:
    def __init__(self, name, emit_datum, broker, schema_registry_url):
        self.name = name
        self.emit_datum = emit_datum
        schema = avro.loads(get_schema_def())
        self.producer = AvroProducer(
            {
                'bootstrap.servers': broker,
                'schema.registry.url': schema_registry_url,
                **get_sr_config_from_environment(),
                **get_kafka_config_from_environment(),
            },
            default_key_schema=schema,
            default_value_schema=schema)

    def delivery_callback(self, err, msg):
        if err:
            log.debug("Failed to send from '%s': %s", self.name, err)
            datum = Datum(bad_count=1)
        else:
            datum = Datum(good_count=1)
        self.emit_datum(datum)

    def produce(self, topic, poll_wait=0):
        value = {'name': 'foo'}
        self.producer.produce(topic=topic,
                              callback=self.delivery_callback,
                              key=value,
                              value=value)
        self.producer.poll(poll_wait)

    def close(self):
        self.producer.flush()
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        self.broker_properties = {
            "bootstrap.servers": BROKER_URL,
            "schema.registry.url": SCHEMA_REGISTRY
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        self.producer = AvroProducer(self.broker_properties,
                                     default_key_schema=self.key_schema,
                                     default_value_schema=self.value_schema)

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        client = AdminClient(
            {"bootstrap.servers": self.broker_properties["bootstrap.servers"]})
        futures = client.create_topics([
            NewTopic(topic=self.topic_name,
                     num_partitions=self.num_partitions,
                     replication_factor=self.num_replicas)
        ])
        for topic, future in futures.items():
            try:
                future.result()
                logger.info("topic created")
            except Exception as e:
                logger.error(f"failed to create topic {self.topic_name}: {e}")

    def time_millis(self):
        return int(round(time.time() * 1000))

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        self.producer.flush()
        logger.info("Producer prepared for exit.")
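# A minimal sketch of subclassing the base Producer above; the topic name and
# schema paths are assumptions for illustration only.
from confluent_kafka import avro

class WeatherProducer(Producer):
    def __init__(self):
        super().__init__(
            topic_name="weather.events",                           # assumed
            key_schema=avro.load("schemas/weather_key.avsc"),      # assumed
            value_schema=avro.load("schemas/weather_value.avsc"),  # assumed
        )

    def emit(self, temperature):
        # The key uses the millisecond timestamp helper the base class provides.
        self.producer.produce(
            topic=self.topic_name,
            key={"timestamp": self.time_millis()},
            value={"temperature": temperature},
        )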
def kafka_producer(topic_name, BROKER_URL, SCHEMA_REGISTRY_URL,
                   AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, S3_BUCKET_NAME):
    """
    Kafka Avro Producer, produces events given schema
    """
    # Avro schema
    key_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/key_schema.json")
    value_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/value_schema.json")

    # Get a handle on s3
    s3 = boto3.resource('s3',
                        aws_access_key_id=AWS_ACCESS_KEY_ID,
                        aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
    # s3_object = s3.Object(bucket_name=S3_BUCKET_NAME, key=f'{OPERATOR}.txt')
    s3_object = s3.Object(bucket_name=S3_BUCKET_NAME, key='Bird.txt')
    streaming_body = s3_object.get()['Body']

    broker_properties = {
        "bootstrap.servers": BROKER_URL,
        "schema.registry.url": SCHEMA_REGISTRY_URL,
        "client.id": "base.producer",
    }

    producer = AvroProducer(
        broker_properties,
        default_key_schema=key_schema,
        default_value_schema=value_schema,
    )

    keys = [
        'bike_id', 'is_disabled', 'is_reserved', 'last_updated', 'lat',
        'lon', 'operator', 'vehicle_type'
    ]

    while True:
        try:
            for ln in codecs.getreader('utf-8')(streaming_body):
                # sl = ln.rstrip().split(",")
                d = dict((x.strip(), y.strip())
                         for x, y in (element.split(':')
                                      for element in ln.split(', ')))
                print(d)
                if set(keys).issubset(d.keys()):
                    producer.produce(
                        topic=topic_name,
                        key={"timestamp": time_millis()},
                        value=asdict(
                            Event(d['bike_id'],
                                  distutils.util.strtobool(d['is_disabled']),
                                  distutils.util.strtobool(d['is_reserved']),
                                  int(d['last_updated']), float(d['lat']),
                                  float(d['lon']), d['operator'],
                                  d['vehicle_type'])),
                        on_delivery=acked)
                time.sleep(2)
        except KeyboardInterrupt:
            break

    producer.flush(timeout=1)
def produce(
    self,
    http_check_result: HttpCheckResult,
):
    key_schema = avro.load(self.config.avro_key_schema)
    value_schema = avro.load(self.config.avro_value_schema)
    p = AvroProducer(
        {
            "bootstrap.servers": self.config.broker,
            "on_delivery": self.delivery_report,
            "schema.registry.url": self.config.schema_registry_url,
        },
        default_key_schema=key_schema,
        default_value_schema=value_schema,
    )
    key = {"timestamp": http_check_result.timestamp.isoformat()}
    value = {
        "status_code": http_check_result.status_code,
        "matches_regex": http_check_result.match_regex,
        "response_time_seconds": http_check_result.response_time_seconds,
    }
    logger.debug(f"Producing message: {key} {value}")
    p.produce(topic=self.config.topic, value=value, key=key)
    p.flush()
def run(args, messenger):
    """Produce messages according to the specified Avro schema"""
    assert args.schema_registry_host is not None and args.schema is not None

    value_schema = avro.load(args.schema)
    conf = {
        'bootstrap.servers': "{}:9092".format(args.schema_registry_host),
        'schema.registry.url': "http://{}:8081".format(args.schema_registry_host)
    }
    avro_producer = AvroProducer(conf, default_value_schema=value_schema)

    while True:
        # Get current timestamp
        timestamp = int(time.time())

        # Assemble avro-formatted message filled with generated data
        message, values = messenger.get_message(timestamp)

        # Publish the message under the specified topic on the message bus
        avro_producer.produce(topic=args.topic, value=message)

        if args.carbon_host is not None:
            # If a Carbon host is provided, send to Grafana dashboard
            messenger.send_dashboard(timestamp, values, args.carbon_host)

        # Flush the buffer
        avro_producer.flush()

        # Wait for the configured interval
        time.sleep(1.0 / args.frequency)
def produce(self, preparedMessageArray):
    prodConf = self.producerConfig()
    producer = AvroProducer(prodConf, default_value_schema=self.avroSchema)
    for preparedMessage in preparedMessageArray:
        producer.produce(
            topic=self.getTopic(),
            value=preparedMessage.to_dict(),
            callback=lambda err, msg, obj=preparedMessage: self.on_delivery(err, msg, obj))
    producer.flush()
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        # Configure the broker properties
        self.broker_properties = {
            "BROKER_URL": "PLAINTEXT://localhost:9092",
            "SCHEMA_REGISTRY_URL": "http://localhost:8081",
            "group.id": f"{self.topic_name}",
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # Configure the AvroProducer
        self.producer = AvroProducer(
            {"bootstrap.servers": self.broker_properties["BROKER_URL"]},
            schema_registry=CachedSchemaRegistryClient(
                {"url": self.broker_properties["SCHEMA_REGISTRY_URL"]},
            ))

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        client = AdminClient(
            {"bootstrap.servers": self.broker_properties["BROKER_URL"]})
        topic = NewTopic(
            self.topic_name,
            num_partitions=self.num_partitions,
            replication_factor=self.num_replicas,
        )
        client.create_topics([topic])

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        self.producer.flush()

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    client = AdminClient({"bootstrap.servers": BROKER_URL})
    existing_topics = set(
        t.topic for t in iter(client.list_topics(timeout=5).topics.values()))

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)
        else:
            logger.debug("Topic already exists: %s", self.topic_name)

        schema_registry = CachedSchemaRegistryClient(
            {"url": SCHEMA_REGISTRY_URL})
        self.producer = AvroProducer({"bootstrap.servers": BROKER_URL},
                                     schema_registry=schema_registry)

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        logger.info("Creating topic: %s", self.topic_name)
        futures = self.client.create_topics([
            NewTopic(topic=self.topic_name,
                     num_partitions=self.num_partitions,
                     replication_factor=self.num_replicas),
        ])
        for _, future in futures.items():
            try:
                future.result()
            except Exception:
                # Ignore failures such as the topic already existing.
                pass

    def close(self):
        self.producer.flush()

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))
def produce_asgard_message(context, topic, sentiment_message):
    producer = AvroProducer(
        {
            'bootstrap.servers': context.broker,
            'schema.registry.url': context.schema_registry_url
        },
        default_value_schema=context.sentiment_schema)
    producer.produce(topic=topic, value=sentiment_message)
    producer.flush()
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema=None,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        self.broker_properties = {
            'bootstrap.servers': 'PLAINTEXT://localhost:9092',
            'schema.registry.url': 'http://localhost:8081'
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # Configure the AvroProducer
        self.producer = AvroProducer(config=self.broker_properties,
                                     default_key_schema=self.key_schema,
                                     default_value_schema=self.value_schema)

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        new_topics = [
            NewTopic(self.topic_name,
                     num_partitions=self.num_partitions,
                     replication_factor=self.num_replicas)
        ]
        admin = AdminClient(
            {'bootstrap.servers': self.broker_properties['bootstrap.servers']})
        admin.create_topics(new_topics)

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        self.producer.flush()

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        self.broker_properties = {
            "bootstrap.servers":
            "localhost:9092,localhost:9093,localhost:9094"
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        self.producer = AvroProducer(
            config={
                "bootstrap.servers":
                "localhost:9092,localhost:9093,localhost:9094",
                "schema.registry.url": "http://localhost:8081",
            },
            default_key_schema=self.key_schema,
            default_value_schema=self.value_schema)

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        admin_client = AdminClient(self.broker_properties)
        new_topic = NewTopic(self.topic_name, self.num_partitions,
                             self.num_replicas)
        admin_client.create_topics([new_topic])

    def time_millis(self):
        return int(round(time.time() * 1000))

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        # confluent_kafka producers have no close() method; flushing
        # pending messages is sufficient cleanup.
        self.producer.flush()
        logger.info(
            f"Producer for topic : {self.topic_name} has been flushed and closed."
        )
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        self.broker_properties = {"id": 1, "host": "localhost", "port": 29092}

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # bootstrap.servers must be a "host:port" string, not the raw
        # broker_properties dict.
        self.producer = AvroProducer(
            {
                'bootstrap.servers':
                f"{self.broker_properties['host']}:{self.broker_properties['port']}",
                'schema.registry.url': 'http://localhost:8091'
            },
            default_key_schema=key_schema,
            default_value_schema=value_schema)

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        # Submit the NewTopic request via AdminClient; constructing the
        # NewTopic alone does not create anything.
        client = AdminClient({
            'bootstrap.servers':
            f"{self.broker_properties['host']}:{self.broker_properties['port']}"
        })
        client.create_topics([
            NewTopic(self.topic_name, self.num_partitions, self.num_replicas)
        ])
        logger.info("topic creation kafka integration complete")

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        self.producer.flush()

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))
SCHEMA_REGISTRY_URL = 'http://172.17.0.5:8081'
BOOTSTRAP_SERVERS = '172.17.0.4'
AVSC_DIR = os.path.dirname(os.path.realpath(__file__))
KEY_SCHEMA = avro.load(os.path.join(AVSC_DIR, 'primitive_string.avsc'))
VALUE_SCHEMA = avro.load(os.path.join(AVSC_DIR, 'basic_schema.avsc'))
TOPIC = 'avrotopic'
KEY = "mykey"

avroProducer = AvroProducer(
    {'bootstrap.servers': BOOTSTRAP_SERVERS,
     'schema.registry.url': SCHEMA_REGISTRY_URL},
    default_key_schema=KEY_SCHEMA,
    default_value_schema=VALUE_SCHEMA)

for i in range(100):
    value = {"name": generate_words(count=1),
             "surname": generate_words(count=2),
             "number": random.randint(0, 100)}
    print(value)
    avroProducer.produce(topic=TOPIC, value=value, key=KEY)

avroProducer.flush()