def test_basic_api():
    """ Basic API tests, these wont really do anything since there is no
        broker configured.

        Covers: rejecting a missing configuration dict, produce() with
        positional and keyword arguments, a delivery callback that expects a
        message timeout, poll()/flush() with and without a timeout, and
        list_topics() failing with a timeout or transport error. """

    # Producer() without a configuration dict is expected to raise TypeError.
    try:
        p = Producer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    p = Producer({'socket.timeout.ms': 10,
                  'error_cb': error_cb,
                  'message.timeout.ms': 10})

    p.produce('mytopic')
    p.produce('mytopic', value='somedata', key='a key')

    def on_delivery(err, msg):
        # Print the actual delivery outcome (the error and the message), not
        # the builtin ``str`` type.
        print('delivery', err, msg)
        # Since there is no broker, produced messages should time out.
        assert err.code() == KafkaError._MSG_TIMED_OUT

    p.produce(topic='another_topic', value='testing', partition=9,
              callback=on_delivery)

    p.poll(0.001)

    p.flush(0.002)
    p.flush()

    # Without a broker the metadata request may fail; when it does, the
    # error must be a timeout or a transport failure.
    try:
        p.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)
class KafkaPublisher(object):
    """Publish messages to Kafka topics through a confluent-kafka Producer.

    Args:
        connection: Configuration dict handed to ``Producer``; a falsy value
            is replaced by an empty dict.
        asynchronous: When true (the default) ``publish`` returns right after
            queueing the message; when false it flushes the producer after
            every message.
    """

    def __init__(self, connection, asynchronous=True):
        # Imported at call time rather than at module import time.
        from confluent_kafka import Producer

        self.producer = Producer(connection or {})
        self.asynchronous = asynchronous

    def publish(self, channel, value, key=None):
        """Queue ``value`` (with optional ``key``) on the topic ``channel``."""
        self.producer.produce(topic=channel, value=value, key=key)
        if self.asynchronous:
            # Serve pending delivery reports without blocking; otherwise they
            # pile up in the producer's queue until something flushes it.
            self.producer.poll(0)
        else:
            self.producer.flush()
def test_produce_timestamp():
    """ Test produce() with timestamp arg """
    settings = {
        'socket.timeout.ms': 10,
        'error_cb': error_cb,
        'message.timeout.ms': 10,
    }
    producer = Producer(settings)

    # Requires librdkafka >=v0.9.4
    try:
        producer.produce('mytopic', timestamp=1234567)
    except NotImplementedError:
        # Should only fail on non-supporting librdkafka
        timestamps_supported = libversion()[1] >= 0x00090400
        if timestamps_supported:
            raise

    producer.flush()
def producer(args, sniff_timeout_ms=500, sniff_promisc=True):
    """
    Captures packets from a network interface and sends them to a Kafka topic.

    Reads ``kafka_configs``, ``kafka_topic``, ``interface``, ``snaplen``,
    ``max_packets`` and ``pretty_print`` from ``args``.  Capturing stops when
    the module-level ``finished`` event is set or, if ``args.max_packets`` is
    positive, once that many packets have been received.  Queued messages are
    always flushed before returning, even when the capture loop raises.
    """

    # setup the signal handler
    signal.signal(signal.SIGINT, signal_handler)

    global producer_args
    producer_args = args

    # connect to kafka
    logging.info("Connecting to Kafka; %s", args.kafka_configs)
    kafka_producer = Producer(args.kafka_configs)

    # initialize packet capture
    logging.info("Starting packet capture")
    capture = pcapy.open_live(args.interface, args.snaplen, sniff_promisc, sniff_timeout_ms)
    pkts_in = 0

    try:
        while not finished.is_set() and (args.max_packets <= 0 or pkts_in < args.max_packets):

            # capture a packet
            (pkt_hdr, pkt_raw) = capture.next()
            if pkt_hdr is not None:
                logging.debug("Packet received: pkts_in=%d, pkt_len=%s", pkts_in, pkt_hdr.getlen())
                pkts_in += 1
                pkt_ts = timestamp(pkt_hdr)
                kafka_producer.produce(args.kafka_topic, key=pack_ts(pkt_ts), value=pkt_raw,
                                       callback=delivery_callback)

                # pretty print, if needed
                if args.pretty_print > 0 and pkts_in % args.pretty_print == 0:
                    # Parenthesised form: identical output under Python 2's
                    # print statement and valid as a Python 3 function call.
                    print('Packet received[%s]' % (pkts_in))

            # serve the callback queue
            kafka_producer.poll(0)

    finally:
        # flush all messages
        logging.info("Waiting for '%d' message(s) to flush", len(kafka_producer))
        kafka_producer.flush()

        # pkts_out may not be initialized if the callback was never executed
        pkts_out = 0
        if hasattr(delivery_callback, "pkts_out"):
            pkts_out = delivery_callback.pkts_out

        logging.info("'%d' packet(s) in, '%d' packet(s) out", pkts_in, pkts_out)
class KafkaWorkflowCommunicationSender(object):
    """Sends workflow control messages (pause, abort, worker exit) to a Kafka topic.

    The producer configuration and the topic are read from ``walkoff.config.Config``.
    """
    _requires = ['confluent-kafka']

    def __init__(self, message_converter=ProtobufWorkflowCommunicationConverter):
        """Creates the Kafka producer.

        Args:
            message_converter: Object providing the ``create_*_message`` factories used to build outgoing messages.
        """
        kafka_config = walkoff.config.Config.WORKFLOW_COMMUNICATION_KAFKA_CONFIG
        self.producer = Producer(kafka_config)
        self.topic = walkoff.config.Config.WORKFLOW_COMMUNICATION_KAFKA_TOPIC
        self.message_converter = message_converter

    def shutdown(self):
        """Flushes the producer so queued messages are sent before shutting down."""
        self.producer.flush()

    @staticmethod
    def _delivery_callback(err, msg):
        """Logs an error when a message could not be delivered.

        Args:
            err: The delivery error, or None on success.
            msg: The message the report refers to (unused).
        """
        if err is not None:
            logger.error('Kafka message delivery failed: {}'.format(err))

    def pause_workflow(self, workflow_execution_id):
        """Pauses a workflow currently executing.

        Args:
            workflow_execution_id (UUID): The execution ID of the workflow.
        """
        logger.info('Pausing workflow {0}'.format(workflow_execution_id))
        message = self.message_converter.create_workflow_pause_message(workflow_execution_id)
        self._send_workflow_communication_message(message, workflow_execution_id)

    def abort_workflow(self, workflow_execution_id):
        """Aborts a workflow currently executing.

        Args:
            workflow_execution_id (UUID): The execution ID of the workflow.
        """
        logger.info('Aborting running workflow {0}'.format(workflow_execution_id))
        message = self.message_converter.create_workflow_abort_message(workflow_execution_id)
        self._send_workflow_communication_message(message, workflow_execution_id)

    def send_exit_to_workers(self):
        """Sends the exit message over the communication sockets, otherwise worker receiver threads will hang"""
        message = self.message_converter.create_worker_exit_message()
        self._send_workflow_communication_message(message, None)

    def _send_workflow_communication_message(self, message, workflow_id):
        """Sends a message on the workflow communication topic, keyed by the workflow ID."""
        self._send_message(message, self.topic, workflow_id)

    def _send_message(self, message, topic, key):
        """Queues a message on a topic and serves any pending delivery reports.

        Args:
            message: The payload to send.
            topic: The topic to send the payload to.
            key: The message key (may be None).
        """
        self.producer.produce(topic, message, key=key, callback=self._delivery_callback)
        # Delivery callbacks only run from poll()/flush(); poll without blocking so that
        # delivery failures are logged as they happen rather than only at shutdown().
        self.producer.poll(0)
def test_produce_headers():
    """ Test produce() with headers arg, given as a list of tuples or a dict """
    producer = Producer({'socket.timeout.ms': 10,
                         'error_cb': error_cb,
                         'message.timeout.ms': 10})

    binval = pack('hhl', 1, 2, 3)

    # Header collections that produce() must accept.
    accepted_headers = [
        [('headerkey', 'headervalue')],
        [('dupkey', 'dupvalue'), ('empty', ''), ('dupkey', 'dupvalue')],
        [('dupkey', 'dupvalue'), ('dupkey', 'diffvalue')],
        [('key_with_null_value', None)],
        [('binaryval', binval)],
        [('alreadyutf8', u'Småland'.encode('utf-8'))],
        [('isunicode', 'Jämtland')],

        {'headerkey': 'headervalue'},
        {'dupkey': 'dupvalue', 'empty': '', 'dupkey': 'dupvalue'},  # noqa: F601
        {'dupkey': 'dupvalue', 'dupkey': 'diffvalue'},  # noqa: F601
        {'key_with_null_value': None},
        {'binaryval': binval},
        {'alreadyutf8': u'Småland'.encode('utf-8')},
        {'isunicode': 'Jämtland'}
    ]

    # Each collection is produced once with a key and once without.
    for candidate in accepted_headers:
        print('headers', type(candidate), candidate)
        for key_kwargs in ({'key': 'a key'}, {}):
            producer.produce('mytopic', value='somedata', headers=candidate, **key_kwargs)

    # Header arguments that produce() must reject with TypeError.
    rejected_calls = [
        {'key': 'a key', 'headers': ('a', 'b')},
        {'key': 'a key', 'headers': [('malformed_header')]},
        {'headers': {'anint': 1234}},
    ]
    for bad_kwargs in rejected_calls:
        with pytest.raises(TypeError):
            producer.produce('mytopic', value='somedata', **bad_kwargs)

    producer.flush()
def test_dr_msg_errstr():
    """
    Test that the error string for failed messages works (issue #129).
    The underlying problem is that librdkafka reuses the message payload
    for error value on Consumer messages, but on Producer messages the
    payload is the original payload and no rich error string exists.
    """
    producer = Producer({"message.timeout.ms": 10})

    def expect_timeout(err, msg):
        # The error string must be the same whatever the message payload was.
        assert err is not None
        assert err.code() == KafkaError._MSG_TIMED_OUT
        assert "Message timed out" in err.str()

    payloads = (
        # Unicode safe string
        "This is the message payload",
        # Invalid unicode sequence
        "\xc2\xc2",
    )
    for payload in payloads:
        producer.produce('mytopic', payload, on_delivery=expect_timeout)

    producer.flush()
class Publisher():
    """Thin wrapper around a Kafka ``Producer`` that publishes UTF-8 text.

    Equality, hashing and ``str()`` are all delegated to the wrapped producer.
    """

    # Configuration used when the caller does not supply one.
    _DEFAULT_CONFIG = {'bootstrap.servers': 'pulsing.jhk.org:9092', 'retries': 3, 'api.version.request': True}

    def __init__(self, config=None):
        """Create the underlying producer.

        :param config: producer configuration dict; when omitted (``None``)
            a fresh copy of ``_DEFAULT_CONFIG`` is used so that no dict is
            shared between instances.
        """
        super().__init__()

        if config is None:
            config = dict(self._DEFAULT_CONFIG)

        self.__producer = Producer(config)
        self.logger = logging.getLogger(__name__)

    def publish(self, topic, data):
        """Encode ``data`` as UTF-8, produce it to ``topic`` and flush.

        :param topic: name of the target topic
        :param data: text to publish (must provide ``encode``)
        """
        self.logger.debug('publish %s - %s', topic, data)
        self.__producer.produce(topic, data.encode('utf-8'))
        self.__producer.flush()

    @property
    def producer(self):
        """The wrapped producer object."""
        return self.__producer

    def __eq__(self, other):
        # Defer to Python's default handling for foreign types instead of
        # failing with AttributeError on the missing private attribute.
        if not isinstance(other, Publisher):
            return NotImplemented

        return self.__producer == other.__producer

    def __str__(self):
        return self.__producer.__str__()

    def __hash__(self):
        return self.__producer.__hash__()
def test_consumer_rebalance_from_uncommitted_offset(requires_kafka):
    """Check partition rebalancing between two synchronized consumers.

    Four messages are produced across two partitions and the synchronize
    commit group's offsets are published to the commit log topic.  The first
    consumer is expected to be assigned both partitions and receive all four
    messages; after a second consumer joins the same group, each consumer is
    expected to hold a single partition and the second one to receive two
    messages.
    """
    consumer_group = f"consumer-{uuid.uuid1().hex}"
    synchronize_commit_group = f"consumer-{uuid.uuid1().hex}"

    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        messages_delivered[message.topic()].append(message)

    def is_partition_eof(message):
        # ``message.error()`` yields an error object (or None), while
        # ``KafkaError._PARTITION_EOF`` is an integer code, so the two have
        # to be compared through ``code()`` rather than by identity.
        error = message.error()
        return error is not None and error.code() == KafkaError._PARTITION_EOF

    producer = Producer(
        {
            "bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"],
            "on_delivery": record_message_delivered,
        }
    )

    with create_topic(partitions=2) as topic, create_topic() as commit_log_topic:

        # Produce some messages into the topic.
        for i in range(4):
            producer.produce(topic, f"{i}".encode(), partition=i % 2)

        assert producer.flush(5) == 0, "producer did not successfully flush queue"

        # Publish, for every partition, the offset following the last
        # delivered message as the synchronize commit group's position.
        # (The dict keeps the offset of the last message seen per partition.)
        last_offsets = {
            (message.topic(), message.partition()): message.offset()
            for message in messages_delivered[topic]
        }
        for (delivered_topic, partition), offset in last_offsets.items():
            producer.produce(
                commit_log_topic,
                key=f"{delivered_topic}:{partition}:{synchronize_commit_group}".encode(),
                value=f"{offset + 1}".encode(),
            )

        assert producer.flush(5) == 0, "producer did not successfully flush queue"

        consumer_a = SynchronizedConsumer(
            cluster_name="default",
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset="earliest",
        )

        assignments_received = defaultdict(list)

        def on_assign(consumer, assignment):
            assignments_received[consumer].append(assignment)

        consumer_a.subscribe([topic], on_assign=on_assign)

        consume_until_constraints_met(
            consumer_a,
            [lambda message: assignments_received[consumer_a], collect_messages_received(4)],
            10,
        )

        assert (
            len(assignments_received[consumer_a]) == 1
        ), "expected to receive partition assignment"
        assert {(i.topic, i.partition) for i in assignments_received[consumer_a][0]} == {
            (topic, 0),
            (topic, 1),
        }

        # Drop the recorded assignment so the next rebalance can be detected.
        assignments_received[consumer_a].pop()

        message = consumer_a.poll(1)
        assert (
            message is None or is_partition_eof(message)
        ), "there should be no more messages to receive"

        consumer_b = SynchronizedConsumer(
            cluster_name="default",
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset="earliest",
        )

        consumer_b.subscribe([topic], on_assign=on_assign)

        consume_until_constraints_met(
            consumer_a, [lambda message: assignments_received[consumer_a]], 10
        )

        consume_until_constraints_met(
            consumer_b,
            [lambda message: assignments_received[consumer_b], collect_messages_received(2)],
            10,
        )

        for consumer in [consumer_a, consumer_b]:
            assert len(assignments_received[consumer][0]) == 1

        message = consumer_a.poll(1)
        assert (
            message is None or is_partition_eof(message)
        ), "there should be no more messages to receive"

        message = consumer_b.poll(1)
        assert (
            message is None or is_partition_eof(message)
        ), "there should be no more messages to receive"
#p.list_topics().topics

def receipt(err, msg):
    """Delivery callback: print the error, or a timestamped summary of the delivered message."""
    if err is not None:
        print('Error: {}'.format(err))
        return

    # msg.timestamp()[1] is divided by 1000 before being handed to
    # time.localtime (presumably a millisecond epoch value -- TODO confirm).
    delivered_at = time.localtime(msg.timestamp()[1] / 1000)
    stamp = time.strftime('%Y-%m-%d %H:%M:%S', delivered_at)
    summary = "{} : Message on topic {} on partition {} with value of {}"
    print(summary.format(stamp, msg.topic(), msg.partition(), msg.value().decode('utf-8')))


# Produce ten fake user records to the 'users' topic.
for i in range(10):
    data = {}
    data["name"] = fake.name()
    data["age"] = fake.random_int(min=18, max=80, step=1)
    data["street"] = fake.street_address()
    data["city"] = fake.city()
    data["state"] = fake.state()
    data["zip"] = fake.zipcode()

    m = json.dumps(data)

    # Serve delivery callbacks of earlier messages before queueing the next.
    p.poll(0)
    p.produce('users', m.encode('utf-8'), callback=receipt)

p.flush()
def acked(err, msg):
    """Delivery callback: report whether the message was delivered."""
    if err is not None:
        print("Failed to deliver message: {0}: {1}".format(
            msg.value(), err.str()))
    else:
        print("Message produced: {0}".format(msg.value()))


p = Producer({'bootstrap.servers': '192.168.1.107:9092'})

# Defined up front so the cleanup below is safe even if the socket was never
# created.
s = None

try:
    host = '192.168.***.***'  #client/consumer ip
    port = 9092
    server = ('192.168.***.***', 9092)  #server/producer ip

    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    s.bind((host, port))

    # while (1):
    #     message = input("-> ")
    #     s.sendto(message.encode('utf-8'), server)
    # s.close()

    # Read one line from stdin and produce it to 'testtopic'.
    tt = input()
    p.produce('testtopic', '{0}'.format(tt), callback=acked)
    p.poll(0.5)

except KeyboardInterrupt:
    pass

# Wait up to 30 seconds for outstanding messages to be delivered.
p.flush(30)

# Actually call close(); a bare ``s.close`` only looks the method up and
# leaves the socket open.
if s is not None:
    s.close()
class KafkaTest:
    """Driver for end-to-end tests of a Snowflake Kafka connector.

    Holds a Kafka admin client, a plain producer, an Avro producer and a
    Snowflake connection, and offers helpers to send data, to manage
    connectors through the Kafka Connect REST API and to clean up or verify
    the Snowflake objects the connector creates.
    """

    def __init__(self, kafkaAddress, schemaRegistryAddress, kafkaConnectAddress, credentialPath, testVersion,
                 enableSSL):
        """Load credentials and create the Kafka clients and the Snowflake connection.

        :param kafkaAddress: value for the clients' "bootstrap.servers"
        :param schemaRegistryAddress: schema registry URL for the Avro producer
        :param kafkaConnectAddress: host:port of the Kafka Connect REST API
        :param credentialPath: path of the JSON credential file
        :param testVersion: stored as ``self.testVersion``
        :param enableSSL: when true, configure the clients for SASL_SSL
        """
        self.testVersion = testVersion
        self.credentialPath = credentialPath
        with open(self.credentialPath) as f:
            credentialJson = json.load(f)
            testHost = credentialJson["host"]
            testUser = credentialJson["user"]
            testDatabase = credentialJson["database"]
            testSchema = credentialJson["schema"]
            testWarehouse = credentialJson["warehouse"]
            pk = credentialJson["encrypted_private_key"]
            pk_passphrase = credentialJson["private_key_passphrase"]

        self.TEST_DATA_FOLDER = "./test_data/"
        self.httpHeader = {
            'Content-type': 'application/json',
            'Accept': 'application/json'
        }

        self.SEND_INTERVAL = 0.01  # send a record every 10 ms
        self.VERIFY_INTERVAL = 60  # verify every 60 secs
        self.MAX_RETRY = 120  # max wait time 120 mins
        self.MAX_FLUSH_BUFFER_SIZE = 5000  # flush the producer after every 5000 queued records

        self.kafkaConnectAddress = kafkaConnectAddress
        self.schemaRegistryAddress = schemaRegistryAddress
        self.kafkaAddress = kafkaAddress

        if enableSSL:
            print(datetime.now().strftime("\n%H:%M:%S "), "=== Enable SSL ===")
            self.client_config = {
                "bootstrap.servers": kafkaAddress,
                "security.protocol": "SASL_SSL",
                "ssl.ca.location": "./crts/ca-cert",
                "sasl.mechanism": "PLAIN",
                "sasl.username": "******",
                "sasl.password": "******"
            }
        else:
            self.client_config = {"bootstrap.servers": kafkaAddress}

        self.adminClient = AdminClient(self.client_config)
        self.producer = Producer(self.client_config)

        # NOTE(review): sc_config aliases self.client_config, so the schema
        # registry URL is also added to self.client_config from here on.
        # Kept as-is in case other code reads it from there -- confirm.
        sc_config = self.client_config
        sc_config['schema.registry.url'] = schemaRegistryAddress
        self.avroProducer = AvroProducer(sc_config)

        # find the account name
        reg = r"[^\/]*snowflakecomputing"
        account = re.findall(reg, testHost)
        if len(account) != 1 or len(account[0]) < 20:
            print(
                datetime.now().strftime("%H:%M:%S "),
                "Format error in 'host' field at profile.json, expecting account.snowflakecomputing.com:443"
            )

        pkb = parsePrivateKey(pk, pk_passphrase)

        # account[0][:-19] drops the trailing ".snowflakecomputing".
        self.snowflake_conn = snowflake.connector.connect(
            user=testUser,
            private_key=pkb,
            account=account[0][:-19],
            warehouse=testWarehouse,
            database=testDatabase,
            schema=testSchema)

    def msgSendInterval(self):
        """Sleep for SEND_INTERVAL seconds between two messages."""
        # sleep self.SEND_INTERVAL before send the second message
        sleep(self.SEND_INTERVAL)

    def startConnectorWaitTime(self):
        """Sleep for 10 seconds."""
        sleep(10)

    def verifyWaitTime(self):
        """Announce and then sleep for VERIFY_INTERVAL seconds."""
        # sleep self.VERIFY_INTERVAL seconds before verify result in SF DB
        print(datetime.now().strftime("\n%H:%M:%S "),
              "=== Sleep {} secs before verify result in Snowflake DB ===".
              format(self.VERIFY_INTERVAL), flush=True)
        sleep(self.VERIFY_INTERVAL)

    def verifyWithRetry(self, func, round):
        """Call ``func(round)`` until it succeeds or MAX_RETRY attempts have failed.

        ResetAndRetry restarts the retry count; RetryableError and a
        Snowflake ProgrammingError with errno 2003 count as a failed attempt
        followed by a wait; NonRetryableError and any other ProgrammingError
        abort immediately.

        :raises test_suit.test_utils.NonRetryableError: on a non-retryable
            failure or when the retry budget is exhausted
        """
        retryNum = 0
        while retryNum < self.MAX_RETRY:
            try:
                func(round)
                break
            except test_suit.test_utils.ResetAndRetry:
                retryNum = 0
                print(datetime.now().strftime("%H:%M:%S "), "=== Reset retry count and retry ===", flush=True)
            except test_suit.test_utils.RetryableError as e:
                retryNum += 1
                print(datetime.now().strftime("%H:%M:%S "), "=== Failed, retryable. {}===".format(e.msg), flush=True)
                self.verifyWaitTime()
            except test_suit.test_utils.NonRetryableError as e:
                print(datetime.now().strftime("\n%H:%M:%S "), "=== Non retryable error raised ===\n{}".format(e.msg),
                      flush=True)
                raise test_suit.test_utils.NonRetryableError()
            except snowflake.connector.errors.ProgrammingError as e:
                if e.errno == 2003:
                    retryNum += 1
                    print(datetime.now().strftime("%H:%M:%S "), "=== Failed, table not created ===", flush=True)
                    self.verifyWaitTime()
                else:
                    raise
        if retryNum == self.MAX_RETRY:
            print(datetime.now().strftime("\n%H:%M:%S "), "=== Max retry exceeded ===", flush=True)
            raise test_suit.test_utils.NonRetryableError()

    def createTopics(self, topicName, partitionNum=1, replicationNum=1):
        """Request creation of one topic through the admin client."""
        self.adminClient.create_topics(
            [NewTopic(topicName, partitionNum, replicationNum)])

    def sendBytesData(self, topic, value, key=[], partition=0, headers=[]):
        """Produce every item of ``value`` to ``topic`` with the plain producer.

        When ``key`` is non-empty, keys and values are paired with ``zip``.
        The producer is flushed every MAX_FLUSH_BUFFER_SIZE records and once
        more at the end.
        """
        if len(key) == 0:
            for i, v in enumerate(value):
                self.producer.produce(topic, value=v, partition=partition, headers=headers)
                if (i + 1) % self.MAX_FLUSH_BUFFER_SIZE == 0:
                    self.producer.flush()
        else:
            for i, (k, v) in enumerate(zip(key, value)):
                self.producer.produce(topic, value=v, key=k, partition=partition, headers=headers)
                if (i + 1) % self.MAX_FLUSH_BUFFER_SIZE == 0:
                    self.producer.flush()
        self.producer.flush()

    def sendAvroSRData(self, topic, value, value_schema, key=[], key_schema="", partition=0):
        """Produce every item of ``value`` to ``topic`` with the Avro producer.

        When ``key`` is non-empty, keys and values are paired with ``zip``
        and ``key_schema`` is passed along.  The Avro producer is flushed
        every MAX_FLUSH_BUFFER_SIZE records and once more at the end.
        """
        if len(key) == 0:
            for i, v in enumerate(value):
                self.avroProducer.produce(
                    topic=topic, value=v, value_schema=value_schema, partition=partition)
                if (i + 1) % self.MAX_FLUSH_BUFFER_SIZE == 0:
                    # Flush the producer that is actually buffering these
                    # records (the Avro one, not the plain one).
                    self.avroProducer.flush()
        else:
            for i, (k, v) in enumerate(zip(key, value)):
                self.avroProducer.produce(
                    topic=topic, value=v, value_schema=value_schema, key=k, key_schema=key_schema,
                    partition=partition)
                if (i + 1) % self.MAX_FLUSH_BUFFER_SIZE == 0:
                    self.avroProducer.flush()
        self.avroProducer.flush()

    def cleanTableStagePipe(self, connectorName, topicName="", partitionNumber=1):
        """Drop the table, the stage and the per-partition pipes of a connector.

        ``topicName`` defaults to ``connectorName``; the table is named after
        the topic.
        """
        if topicName == "":
            topicName = connectorName
        tableName = topicName
        stageName = "SNOWFLAKE_KAFKA_CONNECTOR_{}_STAGE_{}".format(
            connectorName, topicName)

        print(datetime.now().strftime("\n%H:%M:%S "), "=== Drop table {} ===".format(tableName))
        self.snowflake_conn.cursor().execute(
            "DROP table IF EXISTS {}".format(tableName))

        print(datetime.now().strftime("%H:%M:%S "), "=== Drop stage {} ===".format(stageName))
        self.snowflake_conn.cursor().execute(
            "DROP stage IF EXISTS {}".format(stageName))

        for p in range(partitionNumber):
            pipeName = "SNOWFLAKE_KAFKA_CONNECTOR_{}_PIPE_{}_{}".format(
                connectorName, topicName, p)
            print(datetime.now().strftime("%H:%M:%S "), "=== Drop pipe {} ===".format(pipeName))
            self.snowflake_conn.cursor().execute(
                "DROP pipe IF EXISTS {}".format(pipeName))

        print(datetime.now().strftime("%H:%M:%S "), "=== Done ===", flush=True)

    def verifyStageIsCleaned(self, connectorName, topicName=""):
        """Raise RetryableError if listing the connector's stage returns any row."""
        if topicName == "":
            topicName = connectorName
        stageName = "SNOWFLAKE_KAFKA_CONNECTOR_{}_STAGE_{}".format(
            connectorName, topicName)

        res = self.snowflake_conn.cursor().execute(
            "list @{}".format(stageName)).fetchone()
        if res is not None:
            raise RetryableError("stage not cleaned up ")

    # validate content match gold regex
    def regexMatchOneLine(self, res, goldMetaRegex, goldContentRegex):
        """Check ``res[0]`` (meta) and ``res[1]`` (content) against gold patterns.

        Spaces and newlines are removed from both strings.  In the gold
        patterns the characters ``" { } [ ] +`` are backslash-escaped and the
        result is anchored with ``^`` and ``$``.

        :raises test_suit.test_utils.NonRetryableError: when either string
            does not match its pattern
        """
        meta = res[0].replace(" ", "").replace("\n", "")
        content = res[1].replace(" ", "").replace("\n", "")
        goldMetaRegex = "^" + goldMetaRegex.replace("\"", "\\\"").replace("{", "\\{").replace("}", "\\}") \
            .replace("[", "\\[").replace("]", "\\]").replace("+", "\\+") + "$"
        goldContentRegex = "^" + goldContentRegex.replace("\"", "\\\"").replace("{", "\\{").replace("}", "\\}") \
            .replace("[", "\\[").replace("]", "\\]").replace("+", "\\+") + "$"
        if re.search(goldMetaRegex, meta) is None:
            raise test_suit.test_utils.NonRetryableError(
                "Record meta data:\n{}\ndoes not match gold regex "
                "label:\n{}".format(meta, goldMetaRegex))
        if re.search(goldContentRegex, content) is None:
            raise test_suit.test_utils.NonRetryableError(
                "Record content:\n{}\ndoes not match gold regex "
                "label:\n{}".format(content, goldContentRegex))

    def updateConnectorConfig(self, fileName, connectorName, configMap):
        """PUT the generated connector config, overridden by ``configMap``."""
        with open('./rest_request_generated/' + fileName + '.json') as f:
            c = json.load(f)
            config = c['config']
            for k in configMap:
                config[k] = configMap[k]
        requestURL = "http://{}/connectors/{}/config".format(
            self.kafkaConnectAddress, connectorName)
        r = requests.put(requestURL, json=config, headers=self.httpHeader)
        print(datetime.now().strftime("%H:%M:%S "), r, " updated connector config")

    def restartConnector(self, connectorName):
        """POST to the connector's ``restart`` endpoint."""
        requestURL = "http://{}/connectors/{}/restart".format(
            self.kafkaConnectAddress, connectorName)
        r = requests.post(requestURL, headers=self.httpHeader)
        print(datetime.now().strftime("%H:%M:%S "), r, " restart connector")

    def pauseConnector(self, connectorName):
        """PUT to the connector's ``pause`` endpoint."""
        requestURL = "http://{}/connectors/{}/pause".format(
            self.kafkaConnectAddress, connectorName)
        r = requests.put(requestURL, headers=self.httpHeader)
        print(datetime.now().strftime("%H:%M:%S "), r, " pause connector")

    def resumeConnector(self, connectorName):
        """PUT to the connector's ``resume`` endpoint."""
        requestURL = "http://{}/connectors/{}/resume".format(
            self.kafkaConnectAddress, connectorName)
        r = requests.put(requestURL, headers=self.httpHeader)
        print(datetime.now().strftime("%H:%M:%S "), r, " resume connector")

    def deleteConnector(self, connectorName):
        """DELETE the connector."""
        requestURL = "http://{}/connectors/{}".format(self.kafkaConnectAddress, connectorName)
        r = requests.delete(requestURL, headers=self.httpHeader)
        print(datetime.now().strftime("%H:%M:%S "), r, " delete connector")

    def closeConnector(self, fileName, nameSalt):
        """DELETE the connector named after ``fileName`` (up to its first dot) plus ``nameSalt``."""
        snowflake_connector_name = fileName.split(".")[0] + nameSalt
        delete_url = "http://{}/connectors/{}".format(
            self.kafkaConnectAddress, snowflake_connector_name)
        print(datetime.now().strftime("\n%H:%M:%S "),
              "=== Delete connector {} ===".format(snowflake_connector_name))
        code = requests.delete(delete_url, timeout=10).status_code
        print(datetime.now().strftime("%H:%M:%S "), code)

    def createConnector(self, fileName, nameSalt):
        """Generate a connector request from its template and POST it.

        Placeholders in ``./rest_request_template/<fileName>`` are replaced
        with values from the credential file and this instance, and the
        result is written to ``./rest_request_generated/<fileName>``.  Any
        existing connector of the same name is deleted first, retrying every
        30 seconds (at most 20 times) until Kafka Connect answers with 404,
        200 or 201.
        """
        rest_template_path = "./rest_request_template"
        rest_generate_path = "./rest_request_generated"

        with open(self.credentialPath) as f:
            credentialJson = json.load(f)
            testHost = credentialJson["host"]
            testUser = credentialJson["user"]
            testDatabase = credentialJson["database"]
            testSchema = credentialJson["schema"]
            pk = credentialJson["private_key"]

        print(
            datetime.now().strftime("\n%H:%M:%S "),
            "=== generate sink connector rest reqeuest from {} ===".format(
                rest_template_path))
        if not os.path.exists(rest_generate_path):
            os.makedirs(rest_generate_path)
        snowflake_connector_name = fileName.split(".")[0] + nameSalt

        print(
            datetime.now().strftime("\n%H:%M:%S "),
            "=== Connector Config JSON: {}, Connector Name: {} ===".format(
                fileName, snowflake_connector_name))
        with open("{}/{}".format(rest_template_path, fileName), 'r') as f:
            config = f.read() \
                .replace("SNOWFLAKE_PRIVATE_KEY", pk) \
                .replace("SNOWFLAKE_HOST", testHost) \
                .replace("SNOWFLAKE_USER", testUser) \
                .replace("SNOWFLAKE_DATABASE", testDatabase) \
                .replace("SNOWFLAKE_SCHEMA", testSchema) \
                .replace("CONFLUENT_SCHEMA_REGISTRY", self.schemaRegistryAddress) \
                .replace("SNOWFLAKE_TEST_TOPIC", snowflake_connector_name) \
                .replace("SNOWFLAKE_CONNECTOR_NAME", snowflake_connector_name)
            with open("{}/{}".format(rest_generate_path, fileName), 'w') as fw:
                fw.write(config)

        MAX_RETRY = 20
        retry = 0
        delete_url = "http://{}/connectors/{}".format(
            self.kafkaConnectAddress, snowflake_connector_name)
        post_url = "http://{}/connectors".format(self.kafkaConnectAddress)
        while retry < MAX_RETRY:
            try:
                code = requests.delete(delete_url, timeout=10).status_code
                if code == 404 or code == 200 or code == 201:
                    break
            except Exception:
                # Best effort: Kafka Connect may not accept connections yet.
                # Catching Exception (not a bare except) keeps Ctrl-C and
                # SystemExit working during the retry loop.
                pass
            print(
                datetime.now().strftime("\n%H:%M:%S "),
                "=== sleep for 30 secs to wait for kafka connect to accept connection ==="
            )
            sleep(30)
            retry += 1
        if retry == MAX_RETRY:
            errorExit(
                "\n=== max retry exceeded, kafka connect not ready in 10 mins ==="
            )

        r = requests.post(post_url, json=json.loads(config), headers=self.httpHeader)
        print(datetime.now().strftime("%H:%M:%S "), json.loads(r.content.decode("utf-8"))["name"], r.status_code)
class Uploader():
    """Publishes msgpack-encoded frames of one run to a Kafka broker.

    On construction a "create" announcement is produced to the ``announce``
    topic; frames passed to the instance are produced to a topic named after
    the run id; a "die" announcement is produced by ``cleanup``, which is
    registered with ``atexit``.
    """

    def __init__(self, host, port, api_key, experiment, run_id):
        """Connect to the broker at ``host:port`` and announce the run.

        :param host: broker host name
        :param port: broker port
        :param api_key: used as the SASL password
        :param experiment: experiment identifier included in announcements
        :param run_id: run identifier; its string form is the frame topic
        """
        # Store some variables
        self.host = host
        self.port = port
        self.experiment = experiment
        self.run_id = run_id
        self.api_key = api_key
        # Random 8-letter lowercase identifier included in announcements
        self.rank_id = ''.join(
            random.choice(string.ascii_lowercase) for _ in range(8))

        # Connect to the Kafka broker
        self.kafka_producer = Producer({
            'bootstrap.servers': self.host + ':' + str(self.port),
            'sasl.username': '******',
            'sasl.password': api_key,
            'security.protocol': 'sasl_plaintext',
            'sasl.mechanism': 'PLAIN',
        })

        # Announce the run
        announcement = {
            'type': MType.ANNOUNCE_CREATE.value,
            'experiment': self.experiment,
            'run_id': self.run_id,
            'rank_id': self.rank_id,
        }
        self.kafka_producer.produce('announce', key=str(time.time()),
                                    value=msgpack.packb(announcement))
        self.kafka_producer.flush(30)

        # Register the at_exit death call
        atexit.register(self.cleanup)

    def __call__(self, frame):
        """Queue ``frame`` on the run's topic and return 200."""
        # Publish the frame on the topic
        self.kafka_producer.produce(str(self.run_id), key=str(time.time()),
                                    value=msgpack.packb(frame))
        # Serve delivery reports without blocking so that they do not
        # accumulate in the producer between explicit flush() calls.
        self.kafka_producer.poll(0)
        return 200

    def flush(self, timeout=10):
        """Flush the producer, passing ``timeout`` through to it."""
        self.kafka_producer.flush(timeout)

    def cleanup(self):
        """Announce the end of the run and flush the producer."""
        announcement = {
            'type': MType.ANNOUNCE_DIE.value,
            'experiment': self.experiment,
            'run_id': self.run_id,
            'rank_id': self.rank_id,
        }
        self.kafka_producer.produce('announce', key=str(time.time()),
                                    value=msgpack.packb(announcement))
        self.kafka_producer.flush()
class KafkaHelper(object):
    """Helper for publishing to and consuming from Kafka topics.

    The bootstrap servers are read from ``svt.conf``; the producer is
    created lazily on the first ``publish`` call.
    """

    def __init__(self, target_landscape='custom', config_section=None):
        # NOTE(review): target_landscape and config_section are accepted but
        # not used in this constructor.
        self.kafka_config = {
            'bootstrap.servers': svt.conf.get('kafka', 'bootstrap.servers')
        }
        self.producer = None
        log.info("Initialized new KafkaHelper object with config: "
                 f"{self.kafka_config}")

    def publish(self, topic: str, message: Union[dict, str]) -> None:
        """Posts the passed message to the target Kafka topic.

        A dict is serialized with ``json.dumps``; the text is sent UTF-8
        encoded and the producer is flushed before returning.

        :param topic: Identifier of the target topic
        :param message: Message in a dictionary or string format
        """
        assert isinstance(message, str) or isinstance(message, dict)

        if not self.producer:
            self.producer = Producer(self.kafka_config)

        if isinstance(message, dict):
            message = json.dumps(message)

        # Asynchronous message producing
        self.producer.produce(topic, message.encode('utf-8'))
        self.producer.flush()
        log.info(f"Posted a document to kafka topic: {topic}")

    def consume_forever(self, group_id: str, topics: List[str],
                        callback_functions: List[Callable]) -> None:
        """Polls the given topics and dispatches every message to its callback.

        Each message value is decoded as UTF-8 JSON and passed to the
        callback registered for the message's topic.  Any exception ends the
        loop: it is logged and the consumer is closed.

        :param group_id: Consumer group id
        :param topics: Topics to subscribe to
        :param callback_functions: One callback per topic, in the same order
            as ``topics``
        :return: None
        """
        assert len(topics) == len(callback_functions)
        callbacks = dict(zip(topics, callback_functions))

        # Build a dedicated consumer configuration instead of updating
        # self.kafka_config in place, so the consumer-only settings do not
        # end up in the configuration that publish() hands to the producer.
        consumer_config = {
            **self.kafka_config,
            'group.id': group_id,
            'auto.offset.reset': 'earliest',
        }
        c = Consumer(consumer_config)
        c.subscribe(topics)

        # Read messages
        try:
            while True:
                msg = c.poll(timeout=1.0)
                if not msg:
                    log.info(
                        "There was no message on the subscribed Kafka topics!")
                elif msg.error():
                    raise KafkaException(msg.error())
                else:
                    message = json.loads(msg.value().decode('utf-8'))
                    callbacks[msg.topic()](message)
        except Exception:
            log.error(
                f"Unexpected event occurred! Error: {traceback.format_exc()}")
        finally:
            # Shut down the consumer to commit the current offsets
            c.close()
class CompetitionProducer:
    """Replays competition data onto Kafka topics through a Producer."""
    daemon = True
    producer = None

    def __init__(self, server):
        # Create producer
        self.producer = Producer({'bootstrap.servers': server})

    # message must be in byte format
    def send(self, topic, message):
        """Queue ``message`` on ``topic`` and serve pending delivery events."""
        kafka = self.producer
        kafka.produce(topic, message)  # Sending messages to a certain topic
        kafka.poll(timeout=0)

    def _publish(self, topic, item, **dumps_options):
        """Serialize ``item`` with orjson and send it; print any exception instead of raising."""
        try:
            self.send(topic, orjson.dumps(item, **dumps_options))
        except Exception as e:
            print(e)

    def main(self, topic, initial_batch, items, predictions, initial_training_time, batch_size,
             time_interval, predictions_time_interval, spark_topic, competition_id):
        """
        Recreates the stream.

        Sends the initial batch row by row, sleeps for the initial training
        time, then walks through the batches: each test batch (tagged TEST)
        goes to ``topic``, the matching train batch goes to ``spark_topic``,
        and after a pause of ``time_interval`` the same train batch (tagged
        TRAIN) goes to ``topic``, followed by another pause. The producer is
        flushed at the end. The item dicts are modified in place.

        :param topic: topic receiving the initial, TEST and TRAIN items
        :param initial_batch: items sent before the first pause
        :param items: test items, split into batches of ``batch_size``
        :param predictions: train items, split into batches of ``batch_size``
        :param initial_training_time: seconds to sleep after the initial batch
        :param batch_size: number of items per batch
        :param time_interval: seconds to sleep after each train send
        :param predictions_time_interval: seconds added to the release time to get the deadline
        :param spark_topic: topic receiving the train items first
        :param competition_id: identifier stored on every item
        :return:
        """
        # Send row by row from initial batch as json
        for row in initial_batch:
            self._publish(topic, row)

        # After sending initial batch, sleep for initial training time
        time.sleep(int(initial_training_time))

        # One list of batches for the test items, one for the train items
        test_groups = list(self.chunker(items, batch_size))
        train_groups = list(self.chunker(predictions, batch_size))

        for position, test_group in enumerate(test_groups):
            released_at = datetime.datetime.now()

            # Tag, deadline and release time for every test item, then send it
            for row in test_group:
                row['tag'] = 'TEST'
                row['Deadline'] = str(released_at + datetime.timedelta(seconds=int(predictions_time_interval)))
                row['Released'] = str(released_at)
                row['competition_id'] = str(competition_id)
                self._publish(topic, row)

            # The train batch at the same position as the test batch
            train_group = train_groups[position]

            for row in train_group:
                deadline = released_at + datetime.timedelta(seconds=int(predictions_time_interval))
                row['Deadline'] = deadline.strftime("%Y-%m-%d %H:%M:%S")
                row['Released'] = released_at.strftime("%Y-%m-%d %H:%M:%S")
                row['competition_id'] = competition_id
                self._publish(spark_topic, row)

            time.sleep(time_interval)

            for row in train_group:
                row['tag'] = 'TRAIN'
                row['Deadline'] = released_at + datetime.timedelta(seconds=int(predictions_time_interval))
                row['Released'] = released_at
                self._publish(topic, row, default=json_util.default)

            time.sleep(time_interval)

        self.producer.flush()

    @staticmethod
    def chunker(seq, size):
        """Yield consecutive slices of ``seq``, each at most ``size`` long."""
        starts = range(0, len(seq), size)
        return (seq[start:start + size] for start in starts)

    @staticmethod
    def is_not_empty(row):
        """Return True when every item in ``row`` equals the empty string."""
        # NOTE(review): the result is True for an all-empty row, which is the
        # opposite of what the method name suggests -- confirm with callers.
        for item in row:
            if item != "":
                return False
        return True

    def create_competition(self, competition, items, predictions, initial_batch):
        """Start releasing the data stream for ``competition`` via ``main``."""
        # Topic name: lower-cased competition name with spaces removed
        base_topic = competition.name.lower().replace(" ", "")
        train_topic = competition.name.lower().replace(" ", "") + 'spark_train'

        self.main(
            topic=base_topic,
            initial_training_time=competition.initial_training_time,
            initial_batch=initial_batch,
            items=items,
            predictions=predictions,
            batch_size=competition.batch_size,
            time_interval=competition.time_interval,
            predictions_time_interval=competition.predictions_time_interval,
            spark_topic=train_topic,
            competition_id=competition.competition_id)
class KafkaStreamingClient(AbstractStreamingClient):
    """Kafka streaming client."""

    def __init__(self, config):  # pragma: no cover
        """
        Streaming client implementation based on Kafka.

        A producer is created when KAFKA_CONSUMER_GROUP is not configured,
        otherwise a consumer is created.

        Configuration keys:
          KAFKA_ADDRESS
          KAFKA_CONSUMER_GROUP
          KAFKA_TOPIC
          TIMEOUT
          EVENTHUB_KAFKA_CONNECTION_STRING
        """
        self.logger = Logger()

        self.topic = config.get("KAFKA_TOPIC")

        # A missing, empty or non-numeric TIMEOUT disables the timeout.
        if config.get("TIMEOUT"):
            try:
                self.timeout = int(config.get("TIMEOUT"))
            except ValueError:
                self.timeout = None
        else:
            self.timeout = None

        kafka_config = self.create_kafka_config(config)
        self.admin = admin.AdminClient(kafka_config)

        if config.get("KAFKA_CONSUMER_GROUP") is None:
            self.logger.info('Creating Producer')
            self.producer = Producer(kafka_config)
        else:
            self.logger.info('Creating Consumer')
            self.consumer = Consumer(kafka_config)

    @staticmethod
    def create_kafka_config(user_config: dict) -> dict:  # pragma: no cover
        """Create the kafka configuration.

        Starts from the base settings, adds the SASL_SSL settings when
        EVENTHUB_KAFKA_CONNECTION_STRING is set, and adds ``group.id`` and
        ``debug`` when KAFKA_CONSUMER_GROUP and KAFKA_DEBUG are given.
        """
        config = {
            "bootstrap.servers": user_config.get("KAFKA_ADDRESS"),
            "enable.auto.commit": False,
            "auto.offset.reset": "earliest",
            "default.topic.config": {
                'auto.offset.reset': 'smallest'
            },
        }

        if user_config.get('EVENTHUB_KAFKA_CONNECTION_STRING'):
            ssl_location = user_config.get(
                'SSL_CERT_LOCATION') or '/etc/ssl/certs/ca-certificates.crt'
            eventhub_config = {
                'security.protocol': "SASL_SSL",
                'sasl.mechanism': "PLAIN",
                'ssl.ca.location': ssl_location,
                'sasl.username': '******',
                'sasl.password': user_config.get('EVENTHUB_KAFKA_CONNECTION_STRING'),
                'client.id': 'agogosml',
            }
            config = {**config, **eventhub_config}

        if user_config.get('KAFKA_CONSUMER_GROUP') is not None:
            config['group.id'] = user_config['KAFKA_CONSUMER_GROUP']

        if user_config.get('KAFKA_DEBUG') is not None:
            config['debug'] = user_config['KAFKA_DEBUG']

        return config

    def delivery_report(self, err, msg):  # pragma: no cover
        """
        Indicate delivery result.

        Called once for each message produced. Triggered by poll() or flush().

        :param err: An error message.
        :param msg: A string input to be uploaded to kafka.
        """
        if err is not None:
            self.logger.error('Message delivery failed: %s', err)
        else:
            self.logger.info('Message delivered to %s [%s]',
                             msg.topic(), msg.partition())

    def send(self, message: str):  # pragma: no cover
        """Send a string message to the configured topic.

        :param message: The text to send; it is UTF-8 encoded.
        :return: True when produce and flush completed without raising,
            False when an exception was caught (it is logged).
        :raises TypeError: If ``message`` is not a ``str``.
        """
        if not isinstance(message, str):
            raise TypeError('str type expected for message')
        try:
            mutated_message = message.encode('utf-8')
            self.logger.info('Sending message to kafka topic: %s', self.topic)
            self.producer.poll(0)
            self.producer.produce(
                self.topic, mutated_message, callback=self.delivery_report)
            self.producer.flush()
            return True
        except Exception as ex:
            self.logger.error('Error sending message to kafka: %s', ex)
            return False

    def stop(self):  # pragma: no cover
        """Do nothing."""
        pass

    def check_timeout(self, start: datetime):  # pragma: no cover
        """Interrupts if too much time has elapsed since the kafka client started running."""
        if self.timeout is not None:
            elapsed = datetime.now() - start
            # total_seconds() covers the whole interval; the ``seconds``
            # attribute alone leaves out the ``days`` part of the timedelta.
            if elapsed.total_seconds() >= self.timeout:
                raise KeyboardInterrupt

    def handle_kafka_error(self, msg):  # pragma: no cover
        """Handle an error in kafka.

        An end-of-partition event is only logged; any other error is raised
        as a KafkaException.
        """
        if msg.error().code() == KafkaError._PARTITION_EOF:
            # End of partition event
            self.logger.info('%% %s [%d] reached end at offset %d\n',
                             msg.topic(), msg.partition(), msg.offset())
        else:
            # Error
            raise KafkaException(msg.error())

    def start_receiving(self, on_message_received_callback):  # pragma: no cover
        """Subscribe to the topic and pass every received value to the callback.

        Runs until a KeyboardInterrupt is raised (check_timeout raises one
        once the timeout has elapsed); the consumer is always closed at the
        end.
        """
        try:
            self.subscribe_to_topic()
            start = datetime.now()

            while True:
                # Stop loop after timeout if exists
                self.check_timeout(start)

                # Poll messages from topic
                msg = self.read_single_message()
                if msg is not None:
                    on_message_received_callback(msg)

        except KeyboardInterrupt:
            self.logger.info('Aborting listener...')

        finally:
            # Close down consumer to commit final offsets.
            self.consumer.close()

    def subscribe_to_topic(self):  # pragma: no cover
        """Subscribe to topic."""
        self.consumer.subscribe([self.topic])

    def read_single_message(self):  # pragma: no cover
        """Poll messages from topic.

        :return: The value of the polled message, or None when nothing was
            received or the message carried an error/event.
        """
        msg = self.consumer.poll(0.000001)
        if msg is None:
            return None
        if msg.error():
            # Error or event
            self.handle_kafka_error(msg)
            return None
        # Proper message
        # self.logger.info('kafka read message: %s, from topic: %s', msg.value(), msg.topic())
        self.consumer.commit(msg)
        return msg.value()
def test_consumer_start_from_committed_offset():
    """A synchronized consumer resumes from its group's committed offset.

    Three messages are produced and the first is committed for the consumer
    group.  The synchronized consumer must stay silent until the commit log
    shows that the synchronizing group has moved past a message, and must
    then deliver exactly that message and nothing beyond it.
    """
    consumer_group = "consumer-{}".format(uuid.uuid1().hex)
    synchronize_commit_group = "consumer-{}".format(uuid.uuid1().hex)

    # Delivery reports, grouped by topic, in delivery order.
    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        messages_delivered[message.topic()].append(message)

    producer = Producer({
        "bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"],
        "on_delivery": record_message_delivered,
    })

    with create_topic() as topic, create_topic() as commit_log_topic:

        def publish_commit(delivered):
            # Record in the commit log that the synchronizing group has
            # consumed ``delivered`` (i.e. its next offset is offset + 1).
            commit_key = "{}:{}:{}".format(
                delivered.topic(),
                delivered.partition(),
                synchronize_commit_group,
            )
            producer.produce(
                commit_log_topic,
                key=commit_key.encode("utf8"),
                value="{}".format(delivered.offset() + 1).encode("utf8"),
            )

        # Seed the topic with three messages.
        for number in range(3):
            producer.produce(topic, "{}".format(number).encode("utf8"))

        assert producer.flush(5) == 0, "producer did not successfully flush queue"

        # Commit the first message on behalf of the consumer group.
        committer = Consumer({
            "bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"],
            "group.id": consumer_group
        })
        committer.commit(message=messages_delivered[topic][0], asynchronous=False)

        # Create the synchronized consumer.
        consumer = SynchronizedConsumer(
            bootstrap_servers=os.environ["SENTRY_KAFKA_HOSTS"],
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset="earliest",
        )

        assignments_received = []

        def on_assign(c, assignment):
            assert c is consumer
            assignments_received.append(assignment)

        consumer.subscribe([topic], on_assign=on_assign)

        # Poll until the partition assignment shows up (this takes a while).
        for _attempt in xrange(10):
            assert consumer.poll(1) is None
            if assignments_received:
                break

        assert len(assignments_received) == 1, "expected to receive partition assignment"
        assigned = set((tp.topic, tp.partition) for tp in assignments_received[0])
        assert assigned == set([(topic, 0)])

        # TODO: Make sure that all partitions are paused on assignment.

        # Move the committed offset forward for our synchronizing group.
        publish_commit(messages_delivered[topic][0])

        # Make sure that there are no messages ready to consume.
        assert consumer.poll(1) is None

        # Move the committed offset forward again, past the second message.
        publish_commit(messages_delivered[topic][1])

        assert producer.flush(5) == 0, "producer did not successfully flush queue"

        # We should have received a single message.
        # TODO: Can we also assert that the position is unpaused?)
        for _attempt in xrange(5):
            message = consumer.poll(1)
            if message is not None:
                break

        assert message is not None, "no message received"

        expected_message = messages_delivered[topic][1]  # second message
        assert message.topic() == expected_message.topic()
        assert message.partition() == expected_message.partition()
        assert message.offset() == expected_message.offset()

        # We should not be able to continue reading into the topic.
        # TODO: Can we assert that the position is paused?
        assert consumer.poll(1) is None
class KafkaProducer(GenericProducer):
    """Kafka Single Topic Producer.

    Parameters
    ----------
    PARAMS: dict
        Parameters passed to :class:`confluent_kafka.Producer`

        The required parameters are:

        - *bootstrap.servers*: comma separated <host:port> :class:`string` to brokers.

    TOPIC: string
        Kafka fixed output topic.

        *Example:*

        Depending on the step configuration the producer config can be passed
        in different ways, the recommended one is passing it on the
        `STEP_CONFIG` variable.

        .. code-block:: python

            #settings.py
            PRODUCER_CONFIG = {
                "PARAMS": {
                    "bootstrap.servers": "kafka1:9092, kafka2:9092",
                },
                "TOPIC": "test_topic"
            }

            STEP_CONFIG = {
                ...
                "PRODUCER_CONFIG": PRODUCER_CONFIG
            }

        If multiple producers are required, the variable inside `STEP_CONFIG`
        can be changed to "PRODUCER1_CONFIG", "PRODUCER2_CONFIG", etc.

    TOPIC_STRATEGY: dict
        Using a topic strategy instead of a fixed topic.
        Similar to the consumers topic strategy, the required parameters are:

        - *CLASS*: `apf.core.topic_management.GenericTopicStrategy` class to be used.
        - *PARAMS*: Parameters passed to *CLASS* object.

        **Example:**

        Produce to a topic that updates on 23 hours UTC every day.

        .. code-block:: python

            #settings.py
            PRODUCER_CONFIG = {
                ...
                "TOPIC_STRATEGY": {
                    "CLASS": "apf.core.topic_management.DailyTopicStrategy",
                    "PARAMS": {
                        "topic_format": "test_%s",
                        "date_format": "%Y%m%d",
                        "change_hour": 23
                    }
                }
            }

            STEP_CONFIG = {
                ...
                "PRODUCER_CONFIG": PRODUCER_CONFIG
            }

    SCHEMA: dict
        AVRO Output Schema `(AVRO Schema Definition) <https://avro.apache.org/docs/current/gettingstartedpython.html#Defining+a+schema>`_

        **Example:**

        .. code-block:: python

            #settings.py
            PRODUCER_CONFIG = {
                ...
                "SCHEMA": {
                    "namespace": "example.avro",
                    "type": "record",
                    "name": "User",
                    "fields": [
                        {"name": "name", "type": "string"},
                        {"name": "favorite_number", "type": ["int", "null"]},
                        {"name": "favorite_color", "type": ["string", "null"]}
                    ]
                }
            }
    """

    def __init__(self, config):
        """Create the Kafka producer, parse the schema and resolve topics.

        ``self.topic`` is always a list of topic names: a single-element
        list for a fixed ``TOPIC``, or whatever the configured topic
        strategy's ``get_topic()`` returns.
        """
        super().__init__(config=config)
        self.producer = Producer(self.config["PARAMS"])
        self.schema = fastavro.parse_schema(self.config["SCHEMA"])

        self.dynamic_topic = False
        if self.config.get("TOPIC"):
            self.logger.info(f'Producing to {self.config["TOPIC"]}')
            self.topic = [self.config["TOPIC"]]
        elif self.config.get("TOPIC_STRATEGY"):
            self.dynamic_topic = True
            module_name, class_name = self.config["TOPIC_STRATEGY"]["CLASS"].rsplit(".", 1)
            TopicStrategy = getattr(importlib.import_module(module_name), class_name)
            self.topic_strategy = TopicStrategy(**self.config["TOPIC_STRATEGY"]["PARAMS"])
            self.topic = self.topic_strategy.get_topic()
            self.logger.info(f'Using {self.config["TOPIC_STRATEGY"]}')
            self.logger.info(f'Producing to {self.topic}')
        # A producer has nothing to subscribe to: the former
        # ``self.consumer.subscribe(self.topic)`` call referenced an
        # attribute this class never creates and has been removed.

    def produce(self, message=None):
        """Produce Message to a topic.

        The message is serialized with the configured AVRO schema and sent
        to every topic in ``self.topic``.  With a topic strategy the topic
        list is refreshed before sending.
        """
        out = io.BytesIO()
        fastavro.writer(out, self.schema, [message])
        avro_message = out.getvalue()

        if self.dynamic_topic:
            topics = self.topic_strategy.get_topic()
            if self.topic != topics:
                self.topic = topics

        for topic in self.topic:
            self.producer.produce(topic, avro_message)

    def __del__(self):
        """Flush any queued messages before the producer is destroyed."""
        self.logger.info("Waiting to produce last messages")
        self.producer.flush()
def test_ingester(self):
    """End-to-end ingestion test against a locally started Kafka stack.

    Steps, in order: initialise the database, optionally register test
    groups/filters, start ZooKeeper and the Kafka server, (re)create the
    dated test topic, push every ``*.avro`` alert file into it, run the
    ingester via ``watchdog``, shut the services down, and finally assert on
    the resulting collection sizes (and, when the broker mode is enabled, on
    what was posted upstream).
    """
    init_db_sync(config=config, verbose=True)

    log("Setting up paths")
    # path_kafka = pathlib.Path(config["path"]["kafka"])

    path_logs = pathlib.Path(config["path"]["logs"])
    if not path_logs.exists():
        path_logs.mkdir(parents=True, exist_ok=True)

    if config["misc"]["broker"]:
        log("Setting up test groups and filters in Fritz")
        # Plain filter: its candidate count is checked at the end of the test.
        program = Program(group_name="FRITZ_TEST", group_nickname="test")
        Filter(
            collection="ZTF_alerts",
            group_id=program.group_id,
            filter_id=program.filter_id,
        )

        # Auto-saving filter restricted to a single object id.
        program2 = Program(group_name="FRITZ_TEST_AUTOSAVE", group_nickname="test2")
        Filter(
            collection="ZTF_alerts",
            group_id=program2.group_id,
            filter_id=program2.filter_id,
            autosave=True,
            pipeline=[{"$match": {"objectId": "ZTF20aaelulu"}}],
        )

        # Filter with update_annotations enabled, restricted to one object id.
        program3 = Program(
            group_name="FRITZ_TEST_UPDATE_ANNOTATIONS", group_nickname="test3"
        )
        Filter(
            collection="ZTF_alerts",
            group_id=program3.group_id,
            filter_id=program3.filter_id,
            update_annotations=True,
            pipeline=[
                {"$match": {"objectId": "ZTF20aapcmur"}}
            ],  # there are 3 alerts in the test set for this oid
        )

    # clean up old Kafka logs
    log("Cleaning up Kafka logs")
    subprocess.run(["rm", "-rf", path_logs / "kafka-logs", "/tmp/zookeeper"])

    log("Starting up ZooKeeper at localhost:2181")

    # start ZooKeeper in the background
    cmd_zookeeper = [
        os.path.join(config["path"]["kafka"], "bin", "zookeeper-server-start.sh"),
        "-daemon",
        os.path.join(config["path"]["kafka"], "config", "zookeeper.properties"),
    ]

    with open(path_logs / "zookeeper.stdout", "w") as stdout_zookeeper:
        # p_zookeeper =
        subprocess.run(
            cmd_zookeeper, stdout=stdout_zookeeper, stderr=subprocess.STDOUT
        )

    # take a nap while it fires up
    time.sleep(3)

    log("Starting up Kafka Server at localhost:9092")

    # start the Kafka server:
    cmd_kafka_server = [
        os.path.join(config["path"]["kafka"], "bin", "kafka-server-start.sh"),
        "-daemon",
        os.path.join(config["path"]["kafka"], "config", "server.properties"),
    ]

    # NOTE(review): the log file is opened but not passed to subprocess.run
    # below, so the server's output is not redirected into it.
    with open(
        os.path.join(config["path"]["logs"], "kafka_server.stdout"), "w"
    ) as stdout_kafka_server:
        # p_kafka_server = subprocess.Popen(cmd_kafka_server, stdout=stdout_kafka_server, stderr=subprocess.STDOUT)
        # p_kafka_server =
        subprocess.run(cmd_kafka_server)

    # take a nap while it fires up
    time.sleep(3)

    # get kafka topic names with kafka-topics command
    cmd_topics = [
        os.path.join(config["path"]["kafka"], "bin", "kafka-topics.sh"),
        "--zookeeper",
        config["kafka"]["zookeeper.test"],
        "-list",
    ]

    # One topic name per output line; the trailing empty element is dropped.
    topics = (
        subprocess.run(cmd_topics, stdout=subprocess.PIPE)
        .stdout.decode("utf-8")
        .split("\n")[:-1]
    )
    log(f"Found topics: {topics}")

    # create a test ZTF topic for the current UTC date
    date = datetime.datetime.utcnow().strftime("%Y%m%d")
    topic_name = f"ztf_{date}_programid1_test"

    if topic_name in topics:
        # topic previously created? remove first
        cmd_remove_topic = [
            os.path.join(config["path"]["kafka"], "bin", "kafka-topics.sh"),
            "--zookeeper",
            config["kafka"]["zookeeper.test"],
            "--delete",
            "--topic",
            topic_name,
        ]
        # print(kafka_cmd)
        remove_topic = (
            subprocess.run(cmd_remove_topic, stdout=subprocess.PIPE)
            .stdout.decode("utf-8")
            .split("\n")[:-1]
        )
        log(f"{remove_topic}")
        log(f"Removed topic: {topic_name}")
        time.sleep(1)

    # NOTE(review): `topics` is not refreshed after the deletion above, so a
    # topic that was just removed is not explicitly re-created here --
    # presumably the broker auto-creates it on first produce; confirm.
    if topic_name not in topics:
        log(f"Creating topic {topic_name}")

        cmd_create_topic = [
            os.path.join(config["path"]["kafka"], "bin", "kafka-topics.sh"),
            "--create",
            "--bootstrap-server",
            config["kafka"]["bootstrap.test.servers"],
            "--replication-factor",
            "1",
            "--partitions",
            "1",
            "--topic",
            topic_name,
        ]
        with open(
            os.path.join(config["path"]["logs"], "create_topic.stdout"), "w"
        ) as stdout_create_topic:
            # p_create_topic = \
            subprocess.run(
                cmd_create_topic,
                stdout=stdout_create_topic,
                stderr=subprocess.STDOUT,
            )

    log("Starting up Kafka Producer")

    # spin up Kafka producer
    producer = Producer(
        {"bootstrap.servers": config["kafka"]["bootstrap.test.servers"]}
    )

    # small number of alerts that come with kowalski
    path_alerts = pathlib.Path("/app/data/ztf_alerts/20200202/")
    # grab some more alerts from gs://ztf-fritz/sample-public-alerts
    try:
        log("Grabbing more alerts from gs://ztf-fritz/sample-public-alerts")
        r = requests.get("https://www.googleapis.com/storage/v1/b/ztf-fritz/o")
        aa = r.json()["items"]
        ids = [pathlib.Path(a["id"]).parent for a in aa if "avro" in a["id"]]
    except Exception as e:
        # Best effort: the listing is only used for the log line below.
        log(
            "Grabbing alerts from gs://ztf-fritz/sample-public-alerts failed, but it is ok"
        )
        log(f"{e}")
        ids = []

    # Copy the sample alerts next to the bundled ones; "-n" skips files that
    # already exist at the destination.
    subprocess.run(
        [
            "gsutil",
            "-m",
            "cp",
            "-n",
            "gs://ztf-fritz/sample-public-alerts/*.avro",
            "/app/data/ztf_alerts/20200202/",
        ]
    )
    log(f"Fetched {len(ids)} alerts from gs://ztf-fritz/sample-public-alerts")

    # push!
    for p in path_alerts.glob("*.avro"):
        with open(str(p), "rb") as data:
            # Trigger any available delivery report callbacks from previous produce() calls
            producer.poll(0)

            log(f"Pushing {p}")

            # Asynchronously produce a message, the delivery report callback
            # will be triggered from poll() above, or flush() below, when the message has
            # been successfully delivered or failed permanently.
            producer.produce(topic_name, data.read(), callback=delivery_report)

            # Wait for any outstanding messages to be delivered and delivery report
            # callbacks to be triggered.
            producer.flush()

    log("Starting up Ingester")

    # digest and ingest
    watchdog(obs_date=date, test=True)
    log("Digested and ingested: all done!")

    # shut down Kafka server and ZooKeeper
    time.sleep(20)

    log("Shutting down Kafka Server at localhost:9092")
    # stop the Kafka server:
    cmd_kafka_server_stop = [
        os.path.join(config["path"]["kafka"], "bin", "kafka-server-stop.sh"),
        os.path.join(config["path"]["kafka"], "config", "server.properties"),
    ]

    with open(
        os.path.join(config["path"]["logs"], "kafka_server.stdout"), "w"
    ) as stdout_kafka_server:
        # p_kafka_server_stop = \
        subprocess.run(
            cmd_kafka_server_stop,
            stdout=stdout_kafka_server,
            stderr=subprocess.STDOUT,
        )

    log("Shutting down ZooKeeper at localhost:2181")
    cmd_zookeeper_stop = [
        os.path.join(config["path"]["kafka"], "bin", "zookeeper-server-stop.sh"),
        os.path.join(config["path"]["kafka"], "config", "zookeeper.properties"),
    ]

    with open(
        os.path.join(config["path"]["logs"], "zookeeper.stdout"), "w"
    ) as stdout_zookeeper:
        # p_zookeeper_stop = \
        subprocess.run(
            cmd_zookeeper_stop, stdout=stdout_zookeeper, stderr=subprocess.STDOUT
        )

    log("Checking the ZTF alert collection states")
    mongo = Mongo(
        host=config["database"]["host"],
        port=config["database"]["port"],
        replica_set=config["database"]["replica_set"],
        username=config["database"]["username"],
        password=config["database"]["password"],
        db=config["database"]["db"],
        verbose=True,
    )
    collection_alerts = config["database"]["collections"]["alerts_ztf"]
    collection_alerts_aux = config["database"]["collections"]["alerts_ztf_aux"]
    # Expected document counts are fixed numbers tied to the test alert set.
    n_alerts = mongo.db[collection_alerts].count_documents({})
    assert n_alerts == 313
    n_alerts_aux = mongo.db[collection_alerts_aux].count_documents({})
    assert n_alerts_aux == 145

    if config["misc"]["broker"]:
        log("Checking that posting to SkyPortal succeeded")

        # check number of candidates that passed the first filter
        resp = requests.get(
            program.base_url + f"/api/candidates?groupIDs={program.group_id}",
            headers=program.headers,
            timeout=3,
        )

        assert resp.status_code == requests.codes.ok
        result = resp.json()
        assert result["status"] == "success"
        assert "data" in result
        assert "totalMatches" in result["data"]
        assert result["data"]["totalMatches"] == 88

        # check that the only candidate that passed the second filter (ZTF20aaelulu) got saved as Source
        resp = requests.get(
            program2.base_url + f"/api/sources?group_ids={program2.group_id}",
            headers=program2.headers,
            timeout=3,
        )

        assert resp.status_code == requests.codes.ok
        result = resp.json()
        assert result["status"] == "success"
        assert "data" in result
        assert "totalMatches" in result["data"]
        assert result["data"]["totalMatches"] == 1
        assert "sources" in result["data"]
        assert result["data"]["sources"][0]["id"] == "ZTF20aaelulu"
def send(self, message):
    """Publish ``message`` to the ``CoinPrices`` topic and wait for delivery.

    A producer is created for the call, the message is queued under the
    fixed key ``'coin'``, and ``flush`` blocks for up to 30 seconds while
    the message is delivered.
    """
    # The client property is spelled ``bootstrap.servers``; the previous
    # ``boostrap.server`` key is not a known configuration property, so the
    # producer could never be pointed at the configured brokers.
    p = Producer({'bootstrap.servers': settings.KAFKA['bootstrap.servers']})
    p.produce('CoinPrices', key='coin', value=message)
    p.flush(30)
def _district_record(state, dd):
    """Flatten one district entry of the API response into a message payload."""
    return {
        'state': state,
        'district': dd['district'],
        'active': dd['active'],
        'confirmed': dd['confirmed'],
        'recovered': dd['recovered'],
        'deceased': dd['deceased'],
        'deltaConfirmed': dd['delta']['confirmed'],
        'deltaRecovered': dd['delta']['recovered'],
        'deltaDeceased': dd['delta']['deceased'],
        'notes': dd['notes'],
    }


def _risk_zone(active):
    """Map an active-case count onto a risk-zone key.

    The bands are contiguous: below 200 is low, 200 up to (not including)
    800 is moderate, 800 and above is high.
    """
    if active < 200:
        return 'low_risk_zone'
    if active < 800:
        return 'moderate_risk_zone'
    return 'high_risk_zone'


def producer_trigger(raw_data, context):
    """Fetch district statistics and publish them to two Kafka topics.

    First every district is published as-is to ``district-data``.  Then,
    per state, districts are bucketed into risk zones and the accumulated
    zone dictionary is published to ``processed-data`` after each district.

    Parameters
    ----------
    raw_data, context :
        Trigger arguments; neither is read by this function.
    """
    district_stats_url = 'https://api.covid19india.org/v2/state_district_wise.json'

    bootstrap_servers = "localhost:9092"
    kafka_district_data_topic_name = "district-data"
    kafka_processed_data_topic_name = "processed-data"

    conf = {'bootstrap.servers': bootstrap_servers}
    producer = Producer(conf, logger=logger)

    # The same response feeds both passes, so it is fetched once.
    states = requests.get(district_stats_url).json()

    # import raw district data
    for entry in states:
        state = entry['state']
        for dd in entry['districtData']:
            key = {'state': state, 'district': dd['district']}
            value = _district_record(state, dd)
            try:
                producer.produce(topic=kafka_district_data_topic_name,
                                 value=json.dumps(value),
                                 key=json.dumps(key),
                                 on_delivery=fail)
            except BufferError:
                logger.error('%% Local producer queue is full (%d messages awaiting delivery): try again\n' % len(producer))
            # Serve delivery callbacks so the local queue can drain.
            producer.poll(0)

    logger.info('%% Waiting for %d deliveries\n' % len(producer))
    producer.flush()

    # processed data: districts bucketed by risk zone, accumulated per state
    for entry in states:
        state = entry['state']
        final_dict = {}
        for dd in entry['districtData']:
            key = {'state': state, 'district': dd['district']}
            # Previously counts of exactly 200 or 800 matched no branch and
            # the district was published without being classified.
            final_dict[_risk_zone(dd['active'])] = _district_record(state, dd)
            try:
                producer.produce(topic=kafka_processed_data_topic_name,
                                 value=json.dumps(final_dict),
                                 key=json.dumps(key),
                                 on_delivery=fail)
            except BufferError:
                logger.error('%% Local producer queue is full (%d messages awaiting delivery): try again\n' % len(producer))
            producer.poll(0)

    logger.info('%% Waiting for %d deliveries\n' % len(producer))
    producer.flush()
import pyodbc
from confluent_kafka import Producer

# Smoke-test script: read the highest ActionId from the Funnel table, then
# publish a single test message to the 'Funnel' Kafka topic.

print ('---login--- ')
# NOTE(review): the connection string embeds credentials in source control;
# move them to environment/secret configuration and rotate the password.
conn = pyodbc.connect("DRIVER={ODBC Driver 17 for SQL Server};SERVER=DevSQL01;DATABASE=HackStream;UID=greatscott;PWD=H4ppyFunB4ll;")
try:
    cursor = conn.cursor()
    cursor.execute('SELECT MAX(ActionId) FROM HackStream.dbo.Funnel')
    for row in cursor:
        print (row)
finally:
    # Release the database connection even if the query fails.
    conn.close()
print ('--end--')

p = Producer({'bootstrap.servers': '172.16.43.33:9092'})
p.produce('Funnel', key='hello', value='world')
# Block for up to 10 seconds while the message is delivered.
p.flush(10)
class DocManager(DocManagerBase):
    """ DocManager that echoes MongoDB Oplog to Kafka.
    """

    # NOTE(review): the prefix ends with '.' and _get_topic joins with another
    # '.', so topic names contain a double dot ('db.mongo..<namespace>').
    # Left unchanged because renaming topics would break existing consumers.
    _topic_prefix = 'db.mongo.'

    def __init__(self, url, **kwargs):
        """ Sets up producer connection to Kafka.

        Parameters
        ----------
        url : str
            Directly corresponds to the "bootstrap.servers" config when initializing a Kafka entity
        """
        self.producer = Producer({'bootstrap.servers': url})

    def commit(self):
        """ Blocks until all queued messages have been delivered.
        """
        self.producer.flush()

    def get_last_doc(self):
        """ TODO: For now, this returns nothing.
        """
        pass

    def remove(self, document_id, namespace, timestamp):
        """ Sends a remove message to the corresponding kafka topic.

        Parameters
        ----------
        document_id : str
        namespace : str
        timestamp : bson.timestamp.Timestamp
        """
        msg_topic = self._get_topic(namespace)
        msg_key = document_id
        msg_val = json_dumps({
            'op': 'remove',
            'o': document_id,
            'ts': timestamp,
        })
        return self._produce(msg_topic, msg_key, msg_val)

    def search(self, start_ts, end_ts):
        """ TODO: For now, this returns an empty iterator.
        """
        return iter([])

    def stop(self):
        """ Flushes outstanding messages before shutdown.
        """
        self.producer.flush()

    def update(self, document_id, update_spec, namespace, timestamp):
        """ Sends an update message to the corresponding kafka topic.

        Parameters
        ----------
        document_id : str
        update_spec : dict
        namespace : str
        timestamp : bson.timestamp.Timestamp
        """
        msg_topic = self._get_topic(namespace)
        msg_key = document_id
        msg_val = json_dumps({
            'op': 'update',
            'o': update_spec,
            'o2': document_id,
            'ts': timestamp,
        })
        return self._produce(msg_topic, msg_key, msg_val)

    def upsert(self, document, namespace, timestamp):
        """ Sends an upsert message to the corresponding kafka topic.

        Parameters
        ----------
        document : dict
        namespace : str
        timestamp : bson.timestamp.Timestamp
        """
        msg_topic = self._get_topic(namespace)
        msg_key = document['_id']
        msg_val = json_dumps({
            'op': 'upsert',
            'o': document,
            'ts': timestamp,
        })
        return self._produce(msg_topic, msg_key, msg_val)

    def _produce(self, topic, key, value):
        """ Helper method for producing to Kafka.

        Delivery reports are only dispatched from poll() or flush().  Polling
        here serves the reports of earlier messages, so callbacks run
        promptly and the local queue drains between commit()/stop() calls
        instead of growing until produce() raises BufferError.
        """
        self.producer.poll(0)
        return self.producer.produce(topic=topic, key=key, value=value,
                                     callback=self._delivery_report)

    @staticmethod
    def _get_topic(namespace):
        """ Returns a Kafka topic name based on given parameters.

        Parameters
        ----------
        namespace : str
        """
        return '{}.{}'.format(DocManager._topic_prefix, namespace)

    @staticmethod
    def _delivery_report(err, msg):
        """ Logs the outcome of a single message delivery.
        """
        if err is None:
            LOG.info('Message with key {} produced to topic {}: {}'.format(
                msg.key(), msg.topic(), msg.value()))
        else:
            LOG.error(
                'Error while delivering message with key {} to topic {}, with value {}:\n{}'
                .format(msg.key(), msg.topic(), msg.value(), err.str()))
'bootstrap.servers': "kafka:2181", 'group.id': "json_producer" }) time.sleep(10) def delivery_callback (err, msg): if err: sys.stderr.write('%% Message failed delivery: %s\n' % err) else: sys.stderr.write('%% Message delivered to %s [%d]\n' % \ (msg.topic(), msg.partition())) for tweet in get_tweet('examples/tweets-200k.txt.gz'): # if len(tweet['entities']['urls']) > 0 and \ # any(tweet['lang'] in l for l in ['es', 'en']): try: print("%s: %s" % (tweet['user']['screen_name'], tweet['text'])) kfk.produce( "raw_tweets", json.dumps(tweet), callback=delivery_callback ) kfk.poll(0) kfk.flush() except BufferError as e: sys.stderr.write('%% Local producer queue is full ' \ '(%d messages awaiting delivery): try again\n' % len(kfk))
class KafkaProducer:
    """Synchronous writer of single messages to Kafka with error reporting."""

    def __init__(self, logger, cfg, influxdb_client, email_notification):
        """Class constructor.

        Args:
            logger (TimedRotatingLogger): logger
            cfg (dict): dictionary of parameters
            influxdb_client (InfluxBDProducer): object used to log into the InfluxDB database
            email_notification (EmailNotification): object used to send email notifications
        """
        self.logger = logger
        self.cfg = cfg
        self.influxdb_client = influxdb_client
        self.email_notification = email_notification
        self.producer = Producer(self.cfg['kafka_broker']['producer_config'])

    @staticmethod
    def delivery_callback(err, msg):
        """Delivery report hook: raise on a failed delivery, otherwise do nothing."""
        if err:
            raise KafkaException(err)

    def write_message(self, topic, key, message, headers):
        """Write a single message to a Kafka topic.

        Args:
            topic (str): name of the topic the message is written to
            key (str): message id (message.id from Traffic)
            message (str): parsed deal with its attributes and values, as a dictionary
            headers (dict): message headers

        Returns:
            bool: True once the message has been flushed, False when the
            local producer queue is full.  On a Kafka or type error the
            failure is logged and reported and the process exits with
            status 1.
        """
        try:
            self.producer.produce(topic=topic,
                                  key=key,
                                  value=message,
                                  headers=headers,
                                  callback=self.delivery_callback)
            # synchronous write: wait until the message is delivered
            self.producer.flush()
        except BufferError as be:
            stack = traceback.extract_tb(sys.exc_info()[2])
            text = "Local producer queue is full ({0} messages awaiting delivery): try again\n{1}\n{2}"
            self.logger.error(text.format(len(self.producer), be, stack))
            self.influxdb_client.write_error(module="KAFKA_PRODUCER")
            return False
        except (KafkaException, TypeError) as failure:
            # Both failure kinds are fatal and reported in the same way.
            stack = traceback.extract_tb(sys.exc_info()[2])
            text = "Error occurred while writing message into Kafka\n{0}\n{1}\n{2}"
            self.logger.error(text.format(failure, message, stack))
            self.influxdb_client.write_error(module="KAFKA_PRODUCER")
            self.email_notification.send_error_notification()
            sys.exit(1)
        return True
def create_app(config=None, testing=False, cli=True):
    """
    Application factory, used to create application

    Besides building the Flask app, this runs the profanity-check worker:
    it consumes ``content_curator_twitter``, asks the purgomalum service
    whether each message's ``content`` contains profanity, and publishes the
    answer back to the same topic under the original message key.

    NOTE(review): the consume loop has no exit condition, so this function
    only leaves through an exception and ``return app`` is never reached;
    consider moving the loop into its own worker entry point.
    """
    app = Flask(__name__, static_folder=None)
    app.port = 5003

    c = Consumer(
        {
            "bootstrap.servers": "localhost:9092",
            "group.id": "content_curator_twitter_group_21",
            "auto.offset.reset": "earliest",
        }
    )
    p = Producer({"bootstrap.servers": "localhost:9092"})
    c.subscribe(["content_curator_twitter"])

    try:
        while True:
            msg = c.poll()

            if msg is None:
                continue
            if msg.error():
                print("Consumer error: {}".format(msg.error()))
                continue

            try:
                m = json.loads(msg.value().decode("utf-8"))
                if "content" not in m:
                    continue

                # Check the key before decoding it: unkeyed messages used to
                # fail with AttributeError, and only after the HTTP request
                # had already been made.
                raw_key = msg.key()
                if raw_key is None:
                    continue
                msg_key = raw_key.decode("utf-8")

                # Let requests build the query string so characters such as
                # '&', '#' or spaces in the content are escaped rather than
                # corrupting the URL.  The timeout keeps one stalled request
                # from blocking the consumer forever.
                profanity = requests.get(
                    url="https://www.purgomalum.com/service/containsprofanity",
                    params={"text": m["content"]},
                    timeout=10,
                )
                profanity_value = json.dumps(
                    {"profanity": profanity.content.decode("utf-8")}
                )

                p.produce(
                    topic="content_curator_twitter",
                    key=msg_key,
                    value=profanity_value,
                )
                p.flush()
                print("ADDED:", {"key": msg_key, "value": profanity_value})
            except Exception as e:
                # Best effort per message: report and keep consuming.
                print("ERROR:", e)
    finally:
        # Leave the consumer group cleanly however the loop is interrupted.
        c.close()

    return app
print()

# Demo producer: send ten keyed messages to "first_topic", then one message
# without a key, and wait for everything to be delivered.
p = Producer({'bootstrap.servers': '127.0.0.1:9092'})

try:
    for val in range(10):
        topic = "first_topic"
        key = "key_%d" % val
        value = "hello from python #%d" % val
        p.produce(topic=topic, key=key, value=value, callback=acked)
        # Give delivery callbacks up to half a second to run.
        p.poll(0.5)

        # A given key always lands on the same partition, also across reruns:
        #   key_0 -> partition 2
        #   key_1 -> partition 0
        #   key_2 -> partition 1
        #   key_3 -> partition 2
        #   key_4 -> partition 1
        #   key_5 -> partition 2
        #   key_6 -> partition 0
        #   key_7 -> partition 0
        #   key_8 -> partition 1
        #   key_9 -> partition 0
except KeyboardInterrupt:
    pass

p.produce('first_topic', key=None, value='first from python')

# Wait up to 10 seconds for outstanding deliveries.
p.flush(10)
class KafkaProducer(Producer[TPayload]):
    """Producer that encodes payloads with a codec and reports delivery via futures."""

    def __init__(self, configuration: Mapping[str, Any],
                 codec: Codec[KafkaPayload, TPayload]) -> None:
        self.__configuration = configuration
        self.__codec = codec

        self.__producer = ConfluentProducer(configuration)
        self.__shutdown_requested = Event()

        # The worker must execute in a separate thread to ensure that callbacks
        # are fired -- otherwise trying to produce "synchronously" via
        # ``produce(...).result()`` could result in a deadlock.
        self.__result = execute(self.__worker)

    def __worker(self) -> None:
        """
        Continuously polls the producer to ensure that delivery callbacks are
        triggered (which correspondingly set the result values on the
        ``Future`` instances returned by ``produce``.) This function exits
        after a shutdown request has been issued (via ``close``) and all
        in-flight messages have been delivered.
        """
        while not self.__shutdown_requested.is_set():
            self.__producer.poll(0.1)
        self.__producer.flush()

    def __delivery_callback(
        self,
        future: Future[Message[TPayload]],
        payload: TPayload,
        error: KafkaError,
        message: ConfluentMessage,
    ) -> None:
        """
        Resolve ``future`` from a delivery report: with a ``TransportError``
        when delivery failed, otherwise with a ``Message`` describing where
        and when the payload was written.
        """
        if error is not None:
            future.set_exception(TransportError(error))
            return

        try:
            timestamp_type, timestamp_value = message.timestamp()
            # Compare by value: the timestamp type is a plain integer
            # constant, and identity comparison of integers only works by
            # accident of interpreter caching.
            if timestamp_type == TIMESTAMP_NOT_AVAILABLE:
                raise ValueError("timestamp not available")

            future.set_result(
                Message(
                    Partition(Topic(message.topic()), message.partition()),
                    message.offset(),
                    payload,
                    # The timestamp value is divided by 1000 to get seconds.
                    datetime.utcfromtimestamp(timestamp_value / 1000.0),
                ))
        except Exception as exc:
            # Named ``exc`` so the ``error`` parameter is not shadowed.
            future.set_exception(exc)

    def produce(self, destination: Union[Topic, Partition],
                payload: TPayload) -> Future[Message[TPayload]]:
        """
        Queue ``payload`` for delivery to a topic or to a specific partition.

        Returns a future that is resolved from the delivery callback.
        Raises ``RuntimeError`` if the producer has been closed and
        ``TypeError`` for an unsupported destination type.
        """
        if self.__shutdown_requested.is_set():
            raise RuntimeError("producer has been closed")

        if isinstance(destination, Topic):
            produce = partial(self.__producer.produce, topic=destination.name)
        elif isinstance(destination, Partition):
            produce = partial(
                self.__producer.produce,
                topic=destination.topic.name,
                partition=destination.index,
            )
        else:
            raise TypeError("invalid destination type")

        encoded = self.__codec.encode(payload)

        future: Future[Message[TPayload]] = Future()
        future.set_running_or_notify_cancel()
        produce(
            value=encoded.value,
            key=encoded.key,
            headers=encoded.headers,
            on_delivery=partial(self.__delivery_callback, future, payload),
        )
        return future

    def close(self) -> Future[None]:
        """Request shutdown and return the future tracking the worker's exit."""
        self.__shutdown_requested.set()
        return self.__result
def test_consumer_rebalance_from_committed_offset():
    """Synchronized consumers keep honouring commits across a rebalance.

    Four messages are spread over two partitions and the first message of
    each partition is committed.  After a second consumer joins and each
    consumer owns one partition, every remaining message must only become
    readable once the commit log shows the synchronizing group has passed it.
    """
    consumer_group = 'consumer-{}'.format(uuid.uuid1().hex)
    synchronize_commit_group = 'consumer-{}'.format(uuid.uuid1().hex)

    # Delivery reports, grouped by topic, in delivery order.
    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        messages_delivered[message.topic()].append(message)

    producer = Producer({
        'bootstrap.servers': os.environ['SENTRY_KAFKA_HOSTS'],
        'on_delivery': record_message_delivered,
    })

    with create_topic(partitions=2) as topic, create_topic() as commit_log_topic:

        def build_consumer():
            return SynchronizedConsumer(
                bootstrap_servers=os.environ['SENTRY_KAFKA_HOSTS'],
                consumer_group=consumer_group,
                commit_log_topic=commit_log_topic,
                synchronize_commit_group=synchronize_commit_group,
                initial_offset_reset='earliest',
            )

        # Produce some messages into the topic, alternating partitions.
        for number in range(4):
            producer.produce(topic, '{}'.format(number).encode('utf8'), partition=number % 2)

        assert producer.flush(5) == 0, 'producer did not successfully flush queue'

        # Commit the first message of each partition for the consumer group.
        committed_offsets = [
            TopicPartition(
                delivered.topic(),
                delivered.partition(),
                delivered.offset() + 1,
            ) for delivered in messages_delivered[topic][:2]
        ]
        Consumer({
            'bootstrap.servers': os.environ['SENTRY_KAFKA_HOSTS'],
            'group.id': consumer_group,
        }).commit(offsets=committed_offsets, asynchronous=False)

        consumer_a = build_consumer()

        assignments_received = defaultdict(list)

        def on_assign(consumer, assignment):
            assignments_received[consumer].append(assignment)

        consumer_a.subscribe([topic], on_assign=on_assign)

        # Wait until the first consumer has received its assignments.
        for _attempt in xrange(10):  # this takes a while
            assert consumer_a.poll(1) is None
            if assignments_received[consumer_a]:
                break

        assert len(assignments_received[consumer_a]) == 1, 'expected to receive partition assignment'
        first_assignment = set(
            (tp.topic, tp.partition) for tp in assignments_received[consumer_a][0])
        assert first_assignment == set([(topic, 0), (topic, 1)])

        # Forget the initial assignment so the rebalance can be observed.
        assignments_received[consumer_a].pop()

        consumer_b = build_consumer()
        consumer_b.subscribe([topic], on_assign=on_assign)

        # (topic, partition) -> consumer that owns it after the rebalance.
        assignments = {}

        # Wait until *both* consumers have received updated assignments.
        for consumer in [consumer_a, consumer_b]:
            for _attempt in xrange(10):  # this takes a while
                assert consumer.poll(1) is None
                if assignments_received[consumer]:
                    break

            assert len(assignments_received[consumer]) == 1, 'expected to receive partition assignment'
            assert len(assignments_received[consumer][0]) == 1, 'expected to have a single partition assignment'

            owned = assignments_received[consumer][0][0]
            assignments[(owned.topic, owned.partition)] = consumer

        assert set(assignments.keys()) == set([(topic, 0), (topic, 1)])

        for expected_message in messages_delivered[topic][2:]:
            consumer = assignments[(expected_message.topic(), expected_message.partition())]

            # Make sure that there are no messages ready to consume.
            assert consumer.poll(1) is None

            # Move the committed offset forward for our synchronizing group.
            commit_key = '{}:{}:{}'.format(
                expected_message.topic(),
                expected_message.partition(),
                synchronize_commit_group,
            )
            producer.produce(
                commit_log_topic,
                key=commit_key.encode('utf8'),
                value='{}'.format(expected_message.offset() + 1).encode('utf8'),
            )

            assert producer.flush(5) == 0, 'producer did not successfully flush queue'

            # We should have received a single message.
            # TODO: Can we also assert that the position is unpaused?)
            for _attempt in xrange(5):
                received_message = consumer.poll(1)
                if received_message is not None:
                    break

            assert received_message is not None, 'no message received'

            assert received_message.topic() == expected_message.topic()
            assert received_message.partition() == expected_message.partition()
            assert received_message.offset() == expected_message.offset()

            # We should not be able to continue reading into the topic.
            # TODO: Can we assert that the position is paused?
            assert consumer.poll(1) is None
# Step 1. Settings used to connect to the Kafka cluster
props = {
    # Where is the Kafka cluster?
    'bootstrap.servers': 'localhost:9092',  # <-- replace with the Kafka cluster to connect to
    'error_cb': error_cb                    # callback that receives error notifications
}

# Step 2. Create the Kafka Producer instance
producer = Producer(props)

# Step 3. Name of the topic to publish to, and how many messages to send
topicName = 'ak03.four_partition'
msgCount = 10000

try:
    print('Start sending messages ...')
    # produce(topic, [value], [key], [partition], [on_delivery], [timestamp], [headers])
    for i in range(msgCount):
        msg_key = str(i)
        msg_value = 'msg_' + msg_key
        producer.produce(topicName, key=msg_key, value=msg_value)
        producer.poll(0)  # <-- (important) call poll so the client services its internal buffer
        print('key={}, value={}'.format(msg_key, msg_value))
        time.sleep(3)  # pause the main thread for 3 seconds
    print('Send {} messages to Kafka'.format(msgCount))
except BufferError as e:
    # Error handling: the local queue is full
    sys.stderr.write('%% Local producer queue is full ({} messages awaiting delivery): try again\n'
                     .format(len(producer)))
except Exception as e:
    print(e)

# Step 5. Make sure everything still in the buffer has been sent to Kafka
producer.flush(10)
print('Message sending completed!')
def test_consumer_rebalance_from_uncommitted_offset():
    """Check that a rebalance re-delivers messages whose offsets were never committed.

    Four messages are produced across two partitions and the commit log is
    advanced past all of them for the synchronizing group.  ``consumer_a`` is
    expected to be assigned both partitions and read all four messages.  When
    ``consumer_b`` joins the same group, each consumer should end up with a
    single partition and ``consumer_b`` should read two messages.

    Requires a Kafka cluster reachable via ``SENTRY_KAFKA_HOSTS``.
    """
    consumer_group = 'consumer-{}'.format(uuid.uuid1().hex)
    synchronize_commit_group = 'consumer-{}'.format(uuid.uuid1().hex)

    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        # Delivery callback: collect every acknowledged message per topic.
        assert error is None
        messages_delivered[message.topic()].append(message)

    def assert_no_more_messages(consumer):
        # ``Message.error()`` returns a ``KafkaError`` object (or None), while
        # ``KafkaError._PARTITION_EOF`` is an integer code.  Comparing them
        # with ``is`` can never succeed, so compare the error *code* instead.
        message = consumer.poll(1)
        assert message is None or (
            message.error() is not None
            and message.error().code() == KafkaError._PARTITION_EOF
        ), 'there should be no more messages to recieve'

    producer = Producer({
        'bootstrap.servers': os.environ['SENTRY_KAFKA_HOSTS'],
        'on_delivery': record_message_delivered,
    })

    with create_topic(partitions=2) as topic, create_topic() as commit_log_topic:
        # Produce some messages into the topic.
        for i in range(4):
            producer.produce(topic, '{}'.format(i).encode('utf8'), partition=i % 2)

        assert producer.flush(5) == 0, 'producer did not successfully flush queue'

        # Keep the last delivered offset per (topic, partition) and publish
        # "offset + 1" to the commit log for the synchronizing group.
        latest_offsets = {
            (message.topic(), message.partition()): message.offset()
            for message in messages_delivered[topic]
        }
        # Distinct loop names so the ``topic`` bound by ``with`` is not re-bound.
        for (message_topic, partition), offset in latest_offsets.items():
            producer.produce(
                commit_log_topic,
                key='{}:{}:{}'.format(
                    message_topic,
                    partition,
                    synchronize_commit_group,
                ).encode('utf8'),
                value='{}'.format(offset + 1).encode('utf8'),
            )

        assert producer.flush(5) == 0, 'producer did not successfully flush queue'

        consumer_a = SynchronizedConsumer(
            bootstrap_servers=os.environ['SENTRY_KAFKA_HOSTS'],
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset='earliest',
        )

        assignments_received = defaultdict(list)

        def on_assign(consumer, assignment):
            assignments_received[consumer].append(assignment)

        consumer_a.subscribe([topic], on_assign=on_assign)

        consume_until_constraints_met(consumer_a, [
            lambda message: assignments_received[consumer_a],
            collect_messages_recieved(4),
        ], 10)

        assert len(assignments_received[consumer_a]) == 1, \
            'expected to receive partition assignment'
        assert set(
            (i.topic, i.partition) for i in assignments_received[consumer_a][0]
        ) == set([(topic, 0), (topic, 1)])

        # Clear the recorded assignment so the rebalance below is detectable.
        assignments_received[consumer_a].pop()

        assert_no_more_messages(consumer_a)

        consumer_b = SynchronizedConsumer(
            bootstrap_servers=os.environ['SENTRY_KAFKA_HOSTS'],
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset='earliest',
        )

        consumer_b.subscribe([topic], on_assign=on_assign)

        consume_until_constraints_met(consumer_a, [
            lambda message: assignments_received[consumer_a],
        ], 10)

        consume_until_constraints_met(consumer_b, [
            lambda message: assignments_received[consumer_b],
            collect_messages_recieved(2),
        ], 10)

        for consumer in [consumer_a, consumer_b]:
            assert len(assignments_received[consumer][0]) == 1

        assert_no_more_messages(consumer_a)
        assert_no_more_messages(consumer_b)
class KafkaProducer:
    """Thin wrapper that configures a confluent-kafka ``Producer`` and publishes JSON events.

    ``kafka_env`` selects the security setup: ``'LOCAL'`` uses a plain
    connection, any other value enables SASL_SSL with PLAIN credentials, and
    ``'OCP'`` additionally switches to SCRAM-SHA-512 with a CA certificate
    taken from the ``PEM_CERT`` environment variable.
    """

    def __init__(self, kafka_env='LOCAL', kafka_brokers="", kafka_user="", kafka_password=""):
        self.kafka_env = kafka_env
        self.kafka_brokers = kafka_brokers
        self.kafka_user = kafka_user
        self.kafka_password = kafka_password

    def _build_options(self, groupID):
        """Return the configuration dict for the current environment."""
        options = {
            'bootstrap.servers': self.kafka_brokers,
            'group.id': groupID
        }
        # We need this test as local kafka does not expect SSL protocol.
        if self.kafka_env != 'LOCAL':
            options['security.protocol'] = 'SASL_SSL'
            options['sasl.mechanisms'] = 'PLAIN'
            options['sasl.username'] = self.kafka_user
            options['sasl.password'] = self.kafka_password
        if self.kafka_env == 'OCP':
            options['sasl.mechanisms'] = 'SCRAM-SHA-512'
            # Raises KeyError when PEM_CERT is not set in the environment.
            options['ssl.ca.location'] = os.environ['PEM_CERT']
        return options

    def _obfuscated_password(self):
        """Return the password masked for logging (first and last character only)."""
        if len(self.kafka_password) > 3:
            return self.kafka_password[0] + "*****" + self.kafka_password[-1]
        return "******"

    def _print_configuration(self, options):
        """Print the producer configuration for debugging, masking the password."""
        # The labels previously said "KafkaConsumer"/"consumer", which was
        # misleading in producer logs.
        print("[KafkaProducer] - This is the configuration for the producer:")
        print("[KafkaProducer] - -------------------------------------------")
        print('[KafkaProducer] - Bootstrap Server:      {}'.format(
            options['bootstrap.servers']))
        if self.kafka_env != 'LOCAL':
            print('[KafkaProducer] - Security Protocol:     {}'.format(
                options['security.protocol']))
            print('[KafkaProducer] - SASL Mechanism:        {}'.format(
                options['sasl.mechanisms']))
            print('[KafkaProducer] - SASL Username:         {}'.format(
                options['sasl.username']))
            print('[KafkaProducer] - SASL Password:         {}'.format(
                self._obfuscated_password()))
            if self.kafka_env == 'OCP':
                print('[KafkaProducer] - SSL CA Location:       {}'.format(
                    options['ssl.ca.location']))
        print("[KafkaProducer] - -------------------------------------------")

    def prepareProducer(self, groupID="pythonproducers"):
        """Build the configuration, print it, and create ``self.producer``.

        :param groupID: value stored under ``group.id`` in the configuration.
        :raises KeyError: if ``kafka_env`` is ``'OCP'`` and ``PEM_CERT`` is unset.
        """
        options = self._build_options(groupID)
        self._print_configuration(options)
        # Creating the producer
        self.producer = Producer(options)

    def delivery_report(self, err, msg):
        """Print the delivery result of one produced message.

        Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush().
        """
        if err is not None:
            print('[ERROR] - [KafkaProducer] - Message delivery failed: {}'.format(err))
        else:
            print('[KafkaProducer] - Message delivered to {} [{}]'.format(
                msg.topic(), msg.partition()))

    def publishEvent(self, topicName, eventToSend, keyName):
        """Serialize ``eventToSend`` as JSON and publish it synchronously.

        :param topicName: destination topic.
        :param eventToSend: JSON-serializable mapping.
        :param keyName: key of ``eventToSend`` whose value becomes the message key.
        """
        dataStr = json.dumps(eventToSend)
        self.producer.produce(topicName,
                              key=eventToSend[keyName],
                              value=dataStr.encode('utf-8'),
                              callback=self.delivery_report)
        # Block until the message has been delivered (or has failed).
        self.producer.flush()
class KafkaConnector(object):
    """Simple wrapper class to configure a simple kafka consumer and
    producer pair, so that they can be used to perform simple filter() and
    map() operations over the received tweets"""

    def __init__(
        self,
        group_id=None,
        consumer_topic='consumer_limbo',
        producer_topic='consumer_limbo',
        logging_topic='minteressa_stats',
        bootstrap_servers='kafka:9092'
    ):
        self.group_id = group_id
        self.bootstrap_servers = bootstrap_servers
        self.consumer_topic = consumer_topic
        self.producer_topic = producer_topic
        self.logging_topic = logging_topic

        # Both clients are created lazily by connect().
        self.consumer = None
        self.producer = None

    def listen(self):
        """Yield every proper message received on the consumer topic, forever.

        End-of-partition events are reported on stderr and skipped.

        :raises KafkaException: for any other error reported on a message.
        """
        while True:
            msg = self.consumer.poll()
            if msg is None:
                continue

            error = msg.error()
            if not error:
                # Proper message
                sys.stdout.write(
                    '%s [partition-%d] at offset %d with key %s:\n' %
                    (
                        msg.topic(),
                        msg.partition(),
                        msg.offset(),
                        str(msg.key())
                    )
                )
                yield msg
            elif error.code() == KafkaError._PARTITION_EOF:
                # End of partition event
                sys.stderr.write(
                    '%% %s [%d] reached end at offset %d\n' % (
                        msg.topic(),
                        msg.partition(),
                        msg.offset()
                    )
                )
            else:
                raise KafkaException(error)

    def connect(self):
        """Create the consumer (subscribed to ``consumer_topic``) and the producer."""
        self.consumer = Consumer({
            'bootstrap.servers': self.bootstrap_servers,
            'group.id': self.group_id,
            'default.topic.config': {
                'auto.offset.reset': 'smallest'
            }
        })
        print("subscribing to %s" % self.consumer_topic)
        self.consumer.subscribe([
            self.consumer_topic
        ])
        print("Subscribed to topic %s " % self.consumer_topic)

        self.producer = Producer({
            'bootstrap.servers': self.bootstrap_servers,
            'group.id': self.group_id
        })

    def send(self, message, producer_topic=None):
        """Queue ``message`` on ``producer_topic`` (defaults to ``self.producer_topic``).

        The message is sent asynchronously; it is not flushed here.
        """
        producer_topic = producer_topic \
            if producer_topic is not None \
            else self.producer_topic

        self.producer.produce(
            producer_topic,
            message
        )
        # Serve queued delivery events without blocking, since this path
        # never flushes.
        self.producer.poll(0)

    def log(self, message, logging_topic=None):
        """Publish ``message`` on ``logging_topic`` and wait for delivery."""
        logging_topic = logging_topic \
            if logging_topic is not None \
            else self.logging_topic

        self.producer.produce(logging_topic, message)
        self.producer.flush()

    def close(self):
        """Close the consumer and drain the producer.

        Safe to call before connect().  The producer is flushed rather than
        closed: the confluent-kafka Producer has not historically exposed a
        ``close()`` method, and ``flush()`` waits for queued messages.
        """
        if self.consumer is not None:
            self.consumer.close()
        if self.producer is not None:
            self.producer.flush()
subject = schema.fullname # == "my.test.value" # io.confluent.kafka.serializers.subject.TopicRecordNameStrategy: # The subject name is <topic>-<type>, where <topic> is the Kafka topic name, and <type> is the fully-qualified # name of the Avro record type of the message. This setting also allows any number of event types in the same topic, # and further constrains the compatibility check to the current topic only. # subject = topic + '-' + schema.fullname # == "avro-python-producer-topic-my.test.value" # get registered schema id from the schema_registry schema_id = schema_registry.register(subject, schema) for i in range(5): key = "key-" + str(i) value = "value-" + str(i) record_value = avro_serde.encode_record_with_schema_id( schema_id=schema_id, record={ "name": value, "type": "avro" }, is_key=False, ) producer.produce(topic, key=key.encode('utf-8'), value=record_value) print("Produced:", key, record_value) producer.flush() print("End: avro-python-producer")
class KafkaDestination(object):
    """ syslog-ng Apache Kafka destination.

    Lifecycle methods (``init``, ``open``, ``is_opened``, ``send``, ``close``,
    ``deinit``) build a producer configuration from the destination options
    and forward each log message to a Kafka topic.
    """

    _kafka_producer = None

    def __init__(self):
        self.hosts = None
        self.topic = None
        self.msg_key = None
        self.partition = None
        self.programs = None
        self.group_id = None
        self.broker_version = None
        self.verbose = False
        self.display_stats = False
        self.producer_config = None
        # Producer configuration.  Kept per instance: a class-level dict
        # would be shared (and mutated) by every destination object.
        self._conf = {}

    def init(self, args):
        """ This method is called at initialization time.

        Translates the destination options in ``args`` into the producer
        configuration.

        :param args: mapping of option name to string value.
        :return: False if ``hosts`` or ``topic`` is missing, True otherwise.
        """
        if 'producer_config' in args:
            try:
                self.producer_config = ast.literal_eval(args['producer_config'])
                self._conf.update(self.producer_config)
            except (ValueError, SyntaxError, TypeError):
                # literal_eval raises SyntaxError on malformed input, and
                # update() raises TypeError/ValueError for non-mapping values.
                LOG.error("Given config %s is not in a Python dict format."
                          % args['producer_config'])

        try:
            self.hosts = args['hosts']
            self.topic = args['topic']
            self._conf['bootstrap.servers'] = self.hosts
        except KeyError:
            LOG.error("Missing `hosts` or `topic` option...")
            return False

        if 'msg_key' in args:
            self.msg_key = args['msg_key']
            LOG.info("Message key used will be %s" % self.msg_key)

        if 'partition' in args:
            self.partition = args['partition']
            LOG.info("Partition to produce to %s" % self.partition)

        # optional `programs` parameter to filter out messages
        if 'programs' in args:
            self.programs = parse_str_list(args['programs'])
            LOG.info("Programs to filter against %s" % self.programs)

        if 'group_id' in args:
            self.group_id = args['group_id']
            self._conf['group.id'] = self.group_id
            LOG.info("Broker group_id=%s" % self.group_id)

        if 'broker_version' in args:
            self.broker_version = args['broker_version']
            # 0.10 / 0.11 brokers get an API version request; any other
            # version is passed as the fallback with the request disabled.
            if '.'.join(self.broker_version.split('.')[:2]) in ('0.10', '0.11'):
                self._conf['api.version.request'] = True
            else:
                self._conf['broker.version.fallback'] = self.broker_version
                self._conf['api.version.request'] = False
            LOG.info("Broker version=%s" % self.broker_version)
        else:
            self.broker_version = DEFAULT_BROKER_VERSION_FALLBACK
            self._conf['broker.version.fallback'] = DEFAULT_BROKER_VERSION_FALLBACK
            self._conf['api.version.request'] = False
            LOG.warning("Default broker version fallback %s "
                        "will be applied here." % DEFAULT_BROKER_VERSION_FALLBACK)

        # provide a global `on_delivery` callback in the `Producer()` config
        # dict better for memory consumptions vs per message callback.
        self._conf['on_delivery'] = delivery_callback
        if 'verbose' in args:
            self.verbose = ast.literal_eval(args['verbose'])
        if not self.verbose:
            # only interested in delivery failures here. We do provide a
            # global on_delivery callback in the Producer() config dict and
            # also set delivery.report.only.error.
            self._conf['delivery.report.only.error'] = True
            LOG.info("Verbose mode is OFF: you will not be able to see "
                     "messages in here. Failures only. Use 'verbose=('True')' "
                     "in your destination options to see successfully "
                     "processed messages in your logs.")

        # display broker stats?
        if 'display_stats' in args:
            self.display_stats = ast.literal_eval(args['display_stats'])
        if self.display_stats:
            self._conf['stats_cb'] = stats_callback
            LOG.info("Broker statistics will be displayed.")

        LOG.info(
            "Initialization of Kafka Python driver w/ args=%s" % self._conf)
        return True

    def open(self):
        """ Open a connection to the Kafka service.

        Creates the producer from the configuration built by ``init()``.

        :return: True
        """
        LOG.info("Opening connection to the remote Kafka services at %s"
                 % self.hosts)
        self._kafka_producer = Producer(**self._conf)
        return True

    def is_opened(self):
        """ Check if the connection to Kafka is able to receive messages.

        :return: True if a producer has been created, False otherwise.
        """
        return self._kafka_producer is not None

    def close(self):
        """ Close the connection to the Kafka service.

        Flushes queued messages with a 30 second timeout.

        :return: True
        """
        LOG.debug("KafkaDestination.close()....")
        if self._kafka_producer is not None:
            LOG.debug("Flushing producer w/ a timeout of 30 seconds...")
            self._kafka_producer.flush(30)
        return True

    # noinspection PyMethodMayBeStatic
    def deinit(self):
        """ This method is called at deinitialization time.

        Drops the reference to the producer.

        :return: True
        """
        LOG.debug("KafkaDestination.deinit()....")
        if self._kafka_producer:
            self._kafka_producer = None
        return True

    def send(self, ro_msg):
        """ Send a message to the target service

        It should return True to indicate success, False will suspend the
        destination for a period specified by the time-reopen() option.

        Producer errors are logged and reported as success on purpose, so
        that the destination is not closed.

        :param ro_msg: a syslog-ng ``LogMessage`` or a ``values-pair`` dict.
        :return: True or False
        """
        # do nothing if msg is empty
        if not ro_msg:
            return True

        # no syslog-ng `values-pair` here we dealing with `LogMessage`
        if not isinstance(ro_msg, dict):
            # syslog-ng `LogMessage` is read-only
            # goal is rfc5424 we cannot use values-pair because of memory leaks
            try:
                msg = {'FACILITY': ro_msg.FACILITY,
                       'PRIORITY': ro_msg.PRIORITY,
                       'HOST': ro_msg.HOST,
                       'PROGRAM': ro_msg.PROGRAM,
                       'DATE': ro_msg.DATE,
                       'MESSAGE': ro_msg.MESSAGE}
            except AttributeError:
                LOG.error("Your version of syslog-ng is not supported. "
                          "Please use syslog-ng 3.7.x")
                return False
        else:
            LOG.warning("You are using `values-pair` if you are using "
                        "syslog-ng <= 3.11 it is known to be leaking...")
            msg = ro_msg

        try:
            # check if we do have a program filter defined.
            msg_program = msg['PROGRAM']
            if self.programs is not None:
                if msg_program not in self.programs:
                    # notify of success
                    return True
            if msg_program == 'firewall':
                firewall_msg = msg['MESSAGE']
                msg['MESSAGE'] = parse_firewall_msg(firewall_msg)
            elif msg_program == 'nat':
                nat_msg = msg['MESSAGE']
                msg['MESSAGE'] = parse_nat_msg(nat_msg)
            # convert date string to UNIX timestamp
            msg_date = msg['DATE']
            if msg_date is not None:
                msg['DATE'] = date_str_to_timestamp(msg_date)

            msg_string = str(msg)

            kwargs = {}
            if self.msg_key and self.msg_key in msg:
                kwargs['key'] = msg[self.msg_key]
            if self.partition:
                try:
                    kwargs['partition'] = int(self.partition)
                except ValueError:
                    LOG.warning(
                        "Ignore partition=%s because it is not an int."
                        % self.partition)

            self._kafka_producer.produce(self.topic, msg_string, **kwargs)

            # `poll()` doesn't do any sleeping at all if you give it 0, all
            # it does is grab a mutex, check a queue, and release the mutex.
            # It is okay to call poll(0) after each produce call, the
            # performance impact is negligible, if any.
            self._kafka_producer.poll(0)
        except BufferError:
            LOG.error("Producer queue is full. This message will be discarded. "
                      "%d messages waiting to be delivered.",
                      len(self._kafka_producer))
            # do not return False here as the destination would be closed
            # and we would have to restart syslog-ng
            sleep(5)
            return True
        except (KafkaException, UnicodeEncodeError) as e:
            LOG.error("An error occurred while trying to send messages...   "
                      "See details: %s" % e, exc_info=True)
            sleep(5)
            # do not return False here as the destination would be closed
            # and we would have to restart syslog-ng
            return True

        return True
def test_consumer_rebalance_from_committed_offset(requires_kafka):
    """Check that consumers resume from committed offsets after a rebalance.

    Four messages are produced over two partitions and the first two are
    committed for the consumer group.  After a second consumer joins, each
    consumer is expected to own one partition and to receive a remaining
    message only once the commit log has been advanced past it.
    """
    consumer_group = f"consumer-{uuid.uuid1().hex}"
    synchronize_commit_group = f"consumer-{uuid.uuid1().hex}"

    delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        delivered[message.topic()].append(message)

    producer = Producer(
        {
            "bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"],
            "on_delivery": record_message_delivered,
        }
    )

    with create_topic(partitions=2) as topic, create_topic() as commit_log_topic:
        # Alternate the four messages between partitions 0 and 1.
        for n in range(4):
            producer.produce(topic, f"{n}".encode(), partition=n % 2)

        assert producer.flush(5) == 0, "producer did not successfully flush queue"

        # Commit the first two delivered messages for the consumer group
        # through a throwaway plain consumer.
        committer = Consumer(
            {"bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"], "group.id": consumer_group}
        )
        committed_offsets = [
            TopicPartition(m.topic(), m.partition(), m.offset() + 1)
            for m in delivered[topic][:2]
        ]
        committer.commit(offsets=committed_offsets, asynchronous=False)

        def build_consumer():
            # Both group members are configured identically.
            return SynchronizedConsumer(
                cluster_name="default",
                consumer_group=consumer_group,
                commit_log_topic=commit_log_topic,
                synchronize_commit_group=synchronize_commit_group,
                initial_offset_reset="earliest",
            )

        assignments_received = defaultdict(list)

        def on_assign(consumer, assignment):
            assignments_received[consumer].append(assignment)

        def poll_until_assigned(member):
            # Poll up to ten times; no message may arrive while waiting.
            for _ in range(10):  # this takes a while
                assert member.poll(1) is None
                if assignments_received[member]:
                    return

        consumer_a = build_consumer()
        consumer_a.subscribe([topic], on_assign=on_assign)

        # The first consumer initially owns both partitions.
        poll_until_assigned(consumer_a)
        first_round = assignments_received[consumer_a]
        assert len(first_round) == 1, "expected to receive partition assignment"
        assert {(tp.topic, tp.partition) for tp in first_round[0]} == {
            (topic, 0),
            (topic, 1),
        }
        first_round.pop()

        consumer_b = build_consumer()
        consumer_b.subscribe([topic], on_assign=on_assign)

        # After the rebalance each consumer should own exactly one partition.
        owners = {}
        for member in (consumer_a, consumer_b):
            poll_until_assigned(member)
            received = assignments_received[member]
            assert len(received) == 1, "expected to receive partition assignment"
            assert len(received[0]) == 1, "expected to have a single partition assignment"
            tp = received[0][0]
            owners[(tp.topic, tp.partition)] = member

        assert set(owners.keys()) == {(topic, 0), (topic, 1)}

        for expected_message in delivered[topic][2:]:
            consumer = owners[(expected_message.topic(), expected_message.partition())]

            # Nothing is readable before the synchronizing group commits.
            assert consumer.poll(1) is None

            # Advance the commit log past the expected message.
            producer.produce(
                commit_log_topic,
                key=f"{expected_message.topic()}:{expected_message.partition()}:{synchronize_commit_group}".encode(),
                value=f"{expected_message.offset() + 1}".encode(),
            )
            assert producer.flush(5) == 0, "producer did not successfully flush queue"

            # Exactly one message should now become available.
            # TODO: Can we also assert that the position is unpaused?)
            for _ in range(5):
                received_message = consumer.poll(1)
                if received_message is not None:
                    break

            assert received_message is not None, "no message received"
            assert received_message.topic() == expected_message.topic()
            assert received_message.partition() == expected_message.partition()
            assert received_message.offset() == expected_message.offset()

            # Reading further must block again.
            # TODO: Can we assert that the position is paused?
            assert consumer.poll(1) is None
class ConfluentKafkaMsgQAPI:
    """
    This class provides API's into interact with Kafka Queue.

    The broker address and topic are read from the ``broker_name_key`` and
    ``topic_key`` environment variables.  Producer and consumer instances are
    created lazily by ``enqueue()`` and ``dequeue()``.
    """

    def __init__(self,
                 is_producer=False,
                 is_consumer=False,
                 perform_subscription=False,
                 thread_identifier=None):
        if not is_producer and not is_consumer:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: You need to pick either producer or consumer."
            )
        self.producer_instance = None
        self.consumer_instance = None
        self.broker_name = None
        self.topic = None
        self.producer_conf = None
        self.consumer_conf = None
        self.is_topic_created = False
        self.perform_subscription = perform_subscription
        self.thread_identifier = thread_identifier
        self.__read_environment_variables()

    def __read_environment_variables(self):
        """
        This method is used to read the environment variables defined in the OS.

        Blocks until both ``broker_name_key`` and ``topic_key`` are set,
        re-reading every two seconds.  The first read happens immediately, so
        no delay is paid when the variables are already present.
        :return:
        """
        while True:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: "
                "Trying to read the environment variables...")
            self.broker_name = os.getenv("broker_name_key", default=None)
            self.topic = os.getenv("topic_key", default=None)
            if self.broker_name is not None and self.topic is not None:
                break
            time.sleep(2)
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: broker_name={}".format(self.broker_name))
        logging_to_console_and_syslog("ConfluentKafkaMsgQAPI: topic={}".format(
            self.topic))

    # Optional per-message delivery callback (triggered by poll() or flush())
    # when a message has been successfully delivered or permanently
    # failed delivery (after retries).
    @staticmethod
    def delivery_callback(err, msg):
        """Log the delivery outcome of one produced message."""
        if err:
            logging_to_console_and_syslog('%% Message failed delivery: %s\n' %
                                          err)
        else:
            logging_to_console_and_syslog(
                '%% Message delivered to %s [%d] @ %s\n' %
                (msg.topic(), msg.partition(), str(msg.offset())))

    def __producer_connect(self):
        """
        This method tries to connect to the kafka broker based upon the type of kafka.

        :return: True if a producer instance exists afterwards, False if
                 creating it failed (after a five second back-off).
        """
        if self.producer_instance is not None:
            # Already connected: report success instead of False.
            return True

        try:
            self.producer_conf = {'bootstrap.servers': self.broker_name}
            # Create Producer instance
            self.producer_instance = Producer(**self.producer_conf)
        except Exception:
            # Not a bare ``except:`` so KeyboardInterrupt/SystemExit propagate.
            print("Exception in user code:")
            print("-" * 60)
            traceback.print_exc(file=sys.stdout)
            print("-" * 60)
            time.sleep(5)
            return False

        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: Successfully "
            "connected to broker_name={}".format(self.broker_name))
        return True

    def enqueue(self, filename):
        """
        This method tries to post a message to the pre-defined kafka topic.

        :param filename: string payload; it is UTF-8 encoded before sending.
        :return status False or True: True if the message was queued.
        """
        status = False

        if not filename:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: filename is None or invalid")
            return status

        if self.producer_instance is None:
            logging_to_console_and_syslog(
                "KafkaMsgQAPI: Producer instance is None. Trying to create one.."
            )
            if not self.__producer_connect():
                logging_to_console_and_syslog(
                    "Unable to create producer instance.")
                return status

        if not self.is_topic_created:
            try:
                if self.producer_instance.list_topics(self.topic, timeout=1.0):
                    logging_to_console_and_syslog(
                        "Found topic name = {} in the zookeeper.".format(
                            self.topic))
                    self.is_topic_created = True
            except KafkaException:
                # Metadata lookup failed: request creation of the topic.
                kafka_admin_client = admin.AdminClient(self.producer_conf)
                logging_to_console_and_syslog("Creating topic {}.".format(
                    self.topic))
                ret = kafka_admin_client.create_topics(new_topics=[
                    admin.NewTopic(topic=self.topic, num_partitions=1)
                ],
                                                       operation_timeout=1.0)
                logging_to_console_and_syslog("ret = {}".format(ret))

        # Asynchronously produce a message, the delivery report callback
        # will be triggered from poll() or flush() below, when the message
        # has been successfully delivered or failed permanently.
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: Posting filename={} into "
            "kafka broker={}, topic={}".format(filename, self.broker_name,
                                               self.topic))
        value = filename.encode('utf-8')
        try:
            # Produce line (without newline)
            self.producer_instance.produce(
                self.topic,
                value,
                callback=ConfluentKafkaMsgQAPI.delivery_callback)
            status = True
        except BufferError:
            sys.stderr.write('%% Local producer queue is full '
                             '(%d messages awaiting delivery): try again\n' %
                             len(self.producer_instance))
            status = False
        except Exception:
            print("ConfluentKafkaMsgQAPI: Exception in user code:")
            print("-" * 60)
            traceback.print_exc(file=sys.stdout)
            print("-" * 60)
            status = False
        else:
            event = "ConfluentKafkaMsgQAPI: Posting filename={} into " \
                    "kafka broker={}, topic={}." \
                .format(filename, self.broker_name, self.topic)
            logging_to_console_and_syslog(event)

        # Serve delivery callback queue.
        # NOTE: Since produce() is an asynchronous API this poll() call
        # will most likely not serve the delivery callback for the
        # last produce()d message.
        self.producer_instance.poll(timeout=0.1)

        # Give outstanding messages a short window to be delivered.
        self.producer_instance.flush(timeout=0.1)

        return status

    def __consumer_connect_to_broker(self):
        """
        This method tries to connect to the kafka broker.

        :return: True if a consumer instance exists afterwards, False if
                 creating it failed (after a five second back-off).
        """
        if self.consumer_instance is not None:
            return True

        # Consumer configuration
        # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        try:
            logging_to_console_and_syslog(
                "Consumer:{}:Trying to connect to broker_name={}".format(
                    self.thread_identifier, self.broker_name))
            # Create Consumer instance
            # Hint: try debug='fetch' to generate some log messages
            consumer_conf = {
                'bootstrap.servers': self.broker_name,
                'group.id': self.topic,
                'session.timeout.ms': 6000,
                'auto.offset.reset': 'earliest'
            }
            self.consumer_instance = Consumer(consumer_conf)
        except Exception:
            logging_to_console_and_syslog(
                "Consumer:{}:Exception in user code:".format(
                    self.thread_identifier))
            logging_to_console_and_syslog("-" * 60)
            traceback.print_exc(file=sys.stdout)
            logging_to_console_and_syslog("-" * 60)
            time.sleep(5)
            return False

        # Only report success when the consumer was actually created.
        logging_to_console_and_syslog("Consumer:{}:Consumer Successfully "
                                      "connected to broker_name={}".format(
                                          self.thread_identifier,
                                          self.broker_name))
        return True

    @staticmethod
    def print_assignment(consumer, partitions):
        """Print the partitions assigned to ``consumer``."""
        print('consumer = {}, Assignment {}:'.format(consumer, partitions))

    def dequeue(self):
        """
        Poll the topic once (one second timeout) and return one message.

        A consumer is created and subscribed on demand.  After a message has
        been read, the consumer is closed and discarded.

        :return: the message value decoded as UTF-8, or None when no message
                 arrived or the polled message carried an error.
        """
        if not self.consumer_instance:
            conf = {
                'bootstrap.servers': self.broker_name,
                'group.id': self.topic,
                'session.timeout.ms': 6000,
                'auto.offset.reset': 'earliest'
            }
            self.consumer_instance = Consumer(conf)
            self.consumer_instance.subscribe(
                [self.topic], on_assign=ConfluentKafkaMsgQAPI.print_assignment)

        msg = self.consumer_instance.poll(timeout=1.0)
        if msg is None or msg.error():
            return None

        logging_to_console_and_syslog(
            '%% %s [%d] at offset %d with key %s:\n' %
            (msg.topic(), msg.partition(), msg.offset(), str(msg.key())))
        msg = msg.value().decode('utf8')
        logging_to_console_and_syslog("msg.value()={}".format(msg))
        self.consumer_instance.close()
        self.consumer_instance = None
        return msg

    def cleanup(self):
        """Close and discard the consumer instance, if one exists."""
        if self.consumer_instance:
            self.consumer_instance.close()
            self.consumer_instance = None
def test_consumer_start_from_partition_start(requires_kafka):
    """Check that a synchronized consumer starts at the partition start.

    Three messages are produced to a topic.  The consumer is expected to
    deliver nothing until the commit log is advanced past the first message,
    then to deliver exactly that one message and block again.
    """
    synchronize_commit_group = f"consumer-{uuid.uuid1().hex}"

    delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        delivered[message.topic()].append(message)

    producer = Producer(
        {
            "bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"],
            "on_delivery": record_message_delivered,
        }
    )

    with create_topic() as topic, create_topic() as commit_log_topic:
        # Seed the topic with three messages.
        for n in range(3):
            producer.produce(topic, f"{n}".encode())

        assert producer.flush(5) == 0, "producer did not successfully flush queue"

        # The consumer under test, in a fresh consumer group.
        consumer = SynchronizedConsumer(
            cluster_name="default",
            consumer_group=f"consumer-{uuid.uuid1().hex}",
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset="earliest",
        )

        assignments_received = []

        def on_assign(c, assignment):
            assert c is consumer
            assignments_received.append(assignment)

        consumer.subscribe([topic], on_assign=on_assign)

        # Poll until the assignment callback has fired (up to ten attempts);
        # no message may be delivered in the meantime.
        for _ in range(10):  # this takes a while
            assert consumer.poll(1) is None
            if assignments_received:
                break

        assert len(assignments_received) == 1, "expected to receive partition assignment"
        assigned = {(tp.topic, tp.partition) for tp in assignments_received[0]}
        assert assigned == {(topic, 0)}

        # TODO: Make sure that all partitions remain paused.

        # Nothing is readable before the synchronizing group commits.
        assert consumer.poll(1) is None

        # Advance the commit log past the first delivered message.
        first_delivered = delivered[topic][0]
        producer.produce(
            commit_log_topic,
            key=f"{first_delivered.topic()}:{first_delivered.partition()}:{synchronize_commit_group}".encode(),
            value=f"{first_delivered.offset() + 1}".encode(),
        )

        assert producer.flush(5) == 0, "producer did not successfully flush queue"

        # Exactly one message should now become available.
        # TODO: Can we also assert that the position is unpaused?)
        for _ in range(5):
            message = consumer.poll(1)
            if message is not None:
                break

        assert message is not None, "no message received"

        expected_message = delivered[topic][0]
        assert message.topic() == expected_message.topic()
        assert message.partition() == expected_message.partition()
        assert message.offset() == expected_message.offset()

        # Reading further must block again.
        # TODO: Can we assert that the position is paused?
        assert consumer.poll(1) is None
class BMPNodes(object):
    """Track OpenBMP routers and push their RIB state through Redis to Kafka.

    Constructing an instance connects to Redis, flushes it, and starts four
    daemon threads: three Kafka consumers (router, peer and unicast-prefix
    topics) and one Redis pub/sub listener.  The listener maps Redis
    channels to workers:

    * ``AdjInRib``   -> :meth:`adjRibPolicyWorker`
    * ``AdjInRibPP`` -> :meth:`localRibWorker`
    * ``localRib``   -> :meth:`kafkaWorker`

    Call :meth:`consumer_cleanup` to stop the threads.
    """

    class PoisonPillException(Exception):
        """Raised inside a worker thread to unwind it when shutdown is requested."""
        pass

    def __init__(self, bootstrap_server=None, redishost=None):
        """Connect to Redis and start the worker threads.

        Args:
            bootstrap_server: Kafka ``bootstrap.servers`` value.
            redishost: Redis host name.

        Raises:
            ValueError: if either argument is ``None``.
        """
        self.nodes = {}
        if redishost is None:
            raise ValueError("Redis Hostname not specified, bailing out")

        self.redis = redis.StrictRedis(host=redishost)
        self.redis.flushall()
        self.pubsub = self.redis.pubsub()

        self.routerevent = threading.Event()
        self.peerevent = threading.Event()
        self.threadList = []
        self.poisonpillq = Queue.Queue()
        self.peer_consumer = None
        self.router_consumer = None
        self.prefix_consumer = None
        self.rib_producer = None

        if bootstrap_server is None:
            raise ValueError("Bootstrap server not specified")
        self.bootstrap_server = bootstrap_server

        # The dispatch tables must exist BEFORE any thread starts: the
        # threads read them as soon as the first message arrives.
        self.dispatch = {'init': self.add_router, 'term': self.delete_router}
        self.redis_dispatch = {
            'AdjInRib': self.adjRibPolicyWorker,
            'AdjInRibPP': self.localRibWorker,
            'localRib': self.kafkaWorker
        }

        for fn in (self.capture_router_msg, self.capture_peer_msg,
                   self.capture_prefix_msg, self.redis_listener):
            thread = threading.Thread(target=fn, args=())
            self.threadList.append(thread)
            thread.daemon = True  # Daemonize thread
            thread.start()  # Start the execution

    def get_nodes(self):
        """Return a two-way mapping between ``"name:ipaddr"`` and node hash."""
        nodeset = {}
        # Snapshot the items: consumer threads may mutate self.nodes.
        for node, rtr in list(self.nodes.items()):
            label = str(rtr.name) + ':' + str(rtr.ipaddr)
            nodeset[label] = node
            # Also provide the reverse mapping
            nodeset[node] = label
        return nodeset

    def serialize(self):
        """Return ``{node_hash: node.serialize()}`` for every known node."""
        return {node: rtr.serialize()
                for node, rtr in list(self.nodes.items())}

    def consumer_cleanup(self):
        """Ask every worker thread to stop and wait for all of them."""
        logger.debug("Cleaning up, exiting the active threads")
        # One pill per thread on the shared queue.
        for thread in self.threadList:
            self.poisonpillq.put("quit")

        # The redis listener will need the poisonpill channel publish
        self.redis.publish('poisonpill', "quit")

        for thread in self.threadList:
            logger.debug("Waiting for %s to finish..." % (thread.name))
            thread.join()
        return

    def process_msg(self, router_msg):
        """Dispatch each router message on its ``action`` field.

        ``action == 'first'`` is ignored; any other action is looked up in
        ``self.dispatch`` (a ``KeyError`` propagates for unknown actions).
        """
        for msg in router_msg:
            action = str(msg['action'])
            if action != 'first':
                self.dispatch[action](msg)
            else:
                logger.debug("Ignoring action=first in openbmp router message")

    def add_router(self, router_msg):
        """Create a Node for ``router_msg`` unless its hash is already known."""
        if str(router_msg['hash']) not in self.nodes:
            # Create the router object ('name' and 'ip_address' are popped
            # out of the message; the remainder is passed as data).
            node = Node(node_hash=router_msg['hash'],
                        name=router_msg.pop('name'),
                        ipaddr=router_msg.pop('ip_address'),
                        data=router_msg)
            # Add to existing router set
            self.nodes.update({str(router_msg['hash']): node})
        else:
            logger.debug(
                "Received an add event for an existing peer. Strange, but ignore"
            )

    def delete_router(self, router_msg):
        """Forget the router named by ``router_msg['hash']`` locally and in Redis."""
        router_hash = str(router_msg['hash'])
        if router_hash in self.nodes:
            # Delete the particular router from the current router set
            del self.nodes[router_hash]
            # Delete the router hash from redis
            self.redis.delete(router_hash)
        else:
            logger.debug(
                "Received a del event for a non-existent peer, ignore")

    def update_redis(self, channel=None):
        """Write the current node state to Redis and optionally notify a channel.

        Args:
            channel: when truthy, a key of ``self.redis_dispatch``; a
                notification is published on that Redis channel.
        """
        nodeset = self.get_nodes()
        if nodeset:
            self.redis.hmset("routers", nodeset)

        for node, rtr in list(self.nodes.items()):
            self.redis.hmset(node, rtr.serialize())

        if channel:
            # Publish message to redis Listeners
            self.redis.publish(
                channel, "Publish to " +
                str(self.redis_dispatch[channel].__name__) + " worker")

    def redis_listener(self):
        """Thread body: run the worker registered for each Redis channel event.

        Returns when a message whose data is ``"quit"`` arrives, or on any
        other exception (which is only logged).
        """
        self.pubsub.subscribe(
            ['AdjInRib', 'AdjInRibPP', 'localRib', 'poisonpill'])
        try:
            while True:
                for item in self.pubsub.listen():
                    logger.info("Received Redis event")
                    if item['data'] == "quit":
                        self.pubsub.unsubscribe()
                        logger.debug(
                            "unsubscribed and finished redis pubsub listener")
                        raise self.PoisonPillException
                    if item['channel'] in self.redis_dispatch:
                        self.redis_dispatch[item['channel']]()
        except self.PoisonPillException:
            return
        except Exception as e:
            logger.debug("Error while listening to redis events")
            logger.debug("Error is" + str(e))
            return

    def adjRibPolicyWorker(self):
        """Apply policies to every node's AdjInRib, then publish ``AdjInRibPP``."""
        logger.debug("Received an AdjInRib event")
        # walk through the nodes and apply available policies
        for node, rtr in list(self.nodes.items()):
            rtr.adjInRibPP.process_adjInRib(node, self.redis)
        self.update_redis('AdjInRibPP')

    def localRibWorker(self):
        """Run path selection for every node, then publish ``localRib``."""
        # walk through the nodes and apply available path selection algorithms
        for node, rtr in list(self.nodes.items()):
            rtr.localRib.process_adjInRibPP(node, self.redis)
        self.update_redis('localRib')

    # Optional per-message delivery callback (triggered by poll() or flush())
    # during the rib stream to kafka when a message has been successfully
    # delivered or permanently failed delivery (after retries).
    @staticmethod
    def delivery_callback(err, msg):
        """Log the outcome of a single produced message."""
        if err:
            logger.debug('%% Message failed delivery: %s\n' % err)
        else:
            logger.debug('%% Message delivered to %s [%d]\n' %
                         (msg.topic(), msg.partition()))

    def kafkaWorker(self):
        """Push every node's localRib routes from Redis onto Kafka.

        With the local Rib ready, push routes to Kafka.  This is meant to
        serve as a streaming set of routes to router clients which will be
        kafka consumers.  This is NOT a way to resync if the router dies or
        router client disconnects - for that sync with the redis database
        first and then start listening to fresh messages from Kafka for
        route events.

        Each node's routes go to a topic named after the node's ``hash``.
        """
        self.rib_producer = Producer(
            {'bootstrap.servers': self.bootstrap_server})

        for node, rtr in list(self.nodes.items()):
            topic = rtr.hash
            # fetch localRib routes from Redis, push to Kafka bus
            raw = self.redis.hget(node, 'localRib')
            if raw is None:
                # No localRib stored for this node yet; literal_eval(None)
                # would raise and take the redis listener thread down.
                continue
            localRib = ast.literal_eval(raw)
            if not localRib:
                continue

            for route in localRib:
                logger.debug(route)
                payload = json.dumps(route)
                try:
                    self.rib_producer.produce(
                        topic, value=payload,
                        callback=self.delivery_callback)
                    self.rib_producer.poll(0)
                except BufferError:
                    logger.debug(
                        '%% Local producer queue is full (%d messages awaiting delivery): try again\n'
                        % len(self.rib_producer))
                    # Block until there is queue space available.  poll()
                    # takes its timeout in SECONDS, so the interval is
                    # passed as-is (message delivery can take some time if
                    # there are temporary errors on the broker, e.g. leader
                    # failover).
                    self.rib_producer.poll(RIB_PRODUCER_WAIT_INTERVAL)
                    # Now try again when there is hopefully some free space
                    self.rib_producer.produce(
                        topic, value=payload,
                        callback=self.delivery_callback)

        # Wait until all messages have been delivered
        logger.debug('%% Waiting for %d deliveries\n' %
                     len(self.rib_producer))
        self.rib_producer.flush()

    def _new_consumer(self):
        """Build a Kafka consumer with a unique, time-stamped group/client id."""
        return Consumer({
            'bootstrap.servers': self.bootstrap_server,
            'group.id': 'bmp_client' + str(time.time()),
            'client.id': 'bmp_client' + str(time.time()),
            'default.topic.config': {
                'auto.offset.reset': 'smallest',
                'auto.commit.interval.ms': 1000,
                'enable.auto.commit': True
            }
        })

    def _raise_if_poisoned(self):
        """Raise PoisonPillException if a ``"quit"`` pill is waiting on the queue."""
        try:
            pill = self.poisonpillq.get_nowait()
        except Queue.Empty:
            return
        if isinstance(pill, str) and pill == "quit":
            raise self.PoisonPillException

    @staticmethod
    def _has_payload(msg):
        """Return True for a data message and False for end-of-partition.

        Raises:
            KafkaException: for any other error carried by ``msg``.
        """
        error = msg.error()
        if not error:
            return True
        if error.code() == KafkaError._PARTITION_EOF:
            # End of partition event
            logger.debug('%% %s [%d] reached end at offset %d\n' %
                         (msg.topic(), msg.partition(), msg.offset()))
            return False
        raise KafkaException(error)

    def _wait_for_node(self, router_hash, event, timeout):
        """Return the node for ``router_hash``, waiting until it is known.

        Between checks the method waits on ``event`` for at most ``timeout``
        and honours a pending poison pill so shutdown cannot hang here.
        """
        while True:
            node = self.nodes.get(router_hash)
            if node is not None:
                return node
            logger.debug(
                "Received peer message for currently unknown Router, hash="
                + router_hash)
            logger.debug("Let's wait for router_msg event to be set")
            self._raise_if_poisoned()
            event.wait(timeout)

    def capture_router_msg(self):
        """Thread body: consume ``openbmp.parsed.router`` and track routers.

        ``self.routerevent`` is set whenever a poll returns nothing and
        cleared after a message has been processed.
        """
        logger.debug("Connecting to Kafka to receive router messages")
        self.router_consumer = self._new_consumer()
        self.router_consumer.subscribe(['openbmp.parsed.router'])

        try:
            while True:
                msg = self.router_consumer.poll(timeout=1.0)
                self._raise_if_poisoned()

                if msg is None:
                    self.routerevent.set()
                    continue
                if not self._has_payload(msg):
                    continue

                # Process the message
                m = Message(msg.value())  # Gets body of kafka message.
                t = msg.topic()  # Gets topic of kafka message.
                m_tag = t.split('.')[2].upper()
                t_stamp = str(datetime.datetime.now())

                if t == "openbmp.parsed.router":
                    router = Router(m)
                    logger.debug('Received Message (' + t_stamp + ') : ' +
                                 m_tag + '(V: ' + str(m.version) + ')')
                    logger.debug(router.to_json_pretty())
                    router_msg = yaml.safe_load(router.to_json_pretty())
                    logger.debug("Calling process msg for Router messages")
                    # Use this instance, not a module-level global that may
                    # not be bound yet while __init__ is still running.
                    self.process_msg(router_msg)

                # update redis
                self.update_redis()
                self.routerevent.clear()

        except self.PoisonPillException:
            logger.debug("Poison Pill received")
            logger.debug("Shutting down the router message consumer")
            self.router_consumer.close()
            return
        except Exception as e:
            logger.debug(
                "Exception occurred while listening for router messages")
            logger.debug("Error is " + str(e))
            self.router_consumer.close()
            return

    def capture_peer_msg(self):
        """Thread body: consume ``openbmp.parsed.peer`` and feed each router node.

        A peer message for a router that is not known yet waits on
        ``self.routerevent`` (``PEER_MSG_DAMPENING_TIMER`` per attempt).
        ``self.peerevent`` is set on an idle poll and cleared after a
        message has been processed.
        """
        logger.info("Connecting to Kafka to receive peer messages")
        self.peer_consumer = self._new_consumer()
        self.peer_consumer.subscribe(['openbmp.parsed.peer'])

        try:
            while True:
                msg = self.peer_consumer.poll(timeout=1.0)
                self._raise_if_poisoned()

                if msg is None:
                    self.peerevent.set()
                    continue
                if not self._has_payload(msg):
                    continue

                # Process the message
                m = Message(msg.value())  # Gets body of kafka message.
                t = msg.topic()  # Gets topic of kafka message.
                m_tag = t.split('.')[2].upper()
                t_stamp = str(datetime.datetime.now())

                if t == "openbmp.parsed.peer":
                    peer = Peer(m)
                    logger.debug('Received Message (' + t_stamp + ') : ' +
                                 m_tag + '(V: ' + str(m.version) + ')')
                    logger.debug(peer.to_json_pretty())
                    peer_msg = yaml.safe_load(peer.to_json_pretty())

                    for entry in peer_msg:
                        node = self._wait_for_node(
                            str(entry['router_hash']), self.routerevent,
                            PEER_MSG_DAMPENING_TIMER)
                        node.process_msg(entry)

                # Go ahead and update Redis
                self.update_redis()
                self.peerevent.clear()

        except self.PoisonPillException:
            logger.debug("Poison Pill received")
            logger.debug("Shutting down the peer message consumer")
            self.peer_consumer.close()
            return
        except Exception as e:
            logger.debug(
                "Exception occured while listening to peer messages from Kafka"
            )
            logger.debug("Error is " + str(e))
            # Close THIS thread's consumer (not the router consumer).
            self.peer_consumer.close()
            return

    def capture_prefix_msg(self):
        """Thread body: consume ``openbmp.parsed.unicast_prefix`` into AdjInRib.

        A prefix message for a router that is not known yet waits on
        ``self.peerevent`` (``PREFIX_MSG_DAMPENING_TIMER`` per attempt).
        After a message has been processed an ``AdjInRib`` notification is
        published through :meth:`update_redis`.
        """
        logger.debug("Connecting to Kafka to receive prefix messages")
        self.prefix_consumer = self._new_consumer()
        self.prefix_consumer.subscribe(['openbmp.parsed.unicast_prefix'])

        try:
            while True:
                msg = self.prefix_consumer.poll(timeout=1.0)
                self._raise_if_poisoned()

                if msg is None:
                    continue
                if not self._has_payload(msg):
                    continue

                # Process the message
                m = Message(msg.value())  # Gets body of kafka message.
                t = msg.topic()  # Gets topic of kafka message.
                m_tag = t.split('.')[2].upper()
                t_stamp = str(datetime.datetime.now())

                if t == "openbmp.parsed.unicast_prefix":
                    unicast_prefix = UnicastPrefix(m)
                    logger.debug('Received Message (' + t_stamp + ') : ' +
                                 m_tag + '(V: ' + str(m.version) + ')')
                    logger.debug(unicast_prefix.to_json_pretty())
                    prefix_msg = yaml.safe_load(
                        unicast_prefix.to_json_pretty())

                    for entry in prefix_msg:
                        node = self._wait_for_node(
                            str(entry['router_hash']), self.peerevent,
                            PREFIX_MSG_DAMPENING_TIMER)
                        node.adjInRib.process_msg(entry)

                # Go ahead and update Redis
                self.update_redis('AdjInRib')

        except self.PoisonPillException:
            logger.debug("Poison Pill received")
            logger.debug("Shutting down the prefix message consumer")
            self.prefix_consumer.close()
            return
        except Exception as e:
            logger.debug(
                "Exception occurred while listening for prefix messages")
            logger.debug("Error is " + str(e))
            self.prefix_consumer.close()
            return
# Per-message delivery report handler.  confluent-kafka invokes it from
# poll() or flush() once a message has either been delivered or has
# permanently failed (after retries).
def delivery_callback(err, msg):
    """Write the delivery outcome of one message to stderr."""
    if not err:
        sys.stderr.write('%% Message delivered to %s [%d] @ %d\n' %
                         (msg.topic(), msg.partition(), msg.offset()))
        return
    sys.stderr.write('%% Message failed delivery: %s\n' % err)


# Each line read from stdin becomes one Kafka message.
for line in sys.stdin:
    # Strip the trailing newline (and other trailing whitespace).
    payload = line.rstrip()
    try:
        p.produce(topic, payload, callback=delivery_callback)
    except BufferError:
        # The local queue is full; the line is reported and not retried.
        sys.stderr.write('%% Local producer queue is full (%d messages awaiting delivery): try again\n' %
                         len(p))

    # Serve the delivery callback queue.
    # NOTE: produce() is asynchronous, so this poll() will most likely not
    #       yet serve the report for the message produced just above.
    p.poll(0)

# Block until every queued message has been delivered.
sys.stderr.write('%% Waiting for %d deliveries\n' % len(p))
p.flush()
def test_produce_headers():
    """Exercise produce() with the headers argument.

    Headers are supplied both as lists of ``(key, value)`` tuples and as
    dicts, covering duplicate keys, empty and ``None`` values, raw bytes
    and non-ASCII text.  Malformed headers must raise ``TypeError``.
    """
    producer_config = {
        'socket.timeout.ms': 10,
        'error_cb': error_cb,
        'message.timeout.ms': 10,
    }
    p = Producer(producer_config)

    binval = pack('hhl', 1, 2, 3)

    # Headers given as a list of tuples.
    list_headers = [
        [('headerkey', 'headervalue')],
        [('dupkey', 'dupvalue'), ('empty', ''), ('dupkey', 'dupvalue')],
        [('dupkey', 'dupvalue'), ('dupkey', 'diffvalue')],
        [('key_with_null_value', None)],
        [('binaryval', binval)],
        [('alreadyutf8', u'Småland'.encode('utf-8'))],
        [('isunicode', 'Jämtland')],
    ]

    # The same cases given as dicts (duplicate keys collapse in the literal).
    dict_headers = [
        {'headerkey': 'headervalue'},
        {'dupkey': 'dupvalue', 'empty': '', 'dupkey': 'dupvalue'},  # noqa: F601
        {'dupkey': 'dupvalue', 'dupkey': 'diffvalue'},  # noqa: F601
        {'key_with_null_value': None},
        {'binaryval': binval},
        {'alreadyutf8': u'Småland'.encode('utf-8')},
        {'isunicode': 'Jämtland'},
    ]

    for headers in list_headers + dict_headers:
        print('headers', type(headers), headers)
        p.produce('mytopic', value='somedata', key='a key', headers=headers)
        p.produce('mytopic', value='somedata', headers=headers)

    # Each of these header arguments is malformed and must be rejected.
    rejected_calls = [
        dict(value='somedata', key='a key', headers=('a', 'b')),
        dict(value='somedata', key='a key', headers=[('malformed_header')]),
        dict(value='somedata', headers={'anint': 1234}),
    ]
    for produce_kwargs in rejected_calls:
        with pytest.raises(TypeError):
            p.produce('mytopic', **produce_kwargs)

    p.flush()
def test_consumer_start_from_partition_start():
    """Check that a SynchronizedConsumer only reads up to the followed group's offset.

    Three messages are produced to a fresh topic.  The consumer must see
    nothing until an offset for the synchronizing group is written to the
    commit log topic, must then receive exactly the first message, and must
    not be able to read any further.
    """
    synchronize_commit_group = 'consumer-{}'.format(uuid.uuid1().hex)

    # Delivery reports, grouped by topic, so the test can look up the real
    # partition/offset each produced message was assigned.
    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        messages_delivered[message.topic()].append(message)

    producer_config = {
        'bootstrap.servers': os.environ['SENTRY_KAFKA_HOSTS'],
        'on_delivery': record_message_delivered,
    }
    producer = Producer(producer_config)

    with create_topic() as topic, create_topic() as commit_log_topic:
        # Seed the topic with a few messages.
        for number in range(3):
            payload = '{}'.format(number)
            producer.produce(topic, payload.encode('utf8'))

        assert producer.flush(
            5) == 0, 'producer did not successfully flush queue'

        # Build the consumer under test.
        consumer = SynchronizedConsumer(
            bootstrap_servers=os.environ['SENTRY_KAFKA_HOSTS'],
            consumer_group='consumer-{}'.format(uuid.uuid1().hex),
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset='earliest',
        )

        assignments_received = []

        def on_assign(c, assignment):
            assert c is consumer
            assignments_received.append(assignment)

        consumer.subscribe([topic], on_assign=on_assign)

        # Assignment is asynchronous; poll (up to ten times) until it arrives.
        for _ in xrange(10):
            assert consumer.poll(1) is None
            if assignments_received:
                break

        assert len(assignments_received
                   ) == 1, 'expected to receive partition assignment'
        assigned = {(tp.topic, tp.partition)
                    for tp in assignments_received[0]}
        assert assigned == {(topic, 0)}

        # TODO: Make sure that all partitions remain paused.

        # Nothing has been committed by the followed group yet, so nothing
        # should be readable.
        assert consumer.poll(1) is None

        # Advance the followed group's committed offset past the first message.
        first_delivered = messages_delivered[topic][0]
        commit_key = '{topic}:{partition}:{group}'.format(
            topic=first_delivered.topic(),
            partition=first_delivered.partition(),
            group=synchronize_commit_group,
        )
        commit_value = '{}'.format(first_delivered.offset() + 1)
        producer.produce(
            commit_log_topic,
            key=commit_key.encode('utf8'),
            value=commit_value.encode('utf8'),
        )

        assert producer.flush(
            5) == 0, 'producer did not successfully flush queue'

        # Exactly one message should now become available.
        # TODO: Can we also assert that the position is unpaused?)
        received = None
        for _ in xrange(5):
            received = consumer.poll(1)
            if received is not None:
                break

        assert received is not None, 'no message received'

        expected_message = messages_delivered[topic][0]
        assert received.topic() == expected_message.topic()
        assert received.partition() == expected_message.partition()
        assert received.offset() == expected_message.offset()

        # The consumer must stop at the followed group's offset.
        # TODO: Can we assert that the position is paused?
        assert consumer.poll(1) is None
class KafkaWorkflowResultsSender(object):
    """Forward workflow events to Kafka, or re-emit them locally.

    Events are produced to ``<base topic>.<event name>`` when the sender was
    created with a ``socket_id``; otherwise they are sent through the
    event's own ``send`` method.
    """

    def __init__(self, execution_db, message_converter=ProtobufWorkflowResultsConverter, socket_id=None):
        self._ready = False

        self.id_ = socket_id

        # Producer settings and base topic both come from the Walkoff config.
        config = walkoff.config.Config
        self.producer = Producer(config.WORKFLOW_RESULTS_KAFKA_CONFIG)
        self.execution_db = execution_db
        self.topic = config.WORKFLOW_RESULTS_KAFKA_TOPIC
        self.message_converter = message_converter

        self._ready = bool(self.check_status())

    def shutdown(self):
        """Flush the producer's pending messages."""
        self.producer.flush()

    @staticmethod
    def _delivery_callback(err, msg):
        """Log an error when Kafka reports a failed delivery."""
        if err is None:
            return
        logger.error('Kafka message delivery failed: {}'.format(err))

    def _format_topic(self, event):
        """Return the Kafka topic for ``event``: base topic plus the event name."""
        return '{}.{}'.format(self.topic, event.name)

    def handle_event(self, workflow, sender, **kwargs):
        """Listens for the data_sent callback, which signifies that an execution element needs to trigger a
                callback in the main thread.

        Args:
            workflow (Workflow): The Workflow object that triggered the event
            sender (ExecutionElement): The execution element that sent the signal.
            kwargs (dict): Any extra data to send.
        """
        event = kwargs['event']
        persisting_events = (WalkoffEvent.TriggerActionAwaitingData, WalkoffEvent.WorkflowPaused)

        if event in persisting_events:
            # Persist the workflow's state for these events.
            self.execution_db.session.add(SavedWorkflow.from_workflow(workflow))
            self.execution_db.session.commit()
        elif event == WalkoffEvent.ConsoleLog:
            # Console logs are attributed to the currently executing action.
            sender = workflow.get_executing_action()

        if not self.id_:
            event.send(sender, data=kwargs.get('data'))
            return

        packet_bytes = self.message_converter.event_to_protobuf(sender, workflow, **kwargs)
        self.producer.produce(self._format_topic(event), packet_bytes, callback=self._delivery_callback)

    def is_ready(self):
        """Return the readiness flag computed at construction time."""
        return self._ready

    def check_status(self):
        """Return True when a producer object exists."""
        return self.producer is not None

    def send_ready_message(self):
        """Emit the WorkerReady event through the common workflow signal."""
        WalkoffEvent.CommonWorkflowSignal.send(sender={'id': '1'}, event=WalkoffEvent.WorkerReady)

    def create_workflow_request_message(self, workflow_id, workflow_execution_id, start=None, start_arguments=None,
                                        resume=False, environment_variables=None, user=None):
        """Delegate to the message converter's ``create_workflow_request_message``."""
        converter = self.message_converter
        return converter.create_workflow_request_message(workflow_id, workflow_execution_id, start,
                                                         start_arguments, resume, environment_variables, user)
from confluent_kafka import Producer

# Minimal example: send one message to a local broker and wait (at most
# 30 seconds) for the producer queue to drain.
producer_config = {'bootstrap.servers': 'localhost:9092'}
p = Producer(producer_config)

p.produce('mytopic', value='world3')
p.flush(30)

# The string below is an inert alternative example (producing many messages
# with a per-message delivery callback); it is never executed.
'''
def acked(err, msg):
    if err is not None:
        print("Failed to deliver message: {0}: {1}"
              .format(msg.value(), err.str()))
    else:
        print("Message produced: {0}".format(msg.value()))

p = Producer({'bootstrap.servers': 'localhost:9092'})

try:
    for val in xrange(1, 1000):
        p.produce('mytopic', 'myvalue #{0}'
                  .format(val), callback=acked)
        p.poll(0.5)

except KeyboardInterrupt:
    pass

p.flush(30)
'''
def _select_kafka_settings(host, data_type, cert_password):
    """Return ``(producer_config, topic)`` for the given request host and data type."""
    if host == 'localhost:6543':
        # Local development: local broker, local certificates, test topic.
        return ({'bootstrap.servers': 'localhost:9093',
                 'log_level': 0,
                 'security.protocol': 'ssl',
                 'ssl.key.location': 'etc/certs/client.key',
                 'ssl.key.password': cert_password,
                 'ssl.certificate.location': 'etc/certs/client.crt',
                 'ssl.ca.location': 'etc/certs/server.crt'},
                'test')

    kafka_conf = {'bootstrap.servers': 'exchange.clinicalgenome.org:9093',
                  'log_level': 0,
                  'security.protocol': 'ssl',
                  'ssl.key.location': 'etc/certs/dataexchange/client.key',
                  'ssl.key.password': cert_password,
                  'ssl.certificate.location': 'etc/certs/dataexchange/client.crt',
                  'ssl.ca.location': 'etc/certs/dataexchange/server.crt'}

    if data_type == 'interpretation':
        kafka_topic = 'variant_interpretation'
    else:
        kafka_topic = 'gene_validity'

    # Every host other than production publishes to the "_dev" topic.
    if host != 'curation.clinicalgenome.org':
        kafka_topic += '_dev'

    return kafka_conf, kafka_topic


def publish(request):
    """Fetch a record from Elasticsearch, build its message and send it to Kafka.

    Args:
        request: object exposing ``params`` (must contain ``type`` and
            ``uuid``) and ``host``.

    Returns:
        dict: ``{'status': 'Fail', 'message': <reason>}`` on any failure, or
        ``{'status': 'Success', 'message': <JSON sent>, 'partition': ...,
        'offset': ...}`` once Kafka confirms delivery.  Failures are always
        reported through the returned dict rather than raised.
    """
    elasticsearch_server = 'http://localhost:9200/clincoded'
    return_object = {'status': 'Fail',
                     'message': 'Unable to deliver message'}

    # Check that required parameters have been provided
    if not ('type' in request.params and 'uuid' in request.params):
        return_object['message'] = 'Required parameters missing in request'
        return return_object

    # Attempt to retrieve data (from Elasticsearch)
    try:
        searchRes = requests.get('{}/{}/{}'.format(elasticsearch_server, request.params['type'],
                                                   request.params['uuid']), timeout=10)

        if searchRes.status_code != requests.codes.ok:
            return_object['message'] = 'Data search failed'
            return return_object

    except Exception:
        return_object['message'] = 'Data search could not be completed'
        return return_object

    # Store JSON-encoded content of search result(s)
    try:
        resultJSON = searchRes.json()

    except Exception:
        return_object['message'] = 'Retrieved data not in expected format'
        return return_object

    # Check that search found data
    if 'found' not in resultJSON or not resultJSON['found']:
        return_object['message'] = 'Requested data could not be found'
        return return_object

    # Check that data has expected elements
    try:
        embedded = resultJSON['_source']['embedded']
        data_type_to_publish = embedded['resourceType']

        if data_type_to_publish == 'classification':
            evidence_to_publish = embedded['resourceParent']['gdm']
            publishing_affiliation = embedded['resource']['affiliation']
            evidence_counts_to_publish = embedded['resource']['classificationPoints']

        elif data_type_to_publish == 'interpretation':
            evidence_to_publish = embedded['resourceParent']['interpretation']

        else:
            raise Exception

    except Exception:
        return_object['message'] = 'Retrieved data missing expected elements'
        return return_object

    # Check that message should be sent? (approved status? permission to publish?)

    # Construct message
    try:
        if data_type_to_publish == 'interpretation':
            message_template = deepcopy(clincoded.messaging.templates.vci_to_dx.message_template)
            data_to_remove = clincoded.messaging.templates.vci_to_dx.data_to_remove
            add_data_to_msg_template(embedded, None, None, message_template)

        else:
            message_template = deepcopy(clincoded.messaging.templates.gci_to_dx.message_template)
            classification_points = deepcopy(evidence_counts_to_publish)
            add_data_to_msg_template(embedded,
                                     gather_evidence(evidence_to_publish, publishing_affiliation),
                                     gather_evidence_counts(classification_points, True),
                                     message_template)

        message = json.dumps(message_template, separators=(',', ':'))

    except Exception:
        return_object['message'] = 'Failed to build complete message'
        return return_object

    # Transform message (if necessary, via independent service)
    try:
        if data_type_to_publish == 'interpretation':
            remove_data_from_msg_template(data_to_remove, message_template['interpretation'])
            message_template['interpretation'] = transform_interpretation(
                message_template['interpretation'], request.host)
            message = json.dumps(message_template, separators=(',', ':'))

    except Exception as e:
        # The transform step may raise with a descriptive payload in args;
        # it is passed through unchanged when present.
        if e.args:
            return_object['message'] = e.args
        else:
            return_object['message'] = 'Failed to build complete message'
        return return_object

    # Configure message delivery parameters
    kafka_cert_pw = os.environ.get('KAFKA_CERT_PW', '')
    kafka_conf, kafka_topic = _select_kafka_settings(request.host, data_type_to_publish, kafka_cert_pw)
    kafka_timeout = 10

    def delivery_callback(err, msg):
        # Runs inside flush(); records the delivery outcome in the response.
        if err:
            # Store text, not the KafkaError object, so the response stays
            # serialisable like every other failure message.
            return_object['message'] = str(err)
        else:
            return_object.update({'status': 'Success',
                                  'message': message,
                                  'partition': msg.partition(),
                                  'offset': msg.offset()})

    # Send message
    try:
        # Creating the producer can itself fail (bad configuration or
        # certificates), so it is covered by the same handler as the send.
        p = Producer(**kafka_conf)
        p.produce(kafka_topic, message, callback=delivery_callback)
        p.flush(kafka_timeout)
        return return_object

    except Exception:
        return_object['message'] = 'Message delivery failed'
        return return_object