def produce():
    """Publish an endless stream of numbered messages to ``KAFKA_TOPIC``.

    Connects to the broker(s) named by ``KAFKA_SERVER`` and produces
    ``"message0"``, ``"message1"``, ... forever; this function never returns.
    """
    p = Producer({'bootstrap.servers': KAFKA_SERVER})
    i = 0
    while True:
        try:
            p.produce(KAFKA_TOPIC, "message" + str(i))
        except BufferError:
            # The local queue is full: block briefly so delivery reports can
            # drain, then retry the same message instead of crashing.
            p.poll(1)
            continue
        i += 1
        # Serve delivery reports so the local queue keeps draining.
        p.poll(0)
def test_produce_headers_should_fail():
    """ Test that produce() with headers raises NotImplementedError and names the required librdkafka version """
    p = Producer({'socket.timeout.ms': 10,
                  'error_cb': error_cb,
                  'message.timeout.ms': 10})

    with pytest.raises(NotImplementedError) as e:
        p.produce('mytopic', value='somedata', key='a key', headers=[('headerkey', 'headervalue')])

    # `e` is pytest's ExceptionInfo wrapper; its str() is not guaranteed to
    # contain the exception message, so inspect the raised exception itself.
    assert 'Producer message headers requires confluent-kafka-python built for librdkafka version >=v0.11.4' in str(e.value)
class KafkaPublisher(object):
    """Publishes values to Kafka topics through a confluent_kafka ``Producer``."""

    def __init__(self, connection, asynchronous=True):
        """Create the underlying producer.

        Args:
            connection: Producer configuration dict; a falsy value is
                replaced by an empty dict.
            asynchronous: When false, every ``publish`` call flushes the
                producer before returning.
        """
        # Imported when the publisher is constructed, not at module import.
        from confluent_kafka import Producer

        producer_config = connection or {}
        self.producer = Producer(producer_config)
        self.asynchronous = asynchronous

    def publish(self, channel, value, key=None):
        """Produce ``value`` (with optional ``key``) to the topic ``channel``."""
        self.producer.produce(topic=channel, value=value, key=key)
        if self.asynchronous:
            return
        self.producer.flush()
def publish(datasource="USB"):
    """Stream records from a StreamAbsGen source into the 'neuronraindata' topic.

    Args:
        datasource: "USB" reads the "USBWWAN_stream" source; any other value
            reads the "KingCobra" source.

    Publishing is best-effort: a record that cannot be produced is reported
    and skipped. Queued messages are flushed when the stream ends or fails.
    """
    p = Producer({'bootstrap.servers': 'localhost:9092'})
    if datasource == "USB":
        inputf = Streaming_AbstractGenerator.StreamAbsGen("USBWWAN_stream", "USBWWAN")
    else:
        inputf = Streaming_AbstractGenerator.StreamAbsGen("KingCobra", "KingCobra")
    try:
        for data in inputf:
            # Single-argument print keeps identical output on Python 2 and 3.
            print("publishing data: %s  in Kafka Topic" % (data,))
            try:
                p.produce('neuronraindata', data.encode('utf-8'))
            except Exception as exc:
                # Still best-effort, but no longer silent: report the failure
                # and carry on with the next record.
                print("failed to publish data to Kafka: %s" % (exc,))
            # Serve delivery reports so the local queue does not fill up.
            p.poll(0)
    finally:
        # Without a flush, queued messages are lost when `p` is discarded.
        p.flush()
def test_produce_timestamp():
    """ Exercise produce() with the timestamp argument """
    producer_conf = {'socket.timeout.ms': 10,
                     'error_cb': error_cb,
                     'message.timeout.ms': 10}
    p = Producer(producer_conf)

    # Timestamps need librdkafka >=v0.9.4
    try:
        p.produce('mytopic', timestamp=1234567)
    except NotImplementedError:
        # Tolerated only when the linked librdkafka predates timestamp support
        timestamps_supported = libversion()[1] >= 0x00090400
        if timestamps_supported:
            raise

    p.flush()
def producer(args, sniff_timeout_ms=500, sniff_promisc=True):
    """ Captures packets from a network interface and sends them to a Kafka topic.

    Runs until the module-level `finished` event is set or, when
    `args.max_packets` is positive, until that many packets were captured.
    Queued messages are always flushed before returning.
    """

    # install the SIGINT handler
    signal.signal(signal.SIGINT, signal_handler)

    global producer_args
    producer_args = args

    # connect to kafka
    logging.info("Connecting to Kafka; %s", args.kafka_configs)
    kafka_producer = Producer(args.kafka_configs)

    # initialize packet capture
    logging.info("Starting packet capture")
    capture = pcapy.open_live(args.interface, args.snaplen, sniff_promisc, sniff_timeout_ms)
    pkts_in = 0

    try:
        while True:
            if finished.is_set():
                break
            if args.max_packets > 0 and pkts_in >= args.max_packets:
                break

            # capture a packet; the header is None when nothing was captured
            pkt_hdr, pkt_raw = capture.next()
            if pkt_hdr is not None:
                logging.debug("Packet received: pkts_in=%d, pkt_len=%s", pkts_in, pkt_hdr.getlen())
                pkts_in += 1
                pkt_ts = timestamp(pkt_hdr)
                kafka_producer.produce(args.kafka_topic, key=pack_ts(pkt_ts), value=pkt_raw,
                                       callback=delivery_callback)

                # pretty print every `args.pretty_print` packets, if enabled
                if args.pretty_print > 0 and pkts_in % args.pretty_print == 0:
                    print('Packet received[%s]' % (pkts_in))

            # serve the callback queue
            kafka_producer.poll(0)

    finally:
        # flush all messages
        logging.info("Waiting for '%d' message(s) to flush", len(kafka_producer))
        kafka_producer.flush()

        # pkts_out is absent when the delivery callback never ran
        pkts_out = getattr(delivery_callback, "pkts_out", 0)
        logging.info("'%d' packet(s) in, '%d' packet(s) out", pkts_in, pkts_out)
class KafkaWorkflowCommunicationSender(object):
    """Sends workflow control messages (pause, abort, worker exit) to a Kafka topic."""

    _requires = ['confluent-kafka']

    def __init__(self, message_converter=ProtobufWorkflowCommunicationConverter):
        """Create the producer from the WALKOFF workflow-communication settings.

        Args:
            message_converter: Object providing the ``create_*_message``
                factories used to build outgoing messages.
        """
        kafka_config = walkoff.config.Config.WORKFLOW_COMMUNICATION_KAFKA_CONFIG
        self.producer = Producer(kafka_config)
        self.topic = walkoff.config.Config.WORKFLOW_COMMUNICATION_KAFKA_TOPIC
        self.message_converter = message_converter

    def shutdown(self):
        """Blocks until all queued messages have been delivered or have failed."""
        self.producer.flush()

    @staticmethod
    def _delivery_callback(err, msg):
        """Logs a failed delivery; successful deliveries are ignored."""
        if err is not None:
            logger.error('Kafka message delivery failed: {}'.format(err))

    def pause_workflow(self, workflow_execution_id):
        """Pauses a workflow currently executing.

        Args:
            workflow_execution_id (UUID): The execution ID of the workflow.
        """
        logger.info('Pausing workflow {0}'.format(workflow_execution_id))
        message = self.message_converter.create_workflow_pause_message(workflow_execution_id)
        self._send_workflow_communication_message(message, workflow_execution_id)

    def abort_workflow(self, workflow_execution_id):
        """Aborts a workflow currently executing.

        Args:
            workflow_execution_id (UUID): The execution ID of the workflow.
        """
        logger.info('Aborting running workflow {0}'.format(workflow_execution_id))
        message = self.message_converter.create_workflow_abort_message(workflow_execution_id)
        self._send_workflow_communication_message(message, workflow_execution_id)

    def send_exit_to_workers(self):
        """Sends the exit message over the communication sockets, otherwise worker receiver threads will hang"""
        message = self.message_converter.create_worker_exit_message()
        self._send_workflow_communication_message(message, None)

    def _send_workflow_communication_message(self, message, workflow_id):
        """Sends ``message`` on the configured topic, keyed by ``workflow_id``."""
        self._send_message(message, self.topic, workflow_id)

    def _send_message(self, message, topic, key):
        """Queues ``message`` for delivery to ``topic`` and serves pending delivery reports."""
        self.producer.produce(topic, message, key=key, callback=self._delivery_callback)
        # Delivery callbacks only run from poll()/flush(). Without this call,
        # failures would not be logged until shutdown() and the local queue
        # would keep growing.
        self.producer.poll(0)
def test_produce_headers():
    """ Test produce() with the headers arg: accepted list/dict forms and rejected malformed forms """
    p = Producer({'socket.timeout.ms': 10,
                  'error_cb': error_cb,
                  'message.timeout.ms': 10})

    binval = pack('hhl', 1, 2, 3)

    # Each entry is one complete `headers` argument. The first half are lists
    # of (key, value) tuples, the second half the same cases as dicts.
    headers_to_test = [
        [('headerkey', 'headervalue')],
        [('dupkey', 'dupvalue'), ('empty', ''), ('dupkey', 'dupvalue')],
        [('dupkey', 'dupvalue'), ('dupkey', 'diffvalue')],
        [('key_with_null_value', None)],
        [('binaryval', binval)],
        [('alreadyutf8', u'Småland'.encode('utf-8'))],
        [('isunicode', 'Jämtland')],

        {'headerkey': 'headervalue'},
        {'dupkey': 'dupvalue', 'empty': '', 'dupkey': 'dupvalue'},  # noqa: F601
        {'dupkey': 'dupvalue', 'dupkey': 'diffvalue'},  # noqa: F601
        {'key_with_null_value': None},
        {'binaryval': binval},
        {'alreadyutf8': u'Småland'.encode('utf-8')},
        {'isunicode': 'Jämtland'}
        ]

    # Every valid form must be accepted both with and without a message key.
    for headers in headers_to_test:
        print('headers', type(headers), headers)
        p.produce('mytopic', value='somedata', key='a key', headers=headers)
        p.produce('mytopic', value='somedata', headers=headers)

    # A flat tuple is neither a list of tuples nor a dict.
    with pytest.raises(TypeError):
        p.produce('mytopic', value='somedata', key='a key', headers=('a', 'b'))

    # ('malformed_header') is just a parenthesised string, not a 2-tuple.
    with pytest.raises(TypeError):
        p.produce('mytopic', value='somedata', key='a key', headers=[('malformed_header')])

    # An int header value must be rejected.
    with pytest.raises(TypeError):
        p.produce('mytopic', value='somedata', headers={'anint': 1234})

    p.flush()
def test_basic_api():
    """ Basic API tests, these won't really do anything since there is no
        broker configured. """

    # Constructing a Producer without a configuration must be rejected.
    try:
        p = Producer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    p = Producer({'socket.timeout.ms': 10,
                  'error_cb': error_cb,
                  'message.timeout.ms': 10})

    p.produce('mytopic')
    p.produce('mytopic', value='somedata', key='a key')

    def on_delivery(err, msg):
        # Print the actual delivery result (previously the builtin `str`
        # type was printed by mistake).
        print('delivery', err, msg)
        # Since there is no broker, produced messages should time out.
        assert err.code() == KafkaError._MSG_TIMED_OUT

    p.produce(topic='another_topic', value='testing', partition=9,
              callback=on_delivery)

    p.poll(0.001)

    p.flush(0.002)
    p.flush()

    # With no broker reachable, a metadata request may only fail with a
    # timeout or a transport error.
    try:
        p.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)
def test_dr_msg_errstr():
    """
    Check that failed messages carry a usable error string (issue #129).

    librdkafka reuses the message payload as the error value on Consumer
    messages, whereas Producer messages keep the original payload and have
    no rich error string.
    """
    p = Producer({"message.timeout.ms": 10})

    def handle_dr(err, msg):
        # The error string must not be affected by either message payload.
        assert err is not None
        assert err.code() == KafkaError._MSG_TIMED_OUT
        assert "Message timed out" in err.str()

    payloads = (
        # Unicode safe string
        "This is the message payload",
        # Invalid unicode sequence
        "\xc2\xc2",
    )
    for payload in payloads:
        p.produce('mytopic', payload, on_delivery=handle_dr)

    p.flush()
class Publisher():
    """Synchronous Kafka publisher wrapping a single ``Producer``.

    Equality, hashing and ``str()`` are all delegated to the wrapped producer.
    """

    # Configuration used when the caller supplies none.
    _DEFAULT_CONFIG = {'bootstrap.servers': 'pulsing.jhk.org:9092', 'retries': 3, 'api.version.request': True}

    def __init__(self, config=None):
        """Create the producer.

        Args:
            config: Producer configuration dict. When omitted, a fresh copy
                of ``_DEFAULT_CONFIG`` is used so that no dict object is
                shared between instances.
        """
        super().__init__()
        if config is None:
            config = dict(self._DEFAULT_CONFIG)
        self.__producer = Producer(config)
        self.logger = logging.getLogger(__name__)

    def publish(self, topic, data):
        """UTF-8 encode ``data``, produce it to ``topic`` and block until flushed."""
        self.logger.debug('publish %s - %s', topic, data)
        self.__producer.produce(topic, data.encode('utf-8'))
        self.__producer.flush()

    @property
    def producer(self):
        """The wrapped producer object."""
        return self.__producer

    def __eq__(self, other):
        # Let Python fall back to its default comparison for foreign types
        # instead of failing with AttributeError on the name-mangled attribute.
        if not isinstance(other, Publisher):
            return NotImplemented
        return self.__producer == other.__producer

    def __str__(self):
        return self.__producer.__str__()

    def __hash__(self):
        return self.__producer.__hash__()
def write(self, data):
    """Publish every record in ``data`` as UTF-8 JSON to its per-sensor topic.

    The topic name is ``"sensor-"`` followed by the record's ``"sensor"``
    value. A new producer is created from the module-level ``config`` on each
    call and flushed once all records have been queued.
    """
    p = Producer(config)
    for record in data:
        topic_name = "sensor-" + record["sensor"]
        payload = json.dumps(record).encode('utf-8')
        p.produce(topic_name, payload)
    p.flush()
from confluent_kafka import Producer


def acked(err, msg):
    """Delivery callback: print whether the message was produced or failed."""
    if err is None:
        print("Message produced: {0}".format(msg.value()))
        return
    print("Failed to deliver message: {0}: {1}".format(
        msg.value(), err.str()))


p = Producer({'bootstrap.servers': 'localhost:9092'})

# Queue a single message; a Ctrl-C during the call is ignored.
try:
    p.produce('mytopic', 'activty-01198', callback=acked)
except KeyboardInterrupt:
    pass

# Wait up to 30 seconds for delivery; this also triggers `acked`.
p.flush(30)
''' for i in range(msgCount): fakeNumber = str(i) # 讓我們產生假的Employee資料 employee = Employee(id_='emp_id_' + fakeNumber, first_name='fn_' + fakeNumber, last_name='ln_' + fakeNumber, dept_id='dept_id_' + str(i % 10), hire_date=epoch_now_mills(), wage=float(i), sex=True) # 轉換成JSON字串 employeeJson = json.dumps(employee.__dict__) # 送出訊息 producer.produce(topicName, key=str(i), value=employeeJson, callback=delivery_callback) producer.poll(0) # 呼叫poll來讓client程式去檢查內部的Buffer, 並觸發callback if i % 10000 == 0: print('Send {} messages'.format(i)) time_spend = int(round(time.time() * 1000)) - time_start print('Send : ' + str(msgCount) + ' messages to Kafka') print('Total spend : ' + str(time_spend) + ' millis-seconds') print('Throughput : ' + str(msgCount / time_spend * 1000) + ' msg/sec') except BufferError as e: # 錯誤處理 sys.stderr.write( '%% Local producer queue is full ({} messages awaiting delivery): try again\n' .format(len(producer))) except Exception as e: print(e)
def delivery_report(err, msg):
    """ Report the delivery result of one produced message.
        Invoked from poll() or flush(). """
    if err is None:
        print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))
    else:
        print('Message delivery failed: {}'.format(err))


total = 0
with open("access_log.txt", "r") as f:
    for line in f:
        # Serve delivery report callbacks from earlier produce() calls.
        producer.poll(0)

        # produce() is asynchronous: the delivery report arrives later, from
        # the poll() above or the flush() below, once the message has been
        # delivered or has failed permanently.
        entry = line.strip()
        producer.produce("testLogs", entry.encode('utf-8'), callback=delivery_report)
        total += 1
        print(entry, " ", total)

# Block until all outstanding messages are delivered and their delivery
# report callbacks have run.
producer.flush()
class KafkaConnector(object):
    """Simple wrapper class to configure a simple kafka consumer and
    producer pair, so that they can be used to perform simple filter() and
    map() operations over the received tweets"""

    def __init__(
        self,
        group_id=None,
        consumer_topic='consumer_limbo',
        producer_topic='consumer_limbo',
        logging_topic='minteressa_stats',
        bootstrap_servers='kafka:9092'
    ):
        """Store the connection settings; nothing is connected until connect().

        Args:
            group_id: Consumer group id.
            consumer_topic: Topic subscribed to by connect().
            producer_topic: Default topic for send().
            logging_topic: Default topic for log().
            bootstrap_servers: Broker list passed to both clients.
        """
        self.group_id = group_id
        self.bootstrap_servers = bootstrap_servers
        self.consumer_topic = consumer_topic
        self.producer_topic = producer_topic
        self.logging_topic = logging_topic

        self.consumer = None
        self.producer = None

    def listen(self):
        """Yield every proper message from the consumer, forever.

        End-of-partition events are reported on stderr and skipped.

        Raises:
            KafkaException: On any other consumer error.
        """
        while True:
            msg = self.consumer.poll()
            if msg is None:
                continue

            error = msg.error()
            if error:
                if error.code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write(
                        '%% %s [%d] reached end at offset %d\n' % (
                            msg.topic(),
                            msg.partition(),
                            msg.offset()
                        )
                    )
                    continue
                raise KafkaException(error)

            # Proper message
            sys.stdout.write(
                '%s [partition-%d] at offset %d with key %s:\n' %
                (
                    msg.topic(),
                    msg.partition(),
                    msg.offset(),
                    str(msg.key())
                )
            )
            yield msg

    def connect(self):
        """Create the consumer (subscribed to ``consumer_topic``) and the producer."""
        self.consumer = Consumer({
            'bootstrap.servers': self.bootstrap_servers,
            'group.id': self.group_id,
            'default.topic.config': {
                'auto.offset.reset': 'smallest'
            }
        })
        print("subscribing to %s" % self.consumer_topic)
        self.consumer.subscribe([
            self.consumer_topic
        ])
        print("Subscribed to topic %s " % self.consumer_topic)

        # NOTE(review): 'group.id' is a consumer setting; it is kept here
        # unchanged, but confirm whether the producer really needs it.
        self.producer = Producer({
            'bootstrap.servers': self.bootstrap_servers,
            'group.id': self.group_id
        })

    def send(self, message, producer_topic=None):
        """Queue ``message`` on ``producer_topic`` (default: ``self.producer_topic``) without flushing."""
        if producer_topic is None:
            producer_topic = self.producer_topic

        self.producer.produce(
            producer_topic,
            message
        )

    def log(self, message, logging_topic=None):
        """Produce ``message`` on ``logging_topic`` (default: ``self.logging_topic``) and flush."""
        if logging_topic is None:
            logging_topic = self.logging_topic

        self.producer.produce(logging_topic, message)
        self.producer.flush()

    def close(self):
        """Close the consumer and deliver whatever the producer still has queued.

        Safe to call before connect().
        """
        if self.consumer is not None:
            self.consumer.close()
        if self.producer is not None:
            # confluent_kafka's Producer has no close() method; flush() is
            # the way to drain it before the object is discarded.
            self.producer.flush()
except Exception as e: # Continue if error code TOPIC_ALREADY_EXISTS, which may be true if e.args[0].code() != KafkaError.TOPIC_ALREADY_EXISTS: print("Failed to create topic {}: {}".format(topic, e)) # Optional per-message on_delivery handler (triggered by poll() or flush()) # when a message has been successfully delivered or # permanently failed delivery (after retries). def acked(err, msg): """Delivery report handler called on successful or failed delivery of message """ if err is not None: print("Failed to deliver message: {}".format(err)) else: print("Produced record to topic {} partition [{}] @ offset {}" .format(msg.topic(), msg.partition(), msg.offset())) for n in range(10): record_key = "alice" record_value = json.dumps({'count': n}) print("Producing record: {}\t{}".format(record_key, record_value)) p.produce(topic, key=record_key, value=record_value, on_delivery=acked) # p.poll() serves delivery reports (on_delivery) # from previous produce() calls. p.poll(0) p.flush(10) print("10 messages were produced to topic {}!".format(topic))
def delivery_report(err, msg):
    """Print the delivery outcome (offset, topic, partition or the error) of one message."""
    if err is None:
        print('Message offset: {} delivered to {} [{}]'.format(
            msg.offset(), msg.topic(), msg.partition()))
    else:
        print('Message delivery failed: {}'.format(err))


called = True
m_count = 0

while m_count <= 10000:
    # Polling (p.poll(0.01)) and periodic sleeping are deliberately left
    # disabled here, as in the original experiment.
    m_count += 1
    # `called` starts out True, so this start-time capture never runs as written.
    if called is False:
        start = time.time()
        called = True
    p.produce('testTopic', 'Test1', callback=delivery_report)

# Final marker message, then wait for everything queued to be delivered.
p.produce('testTopic', 'Stop1')
p.flush()
import socket

from confluent_kafka import Producer

conf = {
    'bootstrap.servers': "broker-1:19092,broker-2:29092,broker-3:39092",
    # Identify this client to the brokers by host name.
    'client.id': socket.gethostname(),
}
producer = Producer(conf)

# Fire and forget: no delivery callback is registered for this message.
producer.produce(topic='hello-world-topic', key=None, value="Hello World from Python")

# Serve producer events for up to one second.
# NOTE(review): poll() does not wait for delivery; confirm whether a flush()
# is wanted before the script exits.
producer.poll(1)
import json
import random

from confluent_kafka import Producer

p = Producer({'bootstrap.servers': '34.238.53.42:9092'})


def delivery_report(err, msg):
    """Delivery callback: print where the message landed, or why it failed."""
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))


path_data_pokemon = "data/pokedex.json"
# `with` closes the file even when json.load() raises.
with open(path_data_pokemon, 'r') as data_s:
    data_j = json.load(data_s)

topic = "pokedex"

try:
    # Publish a randomly chosen entry roughly every two seconds until Ctrl-C.
    while True:
        index = random.randint(0, len(data_j) - 1)
        pokemon = data_j[index]
        print(pokemon)
        p.produce(topic, str(pokemon), callback=delivery_report)
        # Doubles as the pacing delay and as the delivery-report pump.
        p.poll(2.0)
except KeyboardInterrupt:
    pass
finally:
    # Give queued messages a bounded chance to be delivered before exiting;
    # previously they were silently dropped on Ctrl-C.
    p.flush(10)
def notify_application_availability(
    account_number, application_id, availability_status, availability_status_error=""
):
    """
    Ask Sources to update an application's availability status.

    The request is a Kafka message on ``settings.SOURCES_STATUS_TOPIC``;
    Sources performs the update when it receives that message. Nothing is
    sent unless ``settings.SOURCES_ENABLE_DATA_MANAGEMENT_FROM_KAFKA`` is set.

    Args:
        account_number (str): Account number identifier
        application_id (int): Platform insights application id
        availability_status (string): Availability status to set
        availability_status_error (string): Optional status error

    Raises:
        KafkaProducerException: if producing or flushing the message raises
            BufferError or KafkaException.
    """
    if not settings.SOURCES_ENABLE_DATA_MANAGEMENT_FROM_KAFKA:
        logger.info(
            "Skipping notify_application_availability because "
            "settings.SOURCES_ENABLE_DATA_MANAGEMENT_FROM_KAFKA is not enabled."
        )
        return

    bootstrap = f"{settings.LISTENER_SERVER}:{settings.LISTENER_PORT}"
    sources_kafka_config = {"bootstrap.servers": bootstrap}

    payload = {
        "resource_type": settings.SOURCES_RESOURCE_TYPE,
        "resource_id": application_id,
        "status": availability_status,
        "error": availability_status_error,
    }

    logger.info(
        _(
            "Requesting the update of the availability status for application "
            "%(application_id)s as %(status)s"
        ),
        {"application_id": application_id, "status": availability_status},
    )

    try:
        if settings.VERBOSE_SOURCES_NOTIFICATION_LOGGING:
            logger.info(
                _("Instantiating KafkaProducer with %(sources_kafka_config)s"),
                {"sources_kafka_config": sources_kafka_config},
            )
        kafka_producer = KafkaProducer(sources_kafka_config)

        # The identity header is generated for the account as an org admin.
        headers = identity.generate_http_identity_headers(
            account_number, is_org_admin=True
        )

        message_topic = settings.SOURCES_STATUS_TOPIC
        message_value = json.dumps(payload)
        message_headers = {
            "x-rh-identity": headers["X-RH-IDENTITY"],
            "event_type": settings.SOURCES_AVAILABILITY_EVENT_TYPE,
        }

        if settings.VERBOSE_SOURCES_NOTIFICATION_LOGGING:
            logger.info(
                _(
                    "KafkaProducer will produce with "
                    "topic='%(topic)s' value='%(value)s' headers='%(headers)s'"
                ),
                {
                    "topic": message_topic,
                    "value": message_value,
                    "headers": message_headers,
                },
            )

        kafka_producer.produce(
            topic=message_topic,
            value=message_value,
            headers=message_headers,
            callback=_check_response,
        )
        if settings.VERBOSE_SOURCES_NOTIFICATION_LOGGING:
            logger.info(_("KafkaProducer produced!"))

        kafka_producer.flush()
        if settings.VERBOSE_SOURCES_NOTIFICATION_LOGGING:
            logger.info(_("KafkaProducer flushed!"))

    except BufferError as error:
        logger_message = f"BufferError: {error}"
        logger.exception(error)
        raise KafkaProducerException(logger_message)
    except KafkaException as exception:
        logger_message = f"KafkaException: {exception.args[0].str()}"
        logger.exception(exception)
        raise KafkaProducerException(logger_message)
class ConfluentProducer(AbstractProducer):
    """
    Concrete implementation of Confluent Kafka producer (confluent-kafka)
    """

    def __init__(self, brokers: str, row_count: int, disable_progress_bar: bool):
        """
        Create the confluent-kafka producer, then initialise the base class.

        Args:
            brokers (str): Value for the ``bootstrap.servers`` setting.
            row_count (int): Forwarded to ``AbstractProducer``.
            disable_progress_bar (bool): Forwarded to ``AbstractProducer``.
        """
        from confluent_kafka import Producer

        self.producer = Producer({"bootstrap.servers": brokers})
        super().__init__(brokers, row_count, disable_progress_bar)

    def produce(self, topic: str, value: bytes) -> None:
        """
        Generic produce that implements confluent-kafka's produce method to
        push a byte encoded object into a Kafka topic.

        Any exception is recorded through ``_set_error`` instead of raised.

        Args:
            topic (str): Kafka topic.
            value (bytes): Byte encoded object.

        Returns:
            None: None.
        """

        try:
            self.producer.produce(topic, value=value, callback=self._delivery_callback)
            # Serve delivery callback queue.
            # NOTE: Since produce() is an asynchronous API this poll() call
            # will most likely not serve the delivery callback for the
            # last produce()d message.
            self.producer.poll(0)
        except Exception as ex:
            self._set_error(str(ex))

        return None

    def flush(self, timeout: Optional[int]):
        """
        Generic flush that implements confluent-kafka's flush method.

        Args:
            timeout (Optional[int]): Timeout in seconds to wait for completion.
                ``None`` waits without a timeout.

        Returns:
            int: Number of messages still in queue.
        """
        # confluent-kafka's flush() only accepts a number for `timeout`, so
        # "no timeout" has to be expressed by omitting the argument.
        if timeout is None:
            return self.producer.flush()
        return self.producer.flush(timeout=timeout)

    def _delivery_callback(self, err: str, msg) -> None:
        """
        Optional per-message delivery callback (triggered by poll() or flush())
        when a message has been successfully delivered or permanently failed
        delivery (after retries).

        Although the msg argument is not used, the current method signature is
        required as specified in the confluent-kafka documentation.

        Args:
            err (str): Error message.
            msg (): Kafka message.

        Returns:
            None
        """
        if err:
            self._set_error(err)
        else:
            self._inc_pbar(None)
# -*- coding: utf-8 -*-
from confluent_kafka import Producer

p = Producer({
    'bootstrap.servers': '127.0.0.1:9092',
})


def delivery_report(err, msg):
    """ Report the delivery result of one produced message.
        Invoked from poll() or flush(). """
    if err is None:
        print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))
    else:
        print('Message delivery failed: {}'.format(err))


some_data_source = ['124', '245']

for data in some_data_source:
    # Serve delivery report callbacks from earlier produce() calls.
    p.poll(0)

    # produce() is asynchronous: the delivery report arrives later, from the
    # poll() above or the flush() below, once the message has been delivered
    # or has failed permanently.
    encoded = data.encode('utf-8')
    p.produce('mytopic', encoded, callback=delivery_report)

# Block until all outstanding messages are delivered and their delivery
# report callbacks have run.
p.flush()
key = "/test"

coolaid = '{"severity":"MAJOR", "standout":false, "talk":"MAJOR Alarm: We are out of purple cool aid."}'
barbeque = '{"severity":"MINOR", "standout":false, "talk":"MINOR Alarm: We are out of barbecue sauce."}'
tater_salad = '{"severity":"MAJOR", "standout":false, "talk":"MAJOR Alarm: We are out of potato salad."}'
dont_ignore = '{"severity":"OK", "standout":true, "talk":"This message will not be ignored."}'

print(
    "This demo expects there to be a kafka topic 'AcceleratorTalk' and for the annunciator threshold to be set at 4."
)


def _announce(payload):
    """Produce one UTF-8 encoded payload under ``key``, flush it, then pause three seconds."""
    p.poll(0)
    p.produce(topic, payload.encode('utf-8'), key.encode('utf-8'), callback=delivery_report)
    p.flush()
    sleep(3)


# Demo the ability of the annunciator to receive a message.
_announce(coolaid)

# Demo the ability of the annunciator to receive a message ignoring severity.
# NOTE(review): this step sends `coolaid` again rather than `dont_ignore`;
# confirm which payload was intended.
_announce(coolaid)
class KafkaWorkflowResultsSender(object):
    """Forwards workflow execution events to per-event Kafka topics."""

    def __init__(self, execution_db, message_converter=ProtobufWorkflowResultsConverter, socket_id=None):
        """Create the producer from the WALKOFF workflow-results settings.

        Args:
            execution_db: Database wrapper whose ``session`` stores saved workflows.
            message_converter: Object used to serialise events and to build
                workflow request messages.
            socket_id: Identifier of this sender; when falsy, events are sent
                in-process instead of through Kafka.
        """
        self._ready = False

        self.id_ = socket_id
        kafka_config = walkoff.config.Config.WORKFLOW_RESULTS_KAFKA_CONFIG
        self.producer = Producer(kafka_config)
        self.execution_db = execution_db
        self.topic = walkoff.config.Config.WORKFLOW_RESULTS_KAFKA_TOPIC
        self.message_converter = message_converter

        if self.check_status():
            self._ready = True

    def shutdown(self):
        """Blocks until all queued messages have been delivered or have failed."""
        self.producer.flush()

    @staticmethod
    def _delivery_callback(err, msg):
        """Logs a failed delivery; successful deliveries are ignored."""
        if err is not None:
            logger.error('Kafka message delivery failed: {}'.format(err))

    def _format_topic(self, event):
        """Returns the topic for ``event``: the base topic, a dot, and the event name."""
        return '{}.{}'.format(self.topic, event.name)

    def handle_event(self, workflow, sender, **kwargs):
        """Listens for the data_sent callback, which signifies that an execution element needs to trigger a
                callback in the main thread.

        Args:
            workflow (Workflow): The Workflow object that triggered the event
            sender (ExecutionElement): The execution element that sent the signal.
            kwargs (dict): Any extra data to send.
        """
        event = kwargs['event']
        if event in [WalkoffEvent.TriggerActionAwaitingData, WalkoffEvent.WorkflowPaused]:
            # Persist the workflow state for these two events.
            saved_workflow = SavedWorkflow.from_workflow(workflow)
            self.execution_db.session.add(saved_workflow)
            self.execution_db.session.commit()
        elif event == WalkoffEvent.ConsoleLog:
            # Console logs are attributed to the currently executing action.
            action = workflow.get_executing_action()
            sender = action

        if self.id_:
            packet_bytes = self.message_converter.event_to_protobuf(sender, workflow, **kwargs)
            self.producer.produce(self._format_topic(event), packet_bytes, callback=self._delivery_callback)
            # Delivery callbacks only run from poll()/flush(). Without this
            # call, failures would not be logged until shutdown() and the
            # local queue would keep growing.
            self.producer.poll(0)
        else:
            event.send(sender, data=kwargs.get('data', None))

    def is_ready(self):
        """Returns the readiness flag computed at construction time."""
        return self._ready

    def check_status(self):
        """Returns True when a producer object exists."""
        return self.producer is not None

    def send_ready_message(self):
        """Emits the in-process WorkerReady signal."""
        WalkoffEvent.CommonWorkflowSignal.send(sender={'id': '1'}, event=WalkoffEvent.WorkerReady)

    def create_workflow_request_message(self, workflow_id, workflow_execution_id, start=None, start_arguments=None,
                                        resume=False, environment_variables=None, user=None):
        """Delegates to the message converter's method of the same name, passing all arguments through."""
        return self.message_converter.create_workflow_request_message(workflow_id, workflow_execution_id, start,
                                                                      start_arguments, resume, environment_variables,
                                                                      user)
class GetRecommendations:
    """Consumes requested amounts from Kafka, fetches that many optimal leads
    from the OptimalQ API and publishes them to the recommendations topic."""

    def __init__(self, token, optimalq_connector, pool_uid, snooze_seconds, consumer_group, recommendations_amount_topic, recommendations_topic):
        """
        Create the Kafka producer/consumer pair and subscribe the consumer to
        ``recommendations_amount_topic``.

        :param token: initial value for the X-Auth-Token request header
        :param optimalq_connector: object whose get_token() supplies a fresh token
        :param pool_uid: pool identifier placed in the request URL
        :param snooze_seconds: value of the SnoozeSeconds query parameter
        :param consumer_group: Kafka consumer group id
        :param recommendations_amount_topic: topic the amounts are read from
        :param recommendations_topic: topic the recommendations are written to
        """
        # NOTE(review): broker address and SASL credentials are empty strings
        # here - presumably placeholders to be filled in; confirm.
        self._producer = Producer({"bootstrap.servers": "",
                                   "security.protocol": "SASL_SSL",
                                   "sasl.mechanisms": "PLAIN",
                                   "sasl.username": "",
                                   "sasl.password": ""})
        self._consumer = Consumer({
            "bootstrap.servers": "",
            "security.protocol": "SASL_SSL",
            "sasl.mechanisms": "PLAIN",
            "sasl.username": "",
            "sasl.password": "",
            'group.id': consumer_group,
            'enable.auto.commit': True,
            'auto.offset.reset': 'earliest'
        })
        self._consumer.subscribe([recommendations_amount_topic])
        self._headers = {"X-Auth-Token": "{}".format(token), "Content-Type": "application/json"}
        self._optimalq_connector = optimalq_connector
        self._pool_uid = pool_uid
        self._snooze_seconds = snooze_seconds
        # NOTE(review): base URL is empty - presumably a placeholder; confirm.
        self._optimalq_url = ''
        self._recommendations_topic = recommendations_topic

    def start(self):
        """
        Poll the subscribed topic forever and pass each message value to
        self.get_optimal(), committing after each handled message.
        """
        while True:
            msg = self._consumer.poll(0.1)

            if msg is None:
                continue
            elif not msg.error():  # Received message
                self.get_optimal(msg.value())
                # Explicit commit in addition to 'enable.auto.commit'.
                self._consumer.commit()
            elif msg.error().code() == KafkaError._PARTITION_EOF:
                logging.info('End of partition reached {}/{}'.format(msg.topic(), msg.partition()))
            else:
                logging.error('Error occurred: {}'.format(msg.error().str()))

    def get_optimal(self, amount):
        """
        Get Optimal leads from OptimalQ API and send them to self.send_optimal_recommendations()

        On a non-2xx response the request is retried up to five times, each
        time after asking the connector for a new token. Every exception is
        logged and swallowed.

        :param amount: number of leads to request; converted with int()
        """
        try:
            amount = int(amount)
            url = '{}/v1/pools/{}/leads/optimal?count={}&SnoozeSeconds={}'\
                .format(self._optimalq_url, self._pool_uid, amount, self._snooze_seconds)
            # NOTE(review): no timeout is passed to requests.get - confirm
            # that blocking indefinitely is acceptable here.
            optimal_results = requests.get(url, headers=self._headers)
            code = optimal_results.status_code
            counter = 5

            while (counter > 0) and ((code < 200) or (code > 299)):
                counter -= 1
                token = self._optimalq_connector.get_token()
                if token is not None:
                    self._headers = {"X-Auth-Token": "{}".format(token),
                                     "Content-Type": "application/json"}
                    optimal_results = requests.get(url, headers=self._headers)
                    code = optimal_results.status_code

            if (code > 199) and (code < 300):
                logging.info('Get optimal leads for pool: {}'.format(self._pool_uid))
                self.send_optimal_recommendations(optimal_results)
                return

            logging.error(
                'Connection to OptimalQ failed while trying to get {} optimal leads for pool {}, code: {}, error: {}'.format(
                    amount, self._pool_uid, code, optimal_results.content))
        except Exception as ex:
            logging.exception("Exception while getting {} optimalq leads from OptimalQ - {}".format(amount, ex))

    def send_optimal_recommendations(self, recommendations):
        """
        Publish the JSON body of ``recommendations`` to the recommendations
        topic and serve pending delivery reports.

        :param recommendations: response object providing .json()
        """
        # NOTE(review): `KafkaUtils.self.delivery_report` looks like a typo
        # (possibly `KafkaUtils.delivery_report`) - verify against KafkaUtils.
        self._producer.produce(self._recommendations_topic, json.dumps(recommendations.json()), callback=KafkaUtils.self.delivery_report)
        self._producer.poll(0)

    def terminate(self):
        """Flush the producer so queued messages are delivered."""
        self._producer.flush()
# Per-message delivery callback, invoked from poll() or flush() once a
# message has either been delivered or has permanently failed delivery
# (after retries).
def delivery_callback(err, msg):
    if not err:
        sys.stderr.write('%% Message delivered to %s [%d] @ %d\n' %
                         (msg.topic(), msg.partition(), msg.offset()))
        return
    sys.stderr.write('%% Message failed delivery: %s\n' % err)


# Produce every line read from stdin to Kafka.
for line in sys.stdin:
    # Trailing whitespace, including the newline, is stripped before sending.
    payload = line.rstrip()
    try:
        p.produce(topic, payload, callback=delivery_callback)
    except BufferError:
        sys.stderr.write('%% Local producer queue is full (%d messages awaiting delivery): try again\n' %
                         len(p))

    # Serve the delivery callback queue.
    # NOTE: produce() is asynchronous, so this poll() will most likely not
    # serve the delivery callback of the message produced just above.
    p.poll(0)

# Wait until every message has been delivered.
sys.stderr.write('%% Waiting for %d deliveries\n' % len(p))
p.flush()
class TorchExecutor(Process):
    """Process that runs a torch model over a sliding window of Kafka input blobs."""

    def __init__(self, uuid, egg):
        """Load the model stored in ``egg.eggData`` and prepare the input window.

        Args:
            uuid: Identifier used in the consumer group id.
            egg: Object providing ``eggData`` (serialised model), ``inputs``
                (topics to consume) and ``uuid`` (output topic).
        """
        super(TorchExecutor, self).__init__()
        self.uuid = uuid
        self.egg = egg
        model_bytes = io.BytesIO(egg.eggData)
        # NOTE(security): torch.load unpickles its input; eggs must come from
        # a trusted source.
        self.model = torch.load(model_bytes)
        x_windowing = 48  # FIXME: Hardcode
        self.window = deque(maxlen=x_windowing)

    def run(self):
        """Consume input blobs, run the model once the window is full, publish results.

        Runs until an exception ends the loop; the producer is then flushed
        and the consumer closed.
        """
        # Pre-set both clients so the cleanup below cannot fail with
        # AttributeError (and hide the real error) if construction raises.
        self.producer = None
        self.consumer = None
        try:
            self.producer = Producer({'bootstrap.servers': '127.0.0.1:9092'})
            self.consumer = Consumer({
                'bootstrap.servers': '127.0.0.1:9092',
                'group.id': 'pytorch:' + self.uuid,
                'auto.offset.reset': 'earliest'
            })
            self.consumer.subscribe(list(self.egg.inputs))

            while True:
                msg = self.consumer.poll()

                if msg is None:
                    continue
                if msg.error():
                    if msg.error().code() != KafkaError._MAX_POLL_EXCEEDED:
                        print("Consumer error: {}".format(msg.error()))
                    continue

                blob = Blob()
                blob.ParseFromString(msg.value())
                data = np.array(blob.data)
                # Leading batch axis of 1, followed by the blob's dimensions in reverse order.
                data = data.reshape([1] + list(blob.shape.dimension)[::-1])

                self.window.append(data)

                if len(self.window) == self.window.maxlen:
                    # TODO: No idea how this works, but it does. If it breaks, it likely broke right here.
                    windowed_data = np.stack(self.window, axis=2)

                    result = self.model(
                        torch.from_numpy(windowed_data).float())
                    result = result.detach().numpy()

                    # FIXME: Torch models are improperly saved/loaded, and thus the final result has shape (1, 2), instead of (1,). So, we have to magically remap that to 0..1.
                    try:
                        abs_result = np.absolute(result[0])
                        blob = numpy_to_blob(
                            abs_result[1] / (abs_result[0] + abs_result[1]) +
                            (np.random.random_sample() - 0.5) * 0.05)
                    except Exception:
                        blob = numpy_to_blob(result)

                    self.producer.poll(0)
                    self.producer.produce(self.egg.uuid,
                                          key=msg.key(),
                                          value=blob.SerializeToString())
        finally:
            if self.producer is not None:
                self.producer.flush()
            if self.consumer is not None:
                self.consumer.close()
def publish(request):
    """Look up a record in Elasticsearch and publish it to Kafka.

    The request must carry ``type`` and ``uuid`` parameters. The matching
    document is fetched, converted into a message via the GCI/VCI message
    templates and produced to a Kafka topic chosen from ``request.host``.

    :param request: web request providing ``params`` and ``host``.
    :return: dict with ``status`` and ``message`` keys; on successful
        delivery it also contains ``partition`` and ``offset``.
    """
    elasticsearch_server = 'http://localhost:9200/clincoded'
    return_object = {'status': 'Fail',
                     'message': 'Unable to deliver message'}

    # Check that required parameters have been provided
    if not('type' in request.params and 'uuid' in request.params):
        return_object['message'] = 'Required parameters missing in request'
        return return_object

    # Attempt to retrieve data (from Elasticsearch)
    try:
        searchRes = requests.get('{}/{}/{}'.format(elasticsearch_server, request.params['type'], request.params['uuid']), timeout=10)

        if searchRes.status_code != requests.codes.ok:
            return_object['message'] = 'Data search failed'
            return return_object

    except Exception:
        return_object['message'] = 'Data search could not be completed'
        return return_object

    # Store JSON-encoded content of search result(s)
    try:
        resultJSON = searchRes.json()

    except Exception:
        return_object['message'] = 'Retrieved data not in expected format'
        return return_object

    # Check that search found data
    if 'found' not in resultJSON or not(resultJSON['found']):
        return_object['message'] = 'Requested data could not be found'
        return return_object

    # Check that data has expected elements
    try:
        data_type_to_publish = resultJSON['_source']['embedded']['resourceType']

        if data_type_to_publish == 'classification':
            evidence_to_publish = resultJSON['_source']['embedded']['resourceParent']['gdm']
            publishing_affiliation = resultJSON['_source']['embedded']['resource']['affiliation']
            evidence_counts_to_publish = resultJSON['_source']['embedded']['resource']['classificationPoints']

        elif data_type_to_publish == 'interpretation':
            evidence_to_publish = resultJSON['_source']['embedded']['resourceParent']['interpretation']

        else:
            # Any other resource type is treated like missing data.
            raise Exception

    except Exception:
        return_object['message'] = 'Retrieved data missing expected elements'
        return return_object

    # Check that message should be sent? (approved status? permission to publish?)

    # Construct message
    try:
        if data_type_to_publish == 'interpretation':
            message_template = deepcopy(clincoded.messaging.templates.vci_to_dx.message_template)
            data_to_remove = clincoded.messaging.templates.vci_to_dx.data_to_remove
            add_data_to_msg_template(resultJSON['_source']['embedded'], None, None, message_template)

        else:
            message_template = deepcopy(clincoded.messaging.templates.gci_to_dx.message_template)
            classification_points = deepcopy(evidence_counts_to_publish)
            add_data_to_msg_template(resultJSON['_source']['embedded'], gather_evidence(evidence_to_publish, publishing_affiliation),
                gather_evidence_counts(classification_points, True), message_template)

        message = json.dumps(message_template, separators=(',', ':'))

    except Exception:
        return_object['message'] = 'Failed to build complete message'
        return return_object

    # Transform message (if necessary, via independent service)
    try:
        if data_type_to_publish == 'interpretation':
            remove_data_from_msg_template(data_to_remove, message_template['interpretation'])
            message_template['interpretation'] = transform_interpretation(message_template['interpretation'], request.host)
            # Re-serialize because the template changed after the first dump.
            message = json.dumps(message_template, separators=(',', ':'))

    except Exception as e:
        if e.args:
            return_object['message'] = e.args
        else:
            return_object['message'] = 'Failed to build complete message'
        return return_object

    # Configure message delivery parameters
    kafka_cert_pw = ''

    if 'KAFKA_CERT_PW' in os.environ:
        kafka_cert_pw = os.environ['KAFKA_CERT_PW']

    # Defaults apply when the request was served from localhost:6543.
    kafka_conf = {'bootstrap.servers': 'localhost:9093',
        'log_level': 0,
        'security.protocol': 'ssl',
        'ssl.key.location': 'etc/certs/client.key',
        'ssl.key.password': kafka_cert_pw,
        'ssl.certificate.location': 'etc/certs/client.crt',
        'ssl.ca.location': 'etc/certs/server.crt'}
    kafka_topic = 'test'
    kafka_timeout = 10

    if request.host != 'localhost:6543':
        kafka_conf = {'bootstrap.servers': 'exchange.clinicalgenome.org:9093',
            'log_level': 0,
            'security.protocol': 'ssl',
            'ssl.key.location': 'etc/certs/dataexchange/client.key',
            'ssl.key.password': kafka_cert_pw,
            'ssl.certificate.location': 'etc/certs/dataexchange/client.crt',
            'ssl.ca.location': 'etc/certs/dataexchange/server.crt'}

        if data_type_to_publish == 'interpretation':
            kafka_topic = 'variant_interpretation'
        else:
            kafka_topic = 'gene_validity'

        # Every host other than the curation host publishes to "_dev" topics.
        if request.host != 'curation.clinicalgenome.org':
            kafka_topic += '_dev'

    # Send message
    p = Producer(**kafka_conf)

    def delivery_callback(err, msg):
        """Record the delivery outcome in the enclosing return_object."""
        nonlocal return_object
        if err:
            # Store the text of the error rather than the KafkaError object
            # so the returned dict only holds plain, serializable values.
            return_object['message'] = str(err)

        else:
            return_object = {'status': 'Success',
                         'message': message,
                         'partition': msg.partition(),
                         'offset': msg.offset()}

    try:
        p.produce(kafka_topic, message, callback=delivery_callback)
        # flush() serves the delivery callback; if the timeout elapses first,
        # return_object keeps its initial 'Fail' content.
        p.flush(kafka_timeout)
        return return_object

    except Exception:
        return_object['message'] = 'Message delivery failed'
        return return_object
config = dict(config_parser['default'])

# Build the producer from the [default] section of the parsed config.
producer = Producer(config)


def delivery_callback(err, msg):
    """Print the delivery result of one produced event.

    Called by the producer from poll() or flush() after the message has been
    delivered or has permanently failed (after retries).
    """
    if err:
        print('ERROR: Message failed delivery: {}'.format(err))
        return
    event_key = msg.key().decode('utf-8')
    event_value = msg.value().decode('utf-8')
    print("Produced event to topic {topic}: key = {key:12} value = {value:12}".format(
        topic=msg.topic(), key=event_key, value=event_value))


# Events are built from random picks out of these lists.
topic = "purchases"
user_ids = ['eabara', 'jsmith', 'sgarcia', 'jbernard', 'htanaka', 'awalther']
products = ['book', 'alarm clock', 't-shirts', 'gift card', 'batteries']

count = 0
for _ in range(10):
    # Draw the user first, then the product.
    user_id = choice(user_ids)
    product = choice(products)
    # Positional order is (topic, value, key).
    producer.produce(topic, product, user_id, callback=delivery_callback)
    count += 1

# Block until the messages are sent.
producer.poll(10000)
producer.flush()
class ApplicationBase(object):
    """
    Sample application to test registration and communication with
    Spectrum Discover.

    This script expects configuration parameters to be specified as
    environment variables.

    SPECTRUM_DISCOVER_HOST ..... Spectrum Discover server (domain, IP address)
                                 - default: https://localhost

    APPLICATION_NAME ........... The name of the application to be registered
                                 - default: sd_sample_application

    APPLICATION_USER ........... The user who is used to obtain authentication token
    APPLICATION_USER_PASSWORD

    KAFKA_DIR .................. The directory where TLS certificates will be saved
                                 - absolute or relative path
                                 - default: kafka (relative to the current
                                   working directory)

    LOG_LEVEL .................. Log verbosity level (ERROR, WARNING, INFO, DEBUG)
                                 - default: INFO
    """

    def __init__(self, reg_info):
        """Read configuration from the environment and derive endpoints.

        :param reg_info: dict with the registration request payload; it is
            copied, so the caller's dict is not modified.
        :raises Exception: when no application user is configured.
        """
        self.reg_info = reg_info.copy()

        # Instantiate logger
        loglevels = {
            'INFO': logging.INFO,
            'DEBUG': logging.DEBUG,
            'ERROR': logging.ERROR,
            'WARNING': logging.WARNING
        }
        log_level = os.environ.get('LOG_LEVEL', 'INFO')
        log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        # An unrecognised LOG_LEVEL falls back to INFO instead of raising
        # KeyError during start-up.
        logging.basicConfig(stream=sys.stdout,
                            format=log_format,
                            level=loglevels.get(log_level.upper(),
                                                logging.INFO))
        self.logger = logging.getLogger(__name__)

        env = os.environ.get

        # This application name
        self.application_name = env('APPLICATION_NAME',
                                    'sd_sample_application')

        self.is_kube = os.environ.get('KUBERNETES_SERVICE_HOST') is not None
        self.is_docker = os.environ.get('IS_DOCKER_CONTAINER', False)

        self.cipher = AesCipher()

        # The user account assigned to this application
        self.application_token = None

        # Spectrum discover host application talks to
        self.sd_api = env('SPECTRUM_DISCOVER_HOST', 'https://localhost')

        if self.is_kube:
            self.application_user = env('DB2WHREST_USER', '')
            self.application_user_password = env('DB2WHREST_PASSWORD', '')
            self.sd_auth = env('AUTH_SERVICE_HOST',
                               'http://auth.spectrum-discover')
            self.sd_policy = self._create_host_from_env(
                'POLICY_SERVICE_HOST', 'POLICY_SERVICE_PORT',
                'POLICY_PROTOCOL')
            self.sd_connmgr = self._create_host_from_env(
                'CONNMGR_SERVICE_HOST', 'CONNMGR_SERVICE_PORT',
                'CONNMGR_PROTOCOL')
        else:
            self.application_user = env('APPLICATION_USER', '')
            self.application_user_password = env('APPLICATION_USER_PASSWORD',
                                                 '')
            self.sd_policy = env('POLICYENGINE_HOST', self.sd_api)
            self.sd_connmgr = env('CONNMGR_HOST', self.sd_api)
            self.sd_auth = env('AUTH_HOST', self.sd_api)

        # Endpoints used by application
        policyengine_endpoint = partial(urljoin, self.sd_policy)
        connmgr_endpoint = partial(urljoin, self.sd_connmgr)
        auth_endpoint = partial(urljoin, self.sd_auth)
        self.identity_auth_url = auth_endpoint('auth/v1/token')
        self.registration_url = policyengine_endpoint(
            'policyengine/v1/applications')
        self.certificates_url = policyengine_endpoint(
            'policyengine/v1/tlscert')
        self.connmgr_url = connmgr_endpoint('connmgr/v1/internal/connections')

        # Certificates directory and file paths
        cert_dir = env('KAFKA_DIR', 'kafka')
        if not os.path.isabs(cert_dir):
            cert_dir = os.path.join(os.getcwd(), cert_dir)

        self.certificates_dir = os.path.normpath(cert_dir)
        cert_path = partial(os.path.join, self.certificates_dir)
        self.kafka_client_cert = cert_path("kafka_client.crt")
        self.kafka_client_key = cert_path("kafka_client.key")
        self.kafka_root_cert = cert_path("kafka-ca.crt")

        # Kafka config - this info comes from registration endpoint
        self.work_q_name = '%s_work' % self.application_name
        self.compl_q_name = '%s_compl' % self.application_name

        # Application running status
        self.application_enabled = False

        # Function that handles messages from Spectrum Discover
        self.message_handler = None

        # a mapping dict of connection to client
        self.connections = {}

        self.logger.info("Initialize to host: %s", self.sd_api)
        self.logger.info("Application name: %s", self.application_name)
        self.logger.info("Application user: %s", self.application_user)
        self.logger.info("Certificates directory: %s", self.certificates_dir)

        if not self.application_user:
            raise Exception(
                "Authentication requires APPLICATION_USER and APPLICATION_USER_PASSWORD"
            )

    def _create_host_from_env(self, host, port, protocol):
        """Build ``protocol://host:port/`` from three environment variables.

        The arguments are the NAMES of the environment variables; missing
        variables default to ``localhost``, ``80`` and ``http``.
        """
        host = os.environ.get(host, 'localhost')
        protocol = os.environ.get(protocol, 'http')
        port = os.environ.get(port, '80')
        return ('%(protocol)s://%(host)s:%(port)s/' % {
            'protocol': protocol,
            'host': host,
            'port': port
        })

    def register_application(self):
        """
        Attempt to self-register an application and receive an application
        registration response.

        If the application is already registered a 409 will be returned,
        which means another instance of this application is already
        registered. In that case the registration is looked up with a GET
        request to the registration endpoint.

        :raises Exception: on an unexpected HTTP status or when no
            registration for this application can be found.
        """
        if self.is_kube:
            headers = {
                'Content-Type': 'application/json',
                'X-ALLOW-BASIC-AUTH-SD': 'true'
            }
            auth = requests.auth.HTTPBasicAuth(self.application_user,
                                               self.application_user_password)
        else:
            # Get authentication token if not present
            if not self.application_token:
                self.obtain_token()

            headers = {
                'Content-Type': 'application/json',
                'Authorization': 'Bearer %s' % self.application_token
            }
            auth = None

        # Registration request info (insert application name)
        self.reg_info.update({"action_agent": self.application_name})

        def raise_except_http(valid_codes, http_code):
            if http_code not in valid_codes:
                raise Exception("application:%s, error:%d" %
                                (self.application_name, http_code))

        # NOTE(review): verify=False disables TLS certificate validation for
        # these requests; confirm this is intended for the target deployment.
        def post_register():
            response = requests.post(url=self.registration_url,
                                     verify=False,
                                     json=self.reg_info,
                                     headers=headers,
                                     auth=auth)

            raise_except_http([200, 201, 409], response.status_code)

            if response.status_code == 409:
                self.logger.warning(
                    'Application already registered, initiating GET request (application:%s)'
                    % self.application_name)
                return get_register()

            return response.json()

        def get_register():
            response = requests.get(url=self.registration_url,
                                    verify=False,
                                    headers=headers,
                                    auth=auth)

            raise_except_http([200], response.status_code)

            # GET response returns list of registrations
            reg_list = response.json()

            if not reg_list:
                raise Exception(
                    'Application GET registration empty - (application:%s)' %
                    self.application_name)

            for reg in reg_list:
                if reg['agent'] == self.application_name:
                    return reg

            # Fail explicitly instead of returning None, which would later
            # surface as an unrelated TypeError in update_registration_info.
            raise Exception(
                'Application GET registration not found - (application:%s)' %
                self.application_name)

        try:
            resp_json = post_register()
            self.update_registration_info(resp_json)
        except Exception as e:
            self.logger.error(
                'Application POST registration request FAIL - (%s)' % str(e))
            raise

    def update_registration_info(self, reg_response):
        """Store broker address and queue names from a registration response.

        :param reg_response: mapping with ``broker_ip``, ``broker_port``,
            ``work_q`` and ``completion_q`` keys.
        """
        # Record topic names and broker IP/port
        self.kafka_ip = reg_response['broker_ip']
        self.kafka_port = reg_response['broker_port']
        self.work_q_name = reg_response['work_q']
        self.compl_q_name = reg_response['completion_q']
        self.kafka_host = "%s:%s" % (self.kafka_ip, self.kafka_port)

        self.logger.info("Application is registered")
        self.logger.info("Kafka host: %s" % self.kafka_host)
        self.logger.info("Application attached to work queue: %s" %
                         self.work_q_name)
        self.logger.info("Application attached to compl queue: %s" %
                         self.compl_q_name)

    def get_kafka_certificates(self):
        """
        Download the client certificate, client key, and CA root certificate
        via REST API, parse response and save certificates to files.

        :raises Exception: when the response cannot be parsed or the
            certificates path exists but is not a directory.
        """
        self.logger.info("Download certificates and save to files")
        response = self.download_certificates()

        # Expected layout: client certificate, private key, CA certificate.
        cert_pattern = "-----BEGIN CERTIFICATE-----[^-]+-----END CERTIFICATE-----"
        key_pattern = "-----BEGIN PRIVATE KEY-----[^-]+-----END PRIVATE KEY-----"
        certs_regex = "(%s)[\n\r]*([^-]+%s)[\n\r]*(%s)" % (
            cert_pattern, key_pattern, cert_pattern)

        certs = match(certs_regex, response.decode('utf-8'))

        if not certs:
            raise Exception("Cannot parse certificates from response: %s" %
                            response)

        client_cert, client_key, ca_root_cert = certs.groups()

        # Create certificates directory if not exist
        if not os.path.exists(self.certificates_dir):
            os.makedirs(self.certificates_dir)
        elif not os.path.isdir(self.certificates_dir):
            raise Exception("Certificates path is not a directory (%s)" %
                            self.certificates_dir)

        def save_file(file_path, content):
            self.logger.info("Save file: %s", file_path)

            with open(file_path, 'w') as f:
                f.write(content)

        save_file(self.kafka_client_cert, client_cert)
        save_file(self.kafka_client_key, client_key)
        save_file(self.kafka_root_cert, ca_root_cert)

    def download_certificates(self):
        """
        Download the client certificate, client key, and CA root certificate
        via REST API to be imported into the application's trust store.

        :return: raw response body (bytes) on success.
        :raises Exception: on any request failure or non-OK response.
        """
        self.logger.info("Loading certificates from server: %s",
                         self.certificates_url)

        if self.is_kube:
            headers = {
                'Content-Type': 'application/json',
                'X-ALLOW-BASIC-AUTH-SD': 'true'
            }
            auth = requests.auth.HTTPBasicAuth(self.application_user,
                                               self.application_user_password)
        else:
            # Get authentication token if not present
            if not self.application_token:
                self.obtain_token()

            headers = {
                'Content-Type': 'application/json',
                'Authorization': 'Bearer %s' % self.application_token
            }
            auth = None

        try:
            response = requests.get(url=self.certificates_url,
                                    verify=False,
                                    headers=headers,
                                    auth=auth)

            self.logger.debug("CA server response (%s)" % response)

            # Return certificates data
            if response.ok:
                return response.content

            # A non-OK response raises nothing, so build the error text here;
            # otherwise the raise below would reference an unbound name.
            err = "Http Error :: %s %s " % (response.status_code,
                                            response.reason)

        except requests.exceptions.HTTPError as e:
            err = "Http Error :: %s " % e
        except requests.exceptions.ConnectionError as e:
            err = "Error Connecting :: %s " % e
        except requests.exceptions.Timeout as e:
            err = "Timeout Error :: %s " % e
        except requests.exceptions.RequestException as e:
            err = "Request Error :: %s " % e
        except Exception as e:
            err = "Request Error :: %s " % str(e)

        raise Exception(err)

    def configure_kafka(self):
        """Create the SSL-secured Kafka producer and consumer.

        Requires the broker address from registration and the certificate
        files written by get_kafka_certificates().
        """
        # Instantiate producer
        p_conf = {
            'bootstrap.servers': '%s' % self.kafka_host,
            'ssl.certificate.location': self.kafka_client_cert,
            'ssl.key.location': self.kafka_client_key,
            'security.protocol': 'ssl',
            'ssl.ca.location': self.kafka_root_cert
        }

        self.kafka_producer = Producer(p_conf)

        # Instantiate consumer
        c_conf = {
            'bootstrap.servers': '%s' % self.kafka_host,
            'group.id': 'myagent_grp',
            'session.timeout.ms': 6000,
            'default.topic.config': {
                'auto.offset.reset': 'smallest'
            },
            'ssl.certificate.location': self.kafka_client_cert,
            'ssl.key.location': self.kafka_client_key,
            'enable.auto.commit': 'false',
            'security.protocol': 'ssl',
            'ssl.ca.location': self.kafka_root_cert
        }

        self.kafka_consumer = Consumer(c_conf)

    def obtain_token(self):
        """
        Any application SDK requests to policy engine require role based
        token authentication. The user role assigned to this application
        must have an authenticated account on the server created externally
        by an admin.

        :return: the retrieved token (also stored in ``application_token``).
        :raises Exception: when the request fails or does not return 200.
        """
        self.logger.info('Application obtaining token from URL: %s' %
                         self.identity_auth_url)

        try:
            headers = {}
            if self.is_kube:
                headers['X-ALLOW-BASIC-AUTH-SD'] = 'true'
            basic_auth = requests.auth.HTTPBasicAuth(
                self.application_user, self.application_user_password)

            response = requests.get(url=self.identity_auth_url,
                                    verify=False,
                                    headers=headers,
                                    auth=basic_auth)
            # check response from identity auth server
            if response.status_code == 200:
                self.application_token = response.headers['X-Auth-Token']
                # Only the first characters of the token are logged.
                self.logger.info('Application token retrieved: %s...' %
                                 self.application_token[:10])
                return self.application_token

            raise Exception("Attempt to obtain token returned (%d)" %
                            response.status_code)
        except Exception as e:
            self.logger.error('Application failed to obtain token (%s)' %
                              str(e))
            raise

    def start_kafka_listener(self):
        """Poll the work queue and publish handler results until stopped.

        Each decoded request is passed to on_application_message(); a truthy
        return value is JSON-encoded and produced to the completion queue.
        The loop ends once ``application_enabled`` is False.
        """
        self.logger.info("Looking for new work on the %s topic ..." %
                         self.work_q_name)

        self.kafka_consumer.subscribe([self.work_q_name])

        while True:
            # Poll message from Kafka
            json_request_msg = self.kafka_consumer.poll(timeout=10.0)

            request_msg = self.decode_msg(json_request_msg)

            if request_msg:
                self.logger.debug("Job Request Message: %s", request_msg)

                try:
                    # Process the message with the implementation provided by the client
                    response_msg = self.on_application_message(request_msg)
                except Exception as e:
                    self.logger.error("Error processing Kafka message: %s" %
                                      str(e))
                    # Do not `continue` here: that would skip the stop check
                    # at the bottom of the loop while the handler keeps
                    # failing.
                    response_msg = None

                if response_msg:
                    self.logger.debug(
                        "Submitting completion status batch to topic %s",
                        self.compl_q_name)

                    json_response_msg = None
                    try:
                        json_response_msg = json.dumps(response_msg)
                        self.kafka_producer.produce(self.compl_q_name,
                                                    json_response_msg)
                        self.kafka_producer.flush()
                    except Exception:
                        self.logger.error(
                            "Could not produce message to topic '%s'" %
                            self.compl_q_name)
                        # Encoding itself may have failed; fall back to the
                        # unencoded response so the log line never raises.
                        failed_msg = (json_response_msg
                                      if json_response_msg is not None
                                      else response_msg)
                        self.logger.error("--| Job Response Message: %s" %
                                          failed_msg)

            if not self.application_enabled:
                break

    def decode_msg(self, msg):
        """Decode a consumed Kafka message into a Python object.

        :return: the JSON-decoded payload, or None when ``msg`` is empty or
            carries an error (errors other than partition EOF are logged).
        """
        # Decode JSON message and log errors
        if msg:
            if not msg.error():
                return json.loads(msg.value().decode('utf-8'))
            elif msg.error().code() != KafkaError._PARTITION_EOF:
                self.logger.error(msg.error().code())
        return None

    def on_application_message(self, message):
        """
        Implementing this method is all that is needed to implement a custom
        application. It is called when the Kafka consumer receives a message.
        If a value is returned from this method it will be delivered to the
        Kafka producer.
        """
        if self.message_handler:
            return self.message_handler(self, message)

        self.logger.warning(
            "Please implement `on_application_message` method or supply"
            " function to the `subscribe` method to process application messages."
        )

    def subscribe(self, message_handler):
        """
        Subscribe to Spectrum Discover messages.

        The supplied function is called as ``message_handler(app, message)``
        when the Kafka consumer receives a message. If a value is returned
        from this function it will be delivered to the Kafka producer.
        """
        if message_handler:
            self.message_handler = message_handler

    def get_connection_details(self):
        """
        Read the connection details from Spectrum Discover, and store them
        for future file retrieval.

        May require setup - sftp connections or nfs mounts.

        :return: the JSON-decoded response body.
        :raises Exception: on any request failure or non-OK response.
        """
        self.logger.debug("Querying information for connections")
        try:
            headers = {}
            self.logger.info("Invoking conn manager at %s", self.connmgr_url)
            if self.is_kube:
                headers['X-ALLOW-BASIC-AUTH-SD'] = 'true'
                auth = requests.auth.HTTPBasicAuth(
                    self.application_user, self.application_user_password)
            else:
                if not self.application_token:
                    self.obtain_token()
                headers['Authorization'] = 'Bearer ' + self.application_token
                auth = None
            response = requests.get(url=self.connmgr_url,
                                    verify=False,
                                    headers=headers,
                                    auth=auth)

            self.logger.debug("Connection Manager response (%s)", response)

            # return connection data
            if response.ok:
                return json.loads(response.content)

            # A non-OK response raises nothing, so build the error text here;
            # otherwise the raise below would reference an unbound name.
            err = "Http Error :: %s %s " % (response.status_code,
                                            response.reason)

        except requests.exceptions.HTTPError as e:
            err = "Http Error :: %s " % e
        except requests.exceptions.ConnectionError as e:
            err = "Error Connecting :: %s " % e
        except requests.exceptions.Timeout as e:
            err = "Timeout Error :: %s " % e
        except requests.exceptions.RequestException as e:
            err = "Request Error :: %s " % e
        except Exception as e:
            err = "Request Error :: %s " % str(e)

        raise Exception(err)

    def call_manager_api(self, url, manager_username, manager_password):
        """Execute a GET on the Manager API and handle the response.

        :return: the response object, or None when the request failed.
        """
        try:
            response = requests.get(url,
                                    auth=(manager_username, manager_password))
            if response is None:
                self.logger.error(
                    "This manager site cannot be reached: {}. ".format(url))
                # Nothing further can be inspected on a missing response.
                return None
            if not response.ok:
                self.logger.error(
                    "Failed to connect to {}. Response status: {} {}. ".format(
                        url, response.status_code, response.reason))
            return response
        except Exception as err:
            self.logger.error("Error type %s when getting COS credentials" %
                              type(err))
        return None

    def manager_api_get_aws_keys(self, manager_ip, manager_username,
                                 manager_password):
        """Get AWS keys from the manager API.

        Calls the manager api listMyAccessKeys.adm.

        :return: ``(access_key, secret_key)``; both are None when no key is
            available or the response could not be parsed.
        """
        url = "https://{0}/manager/api/json/1.0/listMyAccessKeys.adm".format(
            manager_ip)

        response = self.call_manager_api(url, manager_username,
                                         manager_password)

        try:
            # Get the first access/secret key.
            keys = response.json()['responseData']['accessKeys']
            if keys:
                self.logger.info(
                    "Accesser credentials successfully retrieved from Manager API"
                )
                accesser_access_key = keys[0]['accessKeyId']
                accesser_secret_key = keys[0]['secretAccessKey']
            else:
                accesser_access_key = None
                accesser_secret_key = None
            return (accesser_access_key, accesser_secret_key)
        except Exception as err:
            # Also reached when call_manager_api returned None.
            self.logger.error("Error type %s when parsing COS credentials",
                              type(err))
            return (None, None)

    def create_cos_connection(self, conn):
        """Create an S3 client for a COS connection and register it.

        Access keys come from the connection's ``additional_info``; when they
        are absent they are fetched through the manager API using the
        supplied manager credentials.
        """
        additional_info = json.loads(conn['additional_info'])
        aws_access_key_id = additional_info.get('accesser_access_key', None)
        aws_secret_access_key = additional_info.get('accesser_secret_key',
                                                    None)
        try:
            if not aws_access_key_id or not aws_secret_access_key:
                # If access keys are not supplied and manager credentials are
                # then retrieve the keys via the management interface
                manager_username = additional_info.get('manager_username',
                                                       None)
                manager_password = additional_info.get('manager_password',
                                                       None)
                if conn['host'] and manager_username and manager_password:
                    manager_password = self.cipher.decrypt(manager_password)
                    (aws_access_key_id,
                     aws_secret_access_key) = self.manager_api_get_aws_keys(
                         conn['host'], manager_username, manager_password)
            else:
                aws_secret_access_key = self.cipher.decrypt(
                    aws_secret_access_key)
        except Exception as err:
            # Use the instance logger like every other method of this class.
            self.logger.error(
                "Credentials problem '%s' with COS connection %s" %
                (str(err), conn['name']))

        client = boto3.client('s3',
                              endpoint_url='http://' +
                              additional_info['accesser_address'],
                              aws_access_key_id=aws_access_key_id,
                              aws_secret_access_key=aws_secret_access_key)

        self.connections[(conn['datasource']), conn['cluster']] = ('COS',
                                                                   client)

    def mount_nfs(self, local_mount, host):
        """Mount the NFS file system read-only at ``local_mount``.

        :param local_mount: local directory to mount on (created if needed).
        :param host: remote export in ``host:/path`` form.
        :raises IOError: when ``host`` is empty.
        """
        if not host:
            raise IOError('Host not defined so cannot create NFS mount.')

        if not os.path.ismount(local_mount):
            try:
                # Argument lists (no shell) keep connection-supplied values
                # from being interpreted as shell syntax.
                check_call(['mkdir', '-p', local_mount])
                check_call([
                    'mount', '-t', 'nfs', '-o', 'nolock', '-o', 'ro', host,
                    local_mount
                ])
                self.logger.info('Mounted remote NFS folder %s', host)
            except (CalledProcessError, OSError):
                # Not fatal, this might not be an active connection
                self.logger.error('Failed to mount remote NFS folder %s',
                                  host)

    def create_nfs_connection(self, conn):
        """Mount the NFS export of a connection and register it."""
        additional_info = json.loads(conn['additional_info'])
        remote_nfs_mount = conn['host'] + ':' + conn['mount_point']
        mount_path_prefix = additional_info['local_mount']
        self.mount_nfs(mount_path_prefix, remote_nfs_mount)
        # need to store this to correlate connections in work messages
        conn['additional_info'] = additional_info

        self.connections[(conn['datasource']), conn['cluster']] = ('NFS',
                                                                   conn)

    def create_scale_connection(self, conn):
        """Connect to remote host using shared RSA key.

        Offline connections are ignored. When no key file is available the
        connection is registered as a local Spectrum Scale connection.
        """
        if conn['online']:
            try:
                xport = paramiko.Transport(conn['host'])
                if self.is_kube or self.is_docker:
                    pkey = paramiko.RSAKey.from_private_key_file(
                        '/keys/id_rsa')
                elif os.path.exists('/gpfs/gpfs0/connections/scale/id_rsa'):
                    pkey = paramiko.RSAKey.from_private_key_file(
                        '/gpfs/gpfs0/connections/scale/id_rsa')
                else:
                    # Assume running locally on scale node; the transport is
                    # not needed, so release it instead of leaking it.
                    xport.close()
                    self.connections[(conn['datasource']),
                                     conn['cluster']] = (
                                         'Spectrum Scale Local', conn)
                    return
                xport.connect(username=conn['user'], pkey=pkey)
                sftp = paramiko.SFTPClient.from_transport(xport)
                if sftp:
                    self.connections[(conn['datasource']),
                                     conn['cluster']] = ('Spectrum Scale',
                                                         sftp)
            except (paramiko.ssh_exception.BadHostKeyException,
                    paramiko.ssh_exception.AuthenticationException,
                    paramiko.ssh_exception.SSHException,
                    paramiko.ssh_exception.NoValidConnectionsError) as ex:
                self.logger.error('Error when attempting Scale connection: %s',
                                  str(ex))

    def connect_to_datasources(self):
        """Fetch all connections and set each one up by platform type."""
        self.conn_details = self.get_connection_details()

        for conn in self.conn_details:
            if conn['platform'] == "IBM COS":
                if self.is_kube and os.environ.get('CIPHER_KEY'):
                    self.create_cos_connection(conn)
                else:
                    self.logger.warning(
                        "COS connections are only supported within kubernetes pods. Skipping connection: %s",
                        conn['datasource'])
            elif conn['platform'] == "NFS":
                self.create_nfs_connection(conn)
            elif conn['platform'] == "Spectrum Scale":
                self.create_scale_connection(conn)
            else:
                self.logger.error("Unsupported connection platform %s",
                                  conn['platform'])

    def start(self):
        """Register, fetch certificates and connections, configure Kafka."""
        self.logger.info("Starting Spectrum Discover application...")

        # Set application running status
        self.application_enabled = True

        # Register this application to Spectrum Discover
        self.register_application()

        # Get Kafka certificates from Spectrum Discover
        self.get_kafka_certificates()

        # Get connections for data retrieval
        self.connect_to_datasources()

        # Instantiate Kafka producer and consumer
        self.configure_kafka()

        # Auth token will expire, remove existing so that later requests will get the new one
        self.application_token = None

    def stop(self):
        """Ask the listener loop to exit after its current iteration."""
        self.logger.info("Stopping Spectrum Discover application...")

        # Disable application
        self.application_enabled = False
def delivery_report(err, msg):
    """Print whether a produced message was delivered or failed.

    Passed as the ``callback`` of produce(); ``err`` is None on success.
    """
    if err is None:
        print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))
        return
    print('Message delivery failed: {}'.format(err))


fake = Faker('en_US')


def gen_ran_data(i):
    """Return one fake record whose "ID" is ``i``."""
    # Dict literals evaluate in order, so the Faker calls run as
    # name -> address -> safe_email.
    return {
        "ID": i,
        "name": fake.name(),
        "address": fake.address(),
        "Email-ID": fake.safe_email(),
    }


# Produce 1000 JSON-encoded fake records to 'sampleTopic'.
for i in range(1000):
    x = json.dumps(gen_ran_data(i))
    print(x)
    # Serve delivery reports of earlier produce() calls without blocking.
    p.poll(0)
    p.produce('sampleTopic', x.encode('utf-8'), callback=delivery_report)

# Wait for all outstanding messages to be delivered.
p.flush()
async def produce(topic_name):
    """Produces data into the Kafka Topic

    Sends one serialized ``ClickEvent`` per second to ``topic_name`` until
    the coroutine is cancelled.

    :param topic_name: name of the topic to produce to.
    """
    p = Producer({"bootstrap.servers": BROKER_URL})
    try:
        while True:
            p.produce(topic_name, ClickEvent().serialize())
            # Serve delivery reports without blocking; the client expects
            # poll() to be called regularly, otherwise completed deliveries
            # are never acknowledged to the application.
            p.poll(0)
            await asyncio.sleep(1.0)
    finally:
        # Give queued messages a bounded amount of time to go out when the
        # coroutine is cancelled or fails.
        p.flush(5)
if __name__ == '__main__':
    # Step 1. Settings for connecting to the Kafka cluster
    props = {
        # Where is the Kafka cluster?
        'bootstrap.servers': 'localhost:9092',  # <-- replace with the Kafka cluster to connect to
        'error_cb': error_cb,  # callback that receives error messages
    }

    # Step 2. Create the Kafka Producer instance
    producer = Producer(**props)

    # Step 3. Name of the topic to publish to
    topicName = 'ak03.fourpartition'
    msgCount = 10000

    try:
        print('Start sending messages ...')

        # produce(topic, [value], [key], [partition], [on_delivery], [timestamp], [headers])
        for i in range(msgCount):
            msg_key = str(i)
            msg_value = 'msg_' + msg_key
            producer.produce(topicName, key=msg_key, value=msg_value)
            # (Important) poll() lets the client service its internal buffer
            producer.poll(0)
            print('key={}, value={}'.format(msg_key, msg_value))
            time.sleep(3)  # pause the main thread for 3 seconds

        print('Send ' + str(msgCount) + ' messages to Kafka')
    except BufferError as e:
        # Error handling: the local queue is full
        sys.stderr.write('%% Local producer queue is full (%d messages awaiting delivery): try again\n' % len(producer))
    except Exception as e:
        print(e)

    # Step 5. Make sure every buffered message has been sent to Kafka
    producer.flush(10)
    print('Message sending completed!')
help="Kafka servers",
)
(options, _) = parser.parse_args()

# Producer connected to the brokers given by the parsed `servers` option.
p = Producer({"bootstrap.servers": options.servers})


def delivery_report(err, msg):
    """
    Called once for each message produced to indicate delivery result.
    Triggered by poll() or flush().

    Prints the error when delivery failed (err is not None), otherwise the
    topic and partition the message was delivered to.
    """
    if err is not None:
        print("Message delivery failed: {}".format(err))
    else:
        print("Message delivered to {} [{}]".format(msg.topic(), msg.partition()))


for data in ["test"]:
    # Trigger any available delivery report callbacks from previous produce() calls
    p.poll(0)

    # Asynchronously produce a message, the delivery report callback
    # will be triggered from poll() above, or flush() below, when the message has
    # been successfully delivered or failed permanently.
    p.produce(options.topic, data.encode("utf-8"), callback=delivery_report)

# Wait for any outstanding messages to be delivered and delivery report
# callbacks to be triggered.
p.flush()
# Poll the Slack channel history forever and forward every new message that
# mentions "bug" to Kafka.
while True:
    try:
        sc_response = sc.conversations_history(channel=channel)
        for msg in sc_response["messages"]:
            if msg["ts"] not in posts:  # message not seen before
                # Remember the timestamp so the message is handled only once.
                posts[msg["ts"]] = True
                if "bug" in msg["text"].lower():  # the post mentions a bug
                    print("Someone posted a bug...")
                    name = sc.users_info(user=msg["user"])["user"][
                        "name"]  # convert the user id into a name
                    data = {"USER": name, "TEXT": msg["text"]}
                    # send the data to the consumer
                    p.produce(
                        Config.SLACK_TOPID_ID,
                        value=json.dumps(data),
                        callback=acked,
                    )
                    # Serve delivery callbacks, waiting up to 0.5 seconds.
                    p.poll(0.5)
                else:
                    # could also be saved to a file
                    continue
    except SlackApiError as e:
        assert e.response["ok"] is False
        print("\t** FAILED: %s" % e.response["error"])
    except Exception as e:
        # Print the exception type and its attributes for any other failure.
        print(type(e))
        print(dir(e))
    finally:
import time

from confluent_kafka import Producer

p = Producer({'bootstrap.servers': 'localhost:29092'})
messages = ["message1", "message2", "message3"]


def delivery_report(err, msg):
    """ Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush(). """
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))


try:
    # Keep re-sending the sample messages until interrupted (Ctrl-C).
    while True:
        for data in messages:
            # Trigger any available delivery report callbacks from previous produce() calls
            p.poll(0)

            # The message timestamp is given in milliseconds since the epoch.
            milliseconds = int(time.time() * 1000)
            p.produce('test_topic', data.encode('utf-8'), callback=delivery_report,
                      timestamp=milliseconds)
            time.sleep(.100)
except KeyboardInterrupt:
    pass
finally:
    # Wait for any outstanding messages to be delivered and delivery report
    # callbacks to be triggered. Placed in `finally` so it actually runs:
    # code after an endless `while True` loop is unreachable.
    p.flush()
class ProducerWrapper:
    """
    A wrapper class for the kafka producer.

    Sends forwarder configuration commands, built by ``ForwarderConfig``,
    to the configuration topic.
    """

    def __init__(self, server, config_topic, data_topic):
        """
        :param server: value used for the ``bootstrap.servers`` setting.
        :param config_topic: topic the configuration commands are produced to.
        :param data_topic: topic handed to ``ForwarderConfig``.
        """
        self.topic = config_topic
        self.converter = ForwarderConfig(data_topic)
        self._set_up_producer(server)

    def _set_up_producer(self, server):
        """
        Create the producer and warn if the configuration topic is missing.

        Known fatal Kafka errors print a hint and exit the program; any
        other KafkaException is re-raised.

        :param server: value used for the ``bootstrap.servers`` setting.
        :return: None
        """
        conf = {"bootstrap.servers": server}
        try:
            self.producer = Producer(**conf)
            if not self.topic_exists(self.topic, server):
                print(
                    "WARNING: topic {} does not exist. It will be created by default."
                    .format(self.topic))
        except KafkaException as error:
            # The first argument of a KafkaException is expected to be a
            # KafkaError exposing name(); fall back to its string form.
            kafka_error = error.args[0] if error.args else None
            if hasattr(kafka_error, "name"):
                error_name = kafka_error.name()
            else:
                error_name = str(kafka_error)

            if error_name == "_BROKER_NOT_AVAILABLE":
                print("No brokers found on server: " + str(server))
            elif error_name == "_TIMED_OUT":
                print("No server found, connection error")
            elif error_name == "_INVALID_ARG":
                print("Invalid configuration")
            elif error_name == "_UNKNOWN_TOPIC":
                print(
                    "Invalid topic, to enable auto creation of topics set"
                    " auto.create.topics.enable to true in broker configuration")
            else:
                # Not one of the errors we know how to explain.
                raise
            quit()

    def add_config(self, pvs):
        """
        Create a forwarder configuration to add more pvs to be monitored.

        :param pvs:(list) A list of new PVs to add to the forwarder configuration.
        :return: None
        """
        data = self.converter.create_forwarder_configuration(pvs)
        print("Sending data {}".format(data))
        self.producer.produce(self.topic, value=data)
        self.producer.flush()

    @staticmethod
    def topic_exists(topicname, server):
        """
        Try to subscribe a throw-away consumer to the topic.

        NOTE(review): this only reports False when subscribe() raises a
        KafkaException; confirm that is a sufficient existence check.

        :param topicname: name of the topic to check.
        :param server: value used for the ``bootstrap.servers`` setting.
        :return: False if subscribing raised a KafkaException, else True.
        """
        conf = {"bootstrap.servers": server, "group.id": str(uuid.uuid4())}
        consumer = Consumer(**conf)
        try:
            consumer.subscribe([topicname])
        except KafkaException as e:
            print("topic '{}' does not exist".format(topicname))
            print(e)
            return False
        finally:
            # Always release the consumer, even when subscribe() fails.
            consumer.close()
        return True

    def remove_config(self, pvs):
        """
        Create a forwarder configuration to remove pvs that are being monitored.

        :param pvs:(list) A list of PVs to remove from the forwarder configuration.
        :return: None
        """
        data = self.converter.remove_forwarder_configuration(pvs)
        for pv in data:
            print("Sending data {}".format(pv))
            self.producer.produce(self.topic, value=pv)
        # Same delivery guarantee as add_config: wait for the queue to drain.
        self.producer.flush()

    def stop_all_pvs(self):
        """
        Sends a stop_all command to the forwarder to clear all configuration.

        :return: None
        """
        self.producer.produce(self.topic, value='{"cmd": "stop_all"}')
        # Same delivery guarantee as add_config: wait for the queue to drain.
        self.producer.flush()
class ConfluentKafkaMsgQAPI:
    """
    This class provides API's into interact with Kafka Queue.

    The broker address and the topic are read from the ``broker_name_key``
    and ``topic_key`` environment variables.
    """

    def __init__(self,
                 is_producer=False,
                 is_consumer=False,
                 perform_subscription=False,
                 thread_identifier=None):
        """
        :param is_producer: create a producer instance when True.
        :param is_consumer: run the consumer connection logic when True.
        :param perform_subscription: selects the subscribe-based consumer path.
        :param thread_identifier: label used in log messages.
        """
        if not is_producer and not is_consumer:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: You need to pick either producer or consumer."
            )
        self.producer_instance = None
        self.consumer_instance = None
        self.broker_name = None
        self.topic = None
        self.producer_conf = None
        self.consumer_conf = None
        self.is_topic_created = False
        self.perform_subscription = perform_subscription
        self.thread_identifier = thread_identifier
        self.__read_environment_variables()
        if is_producer:
            self.__producer_connect()
        if is_consumer:
            self.__consumer_connect()

    def __read_environment_variables(self):
        """
        This method is used to read the environment variables defined in the OS.
        It loops, sleeping two seconds per attempt, until both the broker
        name and the topic are set.
        :return:
        """
        while self.broker_name is None or self.topic is None:
            time.sleep(2)
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: "
                "Trying to read the environment variables...")
            self.broker_name = os.getenv("broker_name_key", default=None)
            self.topic = os.getenv("topic_key", default=None)
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: broker_name={}".format(self.broker_name))
        logging_to_console_and_syslog("ConfluentKafkaMsgQAPI: topic={}".format(
            self.topic))

    # Optional per-message delivery callback (triggered by poll() or flush())
    # when a message has been successfully delivered or permanently
    # failed delivery (after retries).
    @staticmethod
    def delivery_callback(err, msg):
        """Log the delivery result of one produced message."""
        if err:
            logging_to_console_and_syslog('%% Message failed delivery: %s\n' %
                                          err)
        else:
            logging_to_console_and_syslog(
                '%% Message delivered to %s [%d] @ %s\n' %
                (msg.topic(), msg.partition(), str(msg.offset())))

    def __producer_connect(self):
        """
        This method tries to connect to the kafka broker based upon the type of kafka.
        It retries every five seconds until a Producer instance is created.
        :return:
        """
        while self.producer_instance is None:
            try:
                self.producer_conf = {'bootstrap.servers': self.broker_name}
                # Create Producer instance
                self.producer_instance = Producer(**self.producer_conf)
            except Exception:
                # Not a bare except: Ctrl-C must be able to stop the retry loop.
                print("Exception in user code:")
                print("-" * 60)
                traceback.print_exc(file=sys.stdout)
                print("-" * 60)
                time.sleep(5)
            else:
                logging_to_console_and_syslog(
                    "ConfluentKafkaMsgQAPI: Successfully "
                    "connected to broker_name={}".format(self.broker_name))

    def __consumer_connect(self):
        """
        Connect the consumer using the path selected by perform_subscription.
        :return: True on success, False if an exception was raised.
        """
        status = False
        try:
            if self.perform_subscription:
                self.__consumer_connect_to_broker()
                self.__subscribe_to_a_topic()
            else:
                self.__consumer_connect_to_kafka_broker_and_to_a_topic()
            status = True
        except Exception:
            logging_to_console_and_syslog(
                "{}:Exception occurred while polling for "
                "a message from kafka Queue. {} ".format(
                    self.thread_identifier,
                    sys.exc_info()[0]))
            print("{}:Exception in user code:".format(self.thread_identifier))
            print("-" * 60)
            traceback.print_exc(file=sys.stdout)
            print("-" * 60)
        return status

    def enqueue(self, filename):
        """
        This method tries to post a message to the pre-defined kafka topic.
        :param filename: text payload to post; it is UTF-8 encoded.
        :return status False or True: True when produce() accepted the message.
        """
        status = False
        if filename is None or len(filename) == 0:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: filename is None or invalid")
            return status
        if self.producer_instance is None:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: instance is None")
            return status

        if not self.is_topic_created:
            try:
                if self.producer_instance.list_topics(self.topic,
                                                      timeout=1.0):
                    logging_to_console_and_syslog(
                        "Found topic name = {} in the zookeeper.".format(
                            self.topic))
                    self.is_topic_created = True
            except KafkaException:
                # Metadata lookup failed: ask the admin client to create the topic.
                self.kafka_admin_client = admin.AdminClient(self.producer_conf)
                logging_to_console_and_syslog("Creating topic {}.".format(
                    self.topic))
                ret = self.kafka_admin_client.create_topics(
                    new_topics=[
                        admin.NewTopic(topic=self.topic, num_partitions=1)
                    ],
                    operation_timeout=1.0)
                logging_to_console_and_syslog("ret = {}".format(ret))

        # Asynchronously produce a message, the delivery report callback
        # will be triggered from poll() or flush() below, when the message has
        # been successfully delivered or failed permanently.
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: Posting filename={} into "
            "kafka broker={}, topic={}".format(filename, self.broker_name,
                                               self.topic))
        value = filename.encode('utf-8')
        try:
            # Produce line (without newline)
            self.producer_instance.produce(
                self.topic,
                value,
                callback=ConfluentKafkaMsgQAPI.delivery_callback)
            status = True
        except BufferError:
            sys.stderr.write('%% Local producer queue is full '
                             '(%d messages awaiting delivery): try again\n' %
                             len(self.producer_instance))
            status = False
        except Exception:
            print("ConfluentKafkaMsgQAPI: Exception in user code:")
            print("-" * 60)
            traceback.print_exc(file=sys.stdout)
            print("-" * 60)
            status = False
        else:
            event = "ConfluentKafkaMsgQAPI: Posting filename={} into " \
                    "kafka broker={}, topic={}." \
                .format(filename, self.broker_name, self.topic)
            logging_to_console_and_syslog(event)

        # Serve delivery callback queue.
        # NOTE: Since produce() is an asynchronous API this poll() call
        # will most likely not serve the delivery callback for the
        # last produce()d message.
        self.producer_instance.poll(timeout=0.1)

        # Give outstanding messages a short chance to be delivered.
        self.producer_instance.flush(timeout=0.1)

        return status

    def __consumer_connect_to_kafka_broker_and_to_a_topic(self):
        """
        This method tries to connect to the kafka broker.
        Currently a no-op.
        :return:
        """
        pass

    def __consumer_poll_for_new_messages(self):
        """
        Poll the consumer forever, printing each message value to stdout.

        The loop only ends on KeyboardInterrupt or an exception; the
        consumer is closed in either case.
        :return: None
        """
        logging_to_console_and_syslog(
            "{}: Polling the kafka consumer instance for "
            "new messages in the topic {}.".format(self.thread_identifier,
                                                   self.topic))
        # Read messages from Kafka, print to stdout
        try:
            while True:
                msg = self.consumer_instance.poll(timeout=1.0)
                if msg is None:
                    continue
                if msg.error():
                    raise KafkaException(msg.error())
                # Proper message
                sys.stderr.write('%% %s [%d] at offset %d with key %s:\n' %
                                 (msg.topic(), msg.partition(), msg.offset(),
                                  str(msg.key())))
                print(msg.value())
        except KeyboardInterrupt:
            sys.stderr.write('%% Aborted by user\n')
        finally:
            # Close down consumer to commit final offsets.
            self.consumer_instance.close()
        return None

    def __consumer_connect_to_broker(self):
        """
        This method tries to connect to the kafka broker.
        It retries every five seconds until a Consumer instance is created.
        :return:
        """
        if self.consumer_instance:
            return

        # Consumer configuration
        # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        consumer_conf = {
            'bootstrap.servers': self.broker_name,
            'group.id': 'group',
            'session.timeout.ms': 6000,
            'auto.offset.reset': 'earliest'
        }
        consumer_conf['stats_cb'] = stats_cb
        consumer_conf['statistics.interval.ms'] = 0

        # Create logger for consumer (logs will be emitted when poll() is called)
        logger = logging.getLogger('consumer')
        logger.setLevel(logging.DEBUG)
        if not logger.handlers:
            # 'consumer' is a shared named logger: attach the handler only
            # once so repeated connects do not duplicate every log line.
            handler = logging.StreamHandler()
            handler.setFormatter(
                logging.Formatter('%(asctime)-15s %(levelname)-8s %(message)s'))
            logger.addHandler(handler)

        while self.consumer_instance is None:
            try:
                logging_to_console_and_syslog(
                    "Consumer:{}:Trying to connect to broker_name={}".format(
                        self.thread_identifier, self.broker_name))
                # Create Consumer instance
                # Hint: try debug='fetch' to generate some log messages
                self.consumer_instance = Consumer(consumer_conf, logger=logger)
            except Exception:
                # Not a bare except: Ctrl-C must be able to stop the retry loop.
                logging_to_console_and_syslog(
                    "Consumer:{}:Exception in user code:".format(
                        self.thread_identifier))
                logging_to_console_and_syslog("-" * 60)
                traceback.print_exc(file=sys.stdout)
                logging_to_console_and_syslog("-" * 60)
                time.sleep(5)

        logging_to_console_and_syslog("Consumer:{}:Consumer Successfully "
                                      "connected to broker_name={}".format(
                                          self.thread_identifier,
                                          self.broker_name))

    @staticmethod
    def print_assignment(consumer, partitions):
        """Log the partitions assigned to the consumer (on_assign callback)."""
        logging_to_console_and_syslog('consumer = {}, Assignment {}:'.format(
            repr(consumer), partitions))

    def __subscribe_to_a_topic(self):
        """
        Subscribe the consumer to the configured topic.

        :return: True when the subscription was made, False when the topic
                 is missing from the cluster metadata or an error occurred.
        """
        try:
            cluster_meta_data = self.consumer_instance.list_topics(
                self.topic, timeout=0.3)
            logging_to_console_and_syslog("ClusterMetaData={}".format(
                repr(cluster_meta_data)))
            if self.topic not in cluster_meta_data.topics:
                logging_to_console_and_syslog(
                    "Topic {} is "
                    "not found in the ClusterMetaData {}".format(
                        self.topic, repr(cluster_meta_data.topics.keys())))
                return False

            # subscribe() takes a list of topic names.
            self.consumer_instance.subscribe(
                [self.topic],
                on_assign=ConfluentKafkaMsgQAPI.print_assignment)
        except Exception:
            logging_to_console_and_syslog(
                "Consumer:{}: Failed to subscribe to topic {}.".format(
                    self.thread_identifier, self.topic))
            traceback.print_exc(file=sys.stdout)
            return False

        logging_to_console_and_syslog(
            "Consumer:{}: Subscribed to topic {}.".format(
                self.thread_identifier, self.topic))
        return True

    def __iterate_over_kafka_consumer_instance_messages(self):
        """Placeholder for the non-subscription consumer path; does nothing."""
        pass

    def dequeue(self):
        """
        Read from the consumer using the path selected by perform_subscription.
        :return: the result of the selected path, or None on error.
        """
        try:
            if self.perform_subscription:
                return self.__consumer_poll_for_new_messages()
            return self.__iterate_over_kafka_consumer_instance_messages()
        except Exception:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI:Exception occurred while polling for "
                "a message from kafka Queue. {} ".format(sys.exc_info()[0]))
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI:Exception in user code:")
            logging_to_console_and_syslog("-" * 60)
            traceback.print_exc(file=sys.stdout)
            logging_to_console_and_syslog("-" * 60)
        return None

    def cleanup(self):
        """Release resources; currently a no-op."""
        pass
import socket

from confluent_kafka import Producer

bootstrap_servers = "localhost:9092"
topic = "q-data"

conf = {
    "bootstrap.servers": bootstrap_servers,
    "client.id": socket.gethostname()
}
producer = Producer(conf)


def acked(err, msg):
    """Delivery report callback: print whether the message was delivered.

    :param err: delivery error, or None when the message was delivered.
    :param msg: the message the report refers to.
    """
    if err is not None:
        print("Failed to deliver message: %s: %s" % (str(msg), str(err)))
    else:
        print("Message produced: %s" % (str(msg)))


producer.produce(topic, key="42", value="73", callback=acked)

# Wait up to 1 second for the message to be delivered. flush() serves the
# delivery callbacks until the queue is empty or the timeout expires, so the
# script does not exit with the message still queued after a single poll.
producer.flush(1)
# print("Message produced: {}".format(msg.value().decode("utf-8"))) pass FAKER = Faker() try: while True: key = uuid.uuid4() profile = FAKER.profile() profile.pop('ssn') profile.pop('blood_group') profile.pop('website') profile.pop('username') profile.pop('residence') profile.pop('current_location') geolocator = Nominatim(user_agent="kafka-producer") address = barnum.create_city_state_zip() location = geolocator.geocode('{} United States'.format(address[1])) profile['longitude'] = location.longitude profile['latitude'] = location.latitude profile['birthdate'] = str(profile['birthdate']) profile['article'] = random.choice(CATEGORIES) profile['address'] = ' '.join(address) profile['time'] = str(arrow.utcnow()) PRODUCER.produce('druid-test', key = profile['time'], value = json.dumps(profile), callback = acked) PRODUCER.poll(0.5) time.sleep(5) except KeyboardInterrupt: pass PRODUCER.flush(30)
class FurnaceUploader():
    """Extracts frames from a video file and publishes them to Kafka."""

    def __init__(self, configFilePath="./config.json"):
        """Load the JSON configuration and create the Kafka producer."""
        self.config = self.readConfig(configFilePath)
        self.initProducer()

    def readConfig(self, filepath):
        """Return the parsed content of the JSON file at *filepath*."""
        with open(filepath) as handle:
            return json.load(handle)

    def initProducer(self):
        """Create the producer from the "kafka_producer" config section.

        Raises Exception when that section is missing.
        """
        if "kafka_producer" not in self.config:
            raise Exception("Cannot find Kafka Producer configuration.")
        self.kafkaProducer = Producer(self.config["kafka_producer"])

    def producerSend(self, data):
        """Serialize *data* as JSON and produce it to the 'frames' topic."""
        # Serve pending delivery reports before queueing the next message.
        self.kafkaProducer.poll(1.0)
        payload = json.dumps(data)
        print("Message size", len(payload))
        self.kafkaProducer.produce('frames',
                                   payload,
                                   callback=kafkaDeliveryReport)

    @staticmethod
    def _reportBatches(batches):
        """Print the key and frame count of every entry of every batch."""
        for batch in batches:
            for key, val in batch.items():
                print("Process batch with key:", key,
                      "and number of frames:", len(val))

    def uploadFile(self, filepath, startTime):
        """Send every extracted frame of *filepath* to Kafka, then flush.

        Frames are resized to 640x480, stamped with startTime plus the
        frame's offset in seconds, sent one by one, and also fed to a
        DataBatcher whose batches are only reported on stdout.
        """
        extractor = FrameExtractor(filepath, 1)
        batcher = DataBatcher(keyFunc=batchKeyFun, valFunc=batchValueFun)

        for (sec, frame) in extractor.getNextFrame():
            resizedFrame = resizeFrame(frame, 640, 480)
            frameTime = startTime + datetime.timedelta(seconds=sec)

            # Batching is short-circuited: each frame is sent on its own.
            print(resizedFrame.shape)
            # While decoding, do numpy.array(json.loads())
            encodedFrame = base64.b64encode(
                resizedFrame.tobytes()).decode("utf-8")
            self.producerSend({
                "camera": 'camera1',
                "timestamp": str(frameTime),
                "timetype": "second",
                "frame": encodedFrame,
            })

            batcher.addData((frameTime, resizedFrame))
            self._reportBatches(batcher.getBatches())

        # Close the last open batch and report what is left.
        batcher.endBatch()
        self._reportBatches(batcher.getBatches())

        self.kafkaProducer.flush()
'sasl.mechanisms': 'PLAIN', 'security.protocol': 'SASL_SSL', 'sasl.username': '******', 'sasl.password': '******' }) def acked(err, msg): """Delivery report callback called (from flush()) on successful or failed delivery of the message.""" if err is not None: print("failed to deliver message: {}".format(err.str())) else: print("produced to: {} [{}] @ {}".format(msg.topic(), msg.partition(), msg.offset())) p.produce('python-test-topic', value='python test value', callback=acked) # flush() is typically called when the producer is done sending messages to wait # for outstanding messages to be transmitted to the broker and delivery report # callbacks to get called. For continous producing you should call p.poll(0) # after each produce() call to trigger delivery report callbacks. p.flush(10) c = Consumer({ 'bootstrap.servers': '<ccloud bootstrap servers>', 'broker.version.fallback': '0.10.0.0', 'api.version.fallback.ms': 0, 'sasl.mechanisms': 'PLAIN', 'security.protocol': 'SASL_SSL', 'sasl.username': '******', 'sasl.password': '******',
from confluent_kafka import Producer

# Sample payloads to publish.
some_data_source = ['dddd', 'ffff', 'gggg']

producer_settings = {'bootstrap.servers': '172.17.0.4'}
p = Producer(producer_settings)

# Queue every payload as UTF-8 bytes on the topic.
for encoded in (item.encode('utf-8') for item in some_data_source):
    p.produce('ragnatopic', encoded)

# Block until all queued messages have been delivered.
p.flush()
'bootstrap.servers': "kafka:2181", 'group.id': "json_producer" }) time.sleep(10) def delivery_callback (err, msg): if err: sys.stderr.write('%% Message failed delivery: %s\n' % err) else: sys.stderr.write('%% Message delivered to %s [%d]\n' % \ (msg.topic(), msg.partition())) for tweet in get_tweet('examples/tweets-200k.txt.gz'): # if len(tweet['entities']['urls']) > 0 and \ # any(tweet['lang'] in l for l in ['es', 'en']): try: print("%s: %s" % (tweet['user']['screen_name'], tweet['text'])) kfk.produce( "raw_tweets", json.dumps(tweet), callback=delivery_callback ) kfk.poll(0) kfk.flush() except BufferError as e: sys.stderr.write('%% Local producer queue is full ' \ '(%d messages awaiting delivery): try again\n' % len(kfk))
class LoaderProducer:
    """Produces JSON-encoded events to Kafka topics."""

    def __init__(self, broker=None, callback_function=None):
        """
        Instantiate the class and create the producer object
        :param broker: host[:port] string (or comma-separated list of
        host[:port] strings) that the producer should contact to bootstrap
        initial cluster metadata
        :param callback_function: fn taking 3 args: err, msg, obj, that is
        called after the event is produced. Default logs the error or success
        """
        self.broker = broker
        self.partition = 0
        self.callback_function = callback_function if callback_function else self.callback_fn
        # Create producer
        self.producer = Producer(self._generate_config())

    def _generate_config(self):
        """
        Generate configuration dictionary for the producer
        :return: dict of client configuration properties
        """
        # session.timeout.ms is a consumer-group setting and has no effect
        # on a producer, so it is not passed any more.
        return {"bootstrap.servers": self.broker}

    def produce_event(self, topic, record):
        """
        Produce event in the specified topic
        :param topic: str
        :param record: dict, serialized with json.dumps
        :return: None
        """
        try:
            self.producer.produce(
                topic=topic,
                value=json.dumps(record, default=self.default_json_encoder),
                # Bind the record as a default so the callback can report it.
                callback=lambda err, msg, obj=record: self.callback_function(err, msg, obj),
            )
            self.producer.poll(1)  # Serve the delivery callback
        except ValueError as error:
            logger.error(error)

    @staticmethod
    def default_json_encoder(o):
        """
        Json Encoder for datetime
        :param o: object json.dumps could not serialize on its own
        :return: ISO-8601 string for date/datetime values, otherwise None
        """
        if isinstance(o, (datetime.date, datetime.datetime)):
            return o.isoformat()
        # NOTE(review): any other unsupported value is serialized as JSON
        # null; confirm this is intended rather than raising TypeError.
        return None

    @staticmethod
    def callback_fn(err, msg, obj):
        """
        Handle delivery reports served from producer.poll.
        This callback takes an extra argument, obj.
        This allows the original contents to be included for debugging purposes.
        """
        if err is not None:
            # A failed delivery is an error, not debug noise.
            logger.error(
                "Message {} delivery failed with error {} for topic {}".format(
                    obj, err, msg.topic()
                )
            )
        else:
            logger.debug("Event Successfully created")
class KafkaDestination(object):
    """ syslog-ng Apache Kafka destination.
    """

    _kafka_producer = None
    # Kept for backward compatibility; every instance gets its own dict in
    # __init__ so options of one destination never leak into another.
    _conf = dict()

    def __init__(self):
        self.hosts = None
        self.topic = None
        self.msg_key = None
        self.partition = None
        self.programs = None
        self.group_id = None
        self.broker_version = None
        self.verbose = False
        self.display_stats = False
        self.producer_config = None
        self._conf = {}

    def init(self, args):
        """ This method is called at initialization time.

        Builds the producer configuration from the destination options.

        :param args: dict of option name to string value; `hosts` and
            `topic` are mandatory.
        :return: False if a mandatory option is missing, True otherwise.
        """
        if 'producer_config' in args:
            try:
                self.producer_config = ast.literal_eval(args['producer_config'])
                self._conf.update(self.producer_config)
            except (ValueError, SyntaxError, TypeError):
                # literal_eval rejects malformed input with ValueError or
                # SyntaxError; update() rejects non-mapping values.
                LOG.error("Given config %s is not in a Python dict format."
                          % args['producer_config'])
        try:
            self.hosts = args['hosts']
            self.topic = args['topic']
            self._conf['bootstrap.servers'] = self.hosts
        except KeyError:
            LOG.error("Missing `hosts` or `topic` option...")
            return False

        if 'msg_key' in args:
            self.msg_key = args['msg_key']
            LOG.info("Message key used will be %s" % self.msg_key)

        if 'partition' in args:
            self.partition = args['partition']
            LOG.info("Partition to produce to %s" % self.partition)

        # optional `programs` parameter to filter out messages
        if 'programs' in args:
            self.programs = parse_str_list(args['programs'])
            LOG.info("Programs to filter against %s" % self.programs)

        if 'group_id' in args:
            self.group_id = args['group_id']
            self._conf['group.id'] = self.group_id
            LOG.info("Broker group_id=%s" % self.group_id)

        if 'broker_version' in args:
            self.broker_version = args['broker_version']
            if '.'.join(self.broker_version.split('.')[:2]) in ('0.10', '0.11'):
                self._conf['api.version.request'] = True
            else:
                self._conf['broker.version.fallback'] = self.broker_version
                self._conf['api.version.request'] = False
            LOG.info("Broker version=%s" % self.broker_version)
        else:
            self.broker_version = DEFAULT_BROKER_VERSION_FALLBACK
            self._conf[
                'broker.version.fallback'] = DEFAULT_BROKER_VERSION_FALLBACK
            self._conf['api.version.request'] = False
            LOG.warning("Default broker version fallback %s "
                        "will be applied here." % DEFAULT_BROKER_VERSION_FALLBACK)

        # provide a global `on_delivery` callback in the `Producer()` config
        # dict better for memory consumptions vs per message callback.
        self._conf['on_delivery'] = delivery_callback
        if 'verbose' in args:
            self.verbose = ast.literal_eval(args['verbose'])
        if not self.verbose:
            # only interested in delivery failures here. We do provide a
            # global on_delivery callback in the Producer() config dict and
            # also set delivery.report.only.error.
            self._conf['delivery.report.only.error'] = True
            LOG.info("Verbose mode is OFF: you will not be able to see "
                     "messages in here. Failures only. Use 'verbose=('True')' "
                     "in your destination options to see successfully "
                     "processed messages in your logs.")

        # display broker stats?
        if 'display_stats' in args:
            self.display_stats = ast.literal_eval(args['display_stats'])
        if self.display_stats:
            self._conf['stats_cb'] = stats_callback
            LOG.info("Broker statistics will be displayed.")

        LOG.info(
            "Initialization of Kafka Python driver w/ args=%s" % self._conf)
        return True

    def open(self):
        """ Open a connection to the Kafka service.

        :return: True once the producer has been created.
        """
        LOG.info("Opening connection to the remote Kafka services at %s"
                 % self.hosts)
        self._kafka_producer = Producer(**self._conf)
        return True

    def is_opened(self):
        """ Check if the connection to Kafka is able to receive messages.

        :return: False if no producer has been created, True otherwise.
        """
        return self._kafka_producer is not None

    def close(self):
        """ Close the connection to the Kafka service.

        Flushes queued messages for up to 30 seconds.

        :return: True
        """
        LOG.debug("KafkaDestination.close()....")
        if self._kafka_producer is not None:
            LOG.debug("Flushing producer w/ a timeout of 30 seconds...")
            self._kafka_producer.flush(30)
        return True

    # noinspection PyMethodMayBeStatic
    def deinit(self):
        """ This method is called at deinitialization time.

        Drops the reference to the producer.

        :return: True
        """
        LOG.debug("KafkaDestination.deinit()....")
        if self._kafka_producer:
            self._kafka_producer = None
        return True

    def send(self, ro_msg):
        """ Send a message to the target service

        It should return True to indicate success, False will suspend the
        destination for a period specified by the time-reopen() option.

        :param ro_msg: a dict, or an object exposing FACILITY, PRIORITY,
            HOST, PROGRAM, DATE and MESSAGE attributes.
        :return: True or False
        """

        # do nothing if msg is empty
        if not ro_msg:
            return True

        # no syslog-ng `values-pair` here we dealing with `LogMessage`
        if not isinstance(ro_msg, dict):
            # syslog-ng `LogMessage` is read-only
            # goal is rfc5424 we cannot use values-pair because of memory leaks
            try:
                msg = {'FACILITY': ro_msg.FACILITY,
                       'PRIORITY': ro_msg.PRIORITY,
                       'HOST': ro_msg.HOST,
                       'PROGRAM': ro_msg.PROGRAM,
                       'DATE': ro_msg.DATE,
                       'MESSAGE': ro_msg.MESSAGE}
            except AttributeError:
                LOG.error("Your version of syslog-ng is not supported. "
                          "Please use syslog-ng 3.7.x")
                return False
        else:
            LOG.warning("You are using `values-pair` if you are using "
                        "syslog-ng <= 3.11 it is known to be leaking...")
            msg = ro_msg
        try:

            # check if we do have a program filter defined.
            msg_program = msg['PROGRAM']
            if self.programs is not None:
                if msg_program not in self.programs:
                    # notify of success
                    return True
            if msg_program == 'firewall':
                firewall_msg = msg['MESSAGE']
                msg['MESSAGE'] = parse_firewall_msg(firewall_msg)
            elif msg_program == 'nat':
                nat_msg = msg['MESSAGE']
                msg['MESSAGE'] = parse_nat_msg(nat_msg)
            # convert date string to UNIX timestamp
            msg_date = msg['DATE']
            if msg_date is not None:
                msg['DATE'] = date_str_to_timestamp(msg_date)

            msg_string = str(msg)

            kwargs = {}
            if self.msg_key and self.msg_key in msg:
                kwargs['key'] = msg[self.msg_key]
            if self.partition:
                try:
                    kwargs['partition'] = int(self.partition)
                except ValueError:
                    LOG.warning(
                        "Ignore partition=%s because it is not an int."
                        % self.partition)

            self._kafka_producer.produce(self.topic, msg_string, **kwargs)

            # `poll()` doesn't do any sleeping at all if you give it 0, all
            # it does is grab a mutex, check a queue, and release the mutex.
            # It is okay to call poll(0) after each produce call, the
            # performance impact is negligible, if any.
            self._kafka_producer.poll(0)
        except BufferError:
            LOG.error("Producer queue is full. This message will be discarded. "
                      "%d messages waiting to be delivered.",
                      len(self._kafka_producer))
            # do not return False here as the destination would be closed
            # and we would have to restart syslog-ng
            sleep(5)
            return True
        except (KafkaException, UnicodeEncodeError) as e:
            LOG.error("An error occurred while trying to send messages...   "
                      "See details: %s" % e, exc_info=True)
            sleep(5)
            # do not return False here as the destination would be closed
            # and we would have to restart syslog-ng
            return True

        return True
'../cacert.pem', # TODO: figure out resolve via parent walk 'sasl.mechanism': 'PLAIN', 'sasl.username': '******', 'sasl.password': os.getenv('SERVICE_BUS_CONNECTION_STRING'), 'client.id': 'python-example-producer' } # Create Producer instance p = Producer(**conf) def delivery_callback(err, msg): if err: sys.stderr.write('%% Message failed delivery: %s\n' % err) else: sys.stderr.write('%% Message delivered to %s [%d] @ %s\n' % (msg.topic(), msg.partition(), msg.value())) # Write 1-100 to topic for i in range(0, 100): try: p.produce(topic, str(i), callback=delivery_callback) except BufferError as e: sys.stderr.write( '%% Local producer queue is full (%d messages awaiting delivery): try again\n' % len(p)) p.poll(0) # Wait until all messages have been delivered sys.stderr.write('%% Waiting for %d deliveries\n' % len(p)) p.flush()
def _kafkaAssign():
    """Subscribe to the admin topic and wait for a partition assignment.

    On assignment every partition offset is set to OFFSET_END before the
    partitions are assigned, so only messages published afterwards are read.
    """
    consumer_assigned = False

    def flush(consumer, partition):
        # on_assign callback: move every assigned partition to its end.
        nonlocal consumer_assigned
        for p in partition:
            p.offset = OFFSET_END
        consumer.assign(partition)
        consumer_assigned = True

    kafka_consumer.subscribe([admin_topic], on_assign=flush)

    # The callback only fires from inside poll().
    while not consumer_assigned:
        kafka_consumer.poll(1)

    kafka_consumer.subscribe([admin_topic])


_kafkaAssign()

# For every sensor announced on the admin topic, publish a random config.
while True:
    # Block up to a second for a message instead of spinning on poll(0).
    message = kafka_consumer.poll(1.0)
    if message is not None and not message.error():
        random_config = gen_random_configs()
        sensor_meta = json.loads(message.value().decode())
        print("New Sensor", sensor_meta)
        new_sensor = {'type': "new_sensor_config", "content": sensor_meta}
        new_sensor['content']['configs'] = random_config
        print("New Sensor Config", new_sensor)
        kafka_producer.produce(sensor_topic, json.dumps(new_sensor))
        # Serve the producer's delivery-report queue.
        kafka_producer.poll(0)
msg = c.poll(1.0) if msg is None: continue message = json.loads(msg.value().decode('utf-8')) #print(message) polaridad = sentiment_analyzer_scores(message['tweet']) tweet_neg = [] tweet_pos = [] tweet_neu = [] if polaridad < 0: tweet_neg = json.dumps(message).encode("utf-8") #print("Tweet negativo:", tweet_neg) print('Produciendo tweet negativo') p2.produce(TOPIC3, tweet_neg) elif polaridad == 0: tweet_neu = json.dumps(message).encode("utf-8") print('Produciendo tweet neutro') p2.produce(TOPIC2, tweet_neu) elif polaridad > 0: tweet_pos = json.dumps(message).encode("utf-8") print('Produciendo tweet positivo') p2.produce(TOPIC4, tweet_pos) c.close()
chunksize = int(args.chunksize) sleeptime = int(args.sleeptime) KAFKA_TOPIC = args.topic ''' the time record is "TIME" we read data by chunk so we can handle a big sample data file ''' input_data = pd.read_csv(INPUT_DATA_FILE, parse_dates=['TIME'], iterator=True, chunksize=chunksize) kafka_producer = Producer({'bootstrap.servers': KAFKA_BOOTSTRAP_SERVER}) for chunk_data in input_data: ''' now process each chunk ''' chunk = chunk_data.dropna() for index, row in chunk.iterrows(): ''' Assume that when some data is available, we send it to Kafka in JSON ''' json_data = json.dumps(row.to_dict(), default=datetime_converter) #check if any event/error sent print(f'DEBUG: Send {json_data} to Kafka') kafka_producer.produce(KAFKA_TOPIC, json_data.encode('utf-8'), callback=kafka_delivery_error) kafka_producer.flush() # sleep a while, if needed as it is an emulation time.sleep(sleeptime)
import pyodbc
from confluent_kafka import Producer

print ('---login--- ')
# NOTE(review): credentials are hard-coded in this connection string; they
# should come from the environment or a secret store.
conn = pyodbc.connect("DRIVER={ODBC Driver 17 for SQL Server};SERVER=DevSQL01;DATABASE=HackStream;UID=greatscott;PWD=H4ppyFunB4ll;")
try:
    cursor = conn.cursor()
    try:
        # Print the highest ActionId currently stored in the Funnel table.
        cursor.execute('SELECT MAX(ActionId) FROM HackStream.dbo.Funnel')
        for row in cursor:
            print (row)
    finally:
        cursor.close()
finally:
    # Release the database connection even when the query fails.
    conn.close()
print ('--end--')

# Publish a single test message and wait up to 10 seconds for delivery.
p = Producer({'bootstrap.servers': '172.16.43.33:9092'})
p.produce('Funnel', key='hello', value='world')
p.flush(10)