def test_create_topic_exception(self, mocker):
    mock_kafka_admin = mocker.Mock()
    mocker.patch('confluent_kafka.admin.AdminClient',
                 return_value=mock_kafka_admin)
    from confluent_kafka.cimpl import KafkaException
    mock_kafka_admin.create_topics = mocker.MagicMock(
        side_effect=KafkaException('fo'))
    kafka_manager = KafkaTopicManager('kafka.servicex.org:1272')
    result = kafka_manager.create_topic('my-topic',
                                        max_message_size=1024,
                                        num_partitions=100)
    assert not result
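# A minimal, hedged sketch of the kind of create_topic wrapper the test above
# exercises; this is an assumption for illustration, not the actual ServiceX
# KafkaTopicManager. The contract the test relies on is simply: catch
# KafkaException raised by AdminClient.create_topics and report failure as False.
from confluent_kafka import KafkaException
from confluent_kafka.admin import AdminClient, NewTopic

class KafkaTopicManagerSketch:
    def __init__(self, broker):
        self._admin = AdminClient({'bootstrap.servers': broker})

    def create_topic(self, name, max_message_size, num_partitions):
        try:
            topic = NewTopic(name, num_partitions=num_partitions,
                             config={'max.message.bytes': str(max_message_size)})
            futures = self._admin.create_topics([topic])
            futures[name].result()  # raises KafkaException if creation failed
            return True
        except KafkaException:
            return False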
def basic_consume_loop(consumer, topics):
    try:
        consumer.assign(topics)
        durs = []
        i = 0
        message = {}
        while running:
            msg = consumer.poll(timeout=1.0)
            if msg is None:
                continue
            message = {}
            if msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
                                     (msg.topic(), msg.partition(), msg.offset()))
                elif msg.error():
                    raise KafkaException(msg.error())
            else:
                message = loads(msg.value().decode("utf-8"))
                #print(message)
                if message['dur_evt_inf'] is not None:
                    i = i + 1
                    durs.append(message['dur_evt_inf'])
                    if i == 1000000:
                        break
                    #durs.append(m['dur_evt_inf'])
                    if i % 1000 == 0:
                        print(message)
                        #now2 = datetime.now()
                        print(i)
    finally:
        # Close down consumer to commit final offsets.
        consumer.close()

    #print(durs)
    mean = statistics.mean(durs)
    median = statistics.median(durs)
    max1 = max(durs)
    min2 = min(durs)
    print('max=' + str(max1))
    print('min=' + str(min2))
    print('avg=' + str(mean))
    print('med=' + str(median))
    print('total obs =' + str(len(durs)))
def poll(self):
    msg = self._consumer.poll(self._config["poll_timeout"])
    if msg is not None:
        err = msg.error()
        if err:
            if err.code() == KafkaError._PARTITION_EOF:
                return None
            else:
                self._logger.info(
                    "KafkaConsumer error {} at pid {}: topic={} partition=[{}] offset={}"
                    .format(err.code(), os.getpid(), msg.topic(),
                            msg.partition(), msg.offset()))
                raise KafkaException(err)
        if msg.value():
            return msg
def run(self):
    try:
        self.consumer.assign(self.tls)
        i = 0
        while True:
            msg = self.consumer.poll(timeout=1.0)
            if msg is None:
                continue
            if msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
                                     (msg.topic(), msg.partition(), msg.offset()))
                elif msg.error():
                    raise KafkaException(msg.error())
            else:
                message = loads(msg.value().decode("utf-8"))
                ingest_ts = message['ingestTs']
                message_id = message['message_id']
                truth = message['Class']
                y_hat = truth  ## Replace with model.predict & model.learn_one(
                inference_ts = time.time()
                out = {}
                out['ingest_ts'] = ingest_ts
                out['message_id'] = message_id
                out['truth'] = truth  ## model.learn_one(Y,Y_HAT)
                out['y_hat'] = y_hat
                out['inference_ts'] = inference_ts
                out['dur_evt_inf'] = inference_ts - ingest_ts
                i = i + 1
                # sprint(self.result_t)
                k = str(i)
                v = json.dumps(out).encode('utf-8')
                #self.producer.produce(self.result_t, value=v, key=k)
                # self.producer.flush()
                if i % 2 == 0:
                    print('sending to ' + self.result_t + ' ' +
                          str(self.result_t_p) + ' ' + str(out))
                    self.producer.flush()
        #self.producer.close()
    finally:
        # Close down consumer to commit final offsets.
        self.consumer.close()
def process_msgs(self, msgs) -> bool:
    points = []
    for msg in msgs:
        if msg.error():
            if msg.error().code() == KafkaError._PARTITION_EOF:
                # End of partition event
                sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
                                 (msg.topic(), msg.partition(), msg.offset()))
            elif msg.error():
                raise KafkaException(msg.error())
            return False
        if DEBUG == "true":
            print('Received message: %s' % msg.value().decode('utf-8'), flush=True)
        data_input = json.loads(msg.value().decode('utf-8'))
        if self.filter_msg(data_input):
            body = {
                "measurement": self.data_measurement,
                "fields": get_field_values(self.field_config, data_input)
            }
            if self.try_time:
                try:
                    body["time"] = Tree(data_input).execute('$.' + self.data_time_mapping)
                except SyntaxError as err:
                    print('Disabling reading time from message, error occurred:',
                          err.msg, flush=True)
                    print('Influx will set time to time of arrival by default',
                          flush=True)
                    self.try_time = False
            if len(self.tag_config) > 0:
                body["tags"] = get_field_values(self.tag_config, data_input)
            if DEBUG == "true":
                print('Write message: %s' % body, flush=True)
            points.append(body)
    try:
        self.influx_client.write_points(points, time_precision=self.time_precision)
    except exceptions.InfluxDBClientError as e:
        print('Received Influx error: %s' % e.content, flush=True)
    return True
def run_consumer(container_manager):
    schema_registry_conf = {'url': config['kafka']['schema_registry']}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)
    avro_deserializer = AvroDeserializer(schemas.run_record_schema,
                                         schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    conf = {
        'bootstrap.servers': config['kafka']['servers'],
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': "runs-consumers",
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': 'false'
    }

    consumer = DeserializingConsumer(conf)
    print('[+] Listening for incoming runs')

    try:
        consumer_topics = [config['kafka']['runs-topic']]
        consumer.subscribe(consumer_topics)
        while True:
            try:
                msg = consumer.poll(timeout=1.0)
                if msg is None:
                    continue
                if msg.error():
                    raise KafkaException(msg.error())
                else:
                    print('[-] Run initialization')
                    print(msg.value())
                    consumer.commit(asynchronous=False)
                    # handlers.handle_run_execution(container_manager, msg.value())
                    threading.Thread(target=handlers.handle_run_execution,
                                     args=(container_manager, msg.value())).start()
            except ConsumeError as e:
                print(f'[Exception] error_code: {e.code()} message: {e.message()} exception: {e}')
    finally:
        consumer.close()
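# Hedged sketch of a producing counterpart to run_consumer above, reusing the
# same schema registry settings and schemas.run_record_schema. Illustrative
# only: it is not the project's actual producer, and the run record's 'id'
# field used as the message key is an assumption.
from confluent_kafka import SerializingProducer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroSerializer
from confluent_kafka.serialization import StringSerializer

def build_runs_producer():
    schema_registry_client = SchemaRegistryClient({'url': config['kafka']['schema_registry']})
    avro_serializer = AvroSerializer(schema_registry_client, schemas.run_record_schema)
    return SerializingProducer({
        'bootstrap.servers': config['kafka']['servers'],
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer,
    })

def publish_run(producer, run_record):
    # 'id' is an assumed key field on the run record (hypothetical).
    producer.produce(config['kafka']['runs-topic'], key=run_record['id'], value=run_record)
    producer.flush()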
def mensagem_dados_empresa_consumer(self):
    try:
        self.__consumer.subscribe(self.__topics)
        mensagem = self.__consumer.poll(timeout=1.0)
        if mensagem is None:
            return None
        if mensagem.error():
            if mensagem.error().code() == KafkaError._PARTITION_EOF:
                # End of partition event
                sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
                                 (mensagem.topic(), mensagem.partition(), mensagem.offset()))
            elif mensagem.error():
                raise KafkaException(mensagem.error())
        else:
            return mensagem
    except Exception:
        print("Error!")
def get_watched_messages(self, interval=0.0):
    logging.debug("Checking messages that appeared on kafka topics: %r",
                  self.watching_topics)
    res = []

    while True:
        message = self.consumer.poll(interval)
        if message is None:
            break  # done reading
        if message.error():
            raise KafkaException("kafka consumer error: {}".format(message.error()))
        res.append(message)

    # TODO: consumer.close()
    return res
def basic_consume_loop(consumer, topics):
    try:
        consumer.subscribe(topics)
        while running:
            msg = consumer.poll(timeout=1.0)
            if msg is None:
                continue
            if msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
                                     (msg.topic(), msg.partition(), msg.offset()))
                elif msg.error():
                    raise KafkaException(msg.error())
            else:
                msg_process(msg)
    finally:
        # Close down consumer to commit final offsets.
        consumer.close()
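# Hedged usage sketch for basic_consume_loop above. The loop relies on two
# module-level names it does not define itself: a `running` flag and a
# `msg_process` callback. Broker, group, and topic names below are placeholders.
from confluent_kafka import Consumer

running = True

def msg_process(msg):
    # Placeholder handler; a real application would deserialize and act on the payload.
    print(msg.topic(), msg.partition(), msg.offset(), msg.value())

def shutdown():
    global running
    running = False  # lets basic_consume_loop fall through to consumer.close()

if __name__ == '__main__':
    conf = {
        'bootstrap.servers': 'localhost:9092',   # placeholder broker
        'group.id': 'example-group',             # placeholder consumer group
        'auto.offset.reset': 'earliest',
    }
    basic_consume_loop(Consumer(conf), ['example-topic'])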
def get_watched_messages(self, interval=0.0, predicate=lambda x: True):
    logging.debug("Checking messages that appeared on kafka topics: %r",
                  self.watching_topics)
    res = []
    start = time.time()

    while True:
        msg = self.consumer.poll(interval)
        if msg is None or time.time() - start > interval:
            break  # done reading
        if msg.error():
            raise KafkaException("kafka consumer error: {}".format(msg.error()))
        logging.debug("Potential message: %r",
                      (msg.partition(), msg.key(), msg.headers(), msg.value()))
        if predicate(msg):
            res.append(msg)

    # TODO: consumer.close()
    return res
def run(self):
    ### Set up model, metric, and starting timestamp for this model instance ###
    model = self.model
    metric = self.metric
    print('MODEL and METRIC before any messages consumed:', model, metric)
    start_consume_ts = time.time()
    print('Start Consume Time ' + str(start_consume_ts))
    ######
    try:
        self.consumer.assign(self.tls)
        i = 0
        while True:
            msg = self.consumer.poll(timeout=1.0)
            if msg is None:
                continue
            if msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
                                     (msg.topic(), msg.partition(), msg.offset()))
                elif msg.error():
                    raise KafkaException(msg.error())
            else:
                message = loads(msg.value().decode("utf-8"))
                ingest_ts = message['ingestTs']  # Ingestion time from Kafka
                message.pop('ingestTs')
                message_id = message['message_id']  # Message ID from Kafka
                message.pop('message_id')
                truth = float(message['Class'])  # True label supplied by Kafka
                message.pop('Class')  # Remove label before passing the data for prediction (could combine with above)
                message.pop('ignore')
                x = message
                for k, v in x.items():
                    x[k] = float(v)
                y_hat = model.score_one(x)  # model.predict_one(x) # make a prediction
                if i % 1000 == 0:
                    print('x: ', x)
                    for k, v in x.items():
                        print(type(x[k]))
                    print('model: ', model)
                    print('y_hat: ', y_hat)
                inference_ts = time.time()
                metric = metric.update(truth, y_hat)  # update the metric
                model = model.learn_one(x)  # model.learn_one(x,y) # make the model learn
                learn_ts = time.time()
                out = {}
                out['ingest_ts'] = ingest_ts
                out['learn_ts'] = learn_ts
                out['message_id'] = message_id
                out['truth'] = truth
                out['y_hat'] = y_hat
                out['inference_ts'] = inference_ts
                out['dur_evt_inf'] = inference_ts - ingest_ts
                out['dur_start_inf'] = inference_ts - start_consume_ts
                out['dur_inf_learn'] = learn_ts - inference_ts
                out['model_metric'] = metric.get()
                out['ignore'] = False
                i = i + 1
                # sprint(self.result_t)
                k = str(i)
                v = json.dumps(out).encode('utf-8')
                self.producer.produce(self.result_t, value=v, key=k)
                # self.producer.flush()
                if i % 1000 == 0:
                    print('sending to ' + self.result_t + ' ' +
                          str(self.result_t_p) + ' ' + str(out))
                    self.producer.flush()
                if i % 100000 == 0:
                    try:
                        pickle.dump(model, open("model.pkl", "wb"))
                    except:
                        os.remove("model.pkl")
                        pickle.dump(model, open("model.pkl", "wb"))
        #self.producer.close()
    finally:
        # Close down consumer to commit final offsets.
        self.consumer.close()
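# Hedged alternative to the remove-and-retry pickle fallback in run() above:
# write the checkpoint to a temporary file and atomically swap it into place,
# so a crash mid-dump never leaves a truncated model.pkl. Purely illustrative.
import os
import pickle
import tempfile

def checkpoint_model(model, path="model.pkl"):
    fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(path) or ".")
    try:
        with os.fdopen(fd, "wb") as f:
            pickle.dump(model, f)
        os.replace(tmp_path, path)  # atomic rename over the previous checkpoint
    except Exception:
        if os.path.exists(tmp_path):
            os.unlink(tmp_path)
        raise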
def consumer(networkName, topic):
    es = Elasticsearch('192.168.5.71:9200')
    conf = {
        'bootstrap.servers': "192.168.5.41:9092",
        'group.id': "demo",
        'auto.offset.reset': 'smallest'
    }
    consumer = Consumer(conf)
    topics = [topic]
    running = True
    try:
        consumer.subscribe(topics)
        msg_count = 0
        nd_count = 0
        json_object = ""
        while running:
            msg = consumer.poll(timeout=1.0)
            if msg is None:
                continue
            if msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
                                     (msg.topic(), msg.partition(), msg.offset()))
                elif msg.error():
                    raise KafkaException(msg.error())
            else:
                # msg_process(msg)
                # print(msg.value().decode("utf-8"))
                # messageStr = (msg.value())
                if "_index" in msg.value().decode("utf-8"):
                    json_object = "{"
                    nd_count += 1
                if nd_count > 0:
                    json_object += msg.value().decode("utf-8")
                if msg.value().decode("utf-8").strip() == ",":
                    # wrpcap('filtered.pcap', msg.value(), append=True)
                    with open('json_file_pretty', 'a', encoding='utf-8') as outfile:
                        # json.dump(messageStr, fp)
                        if json_object != "":
                            x = json.loads(json.dumps(json_object, sort_keys=True, indent=4))
                            print(x)
                            print(networkName)
                            json.dump(x, outfile)
                            json_object = ""
                            nd_count = 0
                # doc = {
                #     'text': messageStr,
                #     'timestamp': datetime.now(),
                # }
                # es.index(index="test-index", body=doc, refresh='wait_for')
                msg_count += 1
                # if msg_count % MIN_COMMIT_COUNT == 0:
                #     consumer.commit(async=False)
    finally:
        # Close down consumer to commit final offsets.
        consumer.close()