def kafka_local_file(optlist=None, broker='', group='', topics=''):
    broker = argv[0]
    group = argv[1]
    topics = argv[2:]

    # Consumer configuration
    # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
    conf = {'bootstrap.servers': broker,
            'group.id': group,
            'session.timeout.ms': 6000,
            'default.topic.config': {'auto.offset.reset': 'smallest'}}

    # Check to see if -T option exists
    for opt in optlist:
        if opt[0] != '-T':
            continue
        try:
            intval = int(opt[1])
        except ValueError:
            sys.stderr.write("Invalid option value for -T: %s\n" % opt[1])
            sys.exit(1)
        if intval <= 0:
            sys.stderr.write("-T option value needs to be larger than zero: %s\n" % opt[1])
            sys.exit(1)
        conf['stats_cb'] = stats_cb
        conf['statistics.interval.ms'] = int(opt[1])

    # Create logger for consumer (logs will be emitted when poll() is called)
    logger = logging.getLogger('consumer')
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(asctime)-15s %(levelname)-8s %(message)s'))
    logger.addHandler(handler)

    # Create Consumer instance
    # Hint: try debug='fetch' to generate some log messages
    c = Consumer(conf, logger=logger)

    def print_assignment(consumer, partitions):
        print('Assignment:', partitions)

    # Subscribe to topics
    c.subscribe(topics, on_assign=print_assignment)

    # hdfs login
    # client = hdfs.Client('http://%s:50070' % (hdfshost))
    # client = InsecureClient('http://%s:50070' % (hdfshost), user='******')

    # Read messages from Kafka, print to stdout
    try:
        while True:
            logtime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
            msg = c.poll(timeout=1.0)
            if msg is None:
                continue
            if msg.error():
                # Error or event
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write('%s %s [%d] reached end at offset %d\n' %
                                     (logtime, msg.topic(), msg.partition(), msg.offset()))
                elif msg.error():
                    # Error
                    raise KafkaException(msg.error())
            else:
                msgstr = msg.value().decode('utf-8')
                msgdict = json.loads(msgstr, object_pairs_hook=OrderedDict)
                database = msgdict.get('database')
                table = msgdict.get('table')
                type = msgdict.get('type')
                if type == 'insert':
                    data = msgdict.get('data')
                    datalist = data.values()
                    datastr = ','.join('%s' % id for id in datalist)
                    hour = time.strftime('%Y-%m-%d-%H', time.localtime(time.time()))
                    localfile = '/mnt/var/%s.%s.%s.%s' % (database, table, type, hour)
                    sys.stderr.write('%s %s [%d] at offset %d with key %s:\n' %
                                     (logtime, msg.topic(), msg.partition(), msg.offset(), msgstr))
                    with open(localfile, 'a') as writer:
                        writer.write(datastr + '\n')
                else:
                    sys.stderr.write('%s %s [%d] at offset %d with key %s:\n' %
                                     (logtime, msg.topic(), msg.partition(), msg.offset(), type))
    except KeyboardInterrupt:
        sys.stderr.write('%% Aborted by user\n')

    # Close down consumer to commit final offsets.
    c.close()
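# The snippet above registers a `stats_cb` together with `statistics.interval.ms`
# when the -T option is present, but the callback itself is not shown. Below is a
# minimal, hedged sketch of such a callback: confluent_kafka passes librdkafka's
# statistics to it as a JSON-formatted string. The fields reported here are only
# an illustrative choice, not part of the original code.
import json
import sys


def stats_cb(stats_json_str):
    # Parse the JSON statistics emitted every statistics.interval.ms
    stats = json.loads(stats_json_str)
    # 'name' is a top-level field of the librdkafka stats object; printed here as an example
    sys.stderr.write('KAFKA Stats: {} ({} bytes)\n'.format(
        stats.get('name'), len(stats_json_str)))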
def print_assignment(consumer, partitions):
    print('Assignment:', partitions)


if __name__ == '__main__':
    consumer_conf = get_section_config('consumer')
    consume = Consumer(**consumer_conf)
    consume.subscribe(['example-topic-output'], on_assign=print_assignment)
    try:
        while True:
            msg = consume.poll(timeout=1.0)
            if msg is None:
                continue
            if msg.error():
                raise KafkaException(msg.error())
            else:
                # Proper message
                sys.stderr.write('%% %s [%d] at offset %d with key %s:\n' %
                                 (msg.topic(), msg.partition(), msg.offset(), str(msg.key())))
                print(msg.value())
    except KeyboardInterrupt:
        sys.stderr.write('%% Aborted by user\n')
    finally:
        # Close down consumer to commit final offsets.
        consume.close()
def item_consume_red():
    props = {
        'bootstrap.servers': '10.1.1.133:9092',  # Where is the Kafka cluster? (replace with the cluster to connect to)
        'auto.offset.reset': 'latest',           # where to start reading offsets (earliest = from the beginning)
        'enable.auto.commit': True,              # auto commit
        'session.timeout.ms': 6000,              # the consumer is considered dead if it has not contacted Kafka within 6000 ms
        'error_cb': error_cb                     # callback for receiving error events
    }
    # Step 2. Create a Kafka Consumer instance
    consumer = Consumer(props)
    # Step 3. The topic name to subscribe to
    topicName1 = "items2"
    # topicName = "test"
    # Step 4. Subscribe the consumer to the topic
    consumer.subscribe([topicName1])
    # consumer.subscribe([topicName])
    # Step 5. Keep pulling incoming messages from Kafka
    # on_assign=my_assign
    count = 0
    try:
        while True:
            # Ask Kafka for new messages (batch read)
            records = consumer.consume(num_messages=500, timeout=1.0)
            if records is None:
                continue
            for record in records:
                # Check for errors
                if record is None:
                    continue
                if record.error():
                    # Error or event
                    if record.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        sys.stderr.write('%% {} [{}] reached end at offset {}\n'.format(
                            record.topic(), record.partition(), record.offset()))
                    else:
                        # Error
                        raise KafkaException(record.error())
                else:
                    # ** Business logic and message handling go here **
                    # Extract the related metadata
                    topic = record.topic()
                    partition = record.partition()
                    offset = record.offset()
                    timestamp = record.timestamp()
                    # Extract msgKey and msgValue
                    msgKey = try_decode_utf8(record.key())
                    msgValue = try_decode_utf8(record.value())
                    msg_k_v = {msgKey, msgValue}
                    # Print the metadata together with msgKey & msgValue
                    count += 1
                    print('{}-{}-{} : ({} , {})'.format(
                        topic, partition, offset, msgKey, msgValue))
                    return msg_k_v
    except KeyboardInterrupt as e:
        sys.stderr.write('Aborted by user\n')
    except Exception as e:
        sys.stderr.write(str(e))
    finally:
        # Step 6. Close the Consumer connection
        consumer.close()
def run(self):
    logger.debug("Starting snuba query subscriber")
    self.offsets.clear()
    conf = {
        "bootstrap.servers": self.bootstrap_servers,
        "group.id": self.group_id,
        "session.timeout.ms": 6000,
        "auto.offset.reset": self.initial_offset_reset,
        "enable.auto.commit": "false",
        "enable.auto.offset.store": "false",
        "enable.partition.eof": "false",
        "default.topic.config": {"auto.offset.reset": self.initial_offset_reset},
    }

    def on_assign(consumer, partitions):
        for partition in partitions:
            if partition.offset == OFFSET_INVALID:
                updated_offset = None
            else:
                updated_offset = partition.offset
            self.offsets[partition.partition] = updated_offset
        logger.info(
            "query-subscription-consumer.on_assign",
            extra={
                "offsets": six.text_type(self.offsets),
                "partitions": six.text_type(partitions),
            },
        )

    def on_revoke(consumer, partitions):
        partition_numbers = [partition.partition for partition in partitions]
        self.commit_offsets(partition_numbers)
        for partition_number in partition_numbers:
            self.offsets.pop(partition_number, None)
        logger.info(
            "query-subscription-consumer.on_revoke",
            extra={
                "offsets": six.text_type(self.offsets),
                "partitions": six.text_type(partitions),
            },
        )

    self.consumer = Consumer(conf)
    self.consumer.subscribe([self.topic], on_assign=on_assign, on_revoke=on_revoke)

    try:
        i = 0
        while True:
            message = self.consumer.poll(0.1)
            if message is None:
                continue

            error = message.error()
            if error is not None:
                raise KafkaException(error)

            i = i + 1

            with sentry_sdk.start_transaction(
                op="handle_message",
                name="query_subscription_consumer_process_message",
                sampled=True,
            ), metrics.timer("snuba_query_subscriber.handle_message"):
                self.handle_message(message)

            # Track latest completed message here, for use in `shutdown` handler.
            self.offsets[message.partition()] = message.offset() + 1

            if i % self.commit_batch_size == 0:
                logger.debug("Committing offsets")
                self.commit_offsets()
    except KeyboardInterrupt:
        pass

    self.shutdown()
def on_rebalance(consumer, partitions):
    for p in partitions:
        if p.error:
            raise KafkaException(p.error)
    print("Kafka partitions rebalanced: {} / {}".format(consumer, partitions))
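# A hedged usage sketch for a rebalance callback such as on_rebalance above,
# wired into confluent_kafka's Consumer.subscribe(). The broker address, group
# id, and topic name are placeholder assumptions, not values from the original.
from confluent_kafka import Consumer

example_consumer = Consumer({
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'example-group',
    'auto.offset.reset': 'earliest',
})
# The same callback can serve both assignment and revocation notifications.
example_consumer.subscribe(['example-topic'],
                           on_assign=on_rebalance,
                           on_revoke=on_rebalance)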
def consumer_json_kafka(name):
    props = {
        'bootstrap.servers': '10.120.28.129:9092',  # Where is the Kafka cluster? (replace with the cluster to connect to)
        'group.id': 'STUDENTID',                    # ConsumerGroup name (replace with your own student ID)
        'auto.offset.reset': 'earliest',            # start from the earliest offset
        'session.timeout.ms': 6000,                 # the consumer is considered dead if it has not contacted Kafka within 6000 ms
        'error_cb': error_cb                        # callback for receiving error events
    }
    # Step 2. Create a Kafka Consumer instance
    consumer = Consumer(props)
    # Step 3. The topic name to subscribe to
    topicName = name
    # Step 4. Subscribe the consumer to the topic
    consumer.subscribe([topicName], on_assign=my_assign)
    # Step 5. Keep pulling incoming messages from Kafka
    list_data = []
    list_key = []
    msgValue = 0
    try:
        while True:
            # Ask Kafka for new messages (batch read)
            records = consumer.consume(num_messages=500, timeout=1.0)
            # time.sleep(3)
            if len(records) == 0:
                if msgValue != 0:
                    break
                else:
                    continue
            else:
                pass
            for record in records:
                # Check for errors
                if record is None:
                    continue
                if record.error():
                    # Error or event
                    if record.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        sys.stderr.write('%% {} [{}] reached end at offset {}\n'.format(
                            record.topic(), record.partition(), record.offset()))
                    else:
                        # Error
                        raise KafkaException(record.error())
                else:
                    # ** Business logic and message handling go here **
                    # Extract the related metadata
                    topic = record.topic()
                    partition = record.partition()
                    offset = record.offset()
                    timestamp = record.timestamp()
                    # Extract msgKey and msgValue
                    msgKey = try_decode_utf8(record.key())
                    msgValue = try_decode_utf8(record.value())
                    list_key.append(msgKey)
                    list_data.append(msgValue)
                    # print(msgKey, msgValue)
    except KeyboardInterrupt as e:
        sys.stderr.write('Aborted by user\n')
    except Exception as e:
        sys.stderr.write(str(e))
    finally:
        # Step 6. Close the Consumer connection
        consumer.close()
    return list_key, list_data
def receiver(q, l, no_requests, w_q):
    """ Kafka listener for incoming requests """
    l.acquire()
    print("%% Lock acquired")
    conf = {
        'bootstrap.servers': BROKER,
        'group.id': GROUP,
        'session.timeout.ms': 6000,
        'default.topic.config': {'auto.offset.reset': 'smallest'},
    }
    c = Consumer(conf)
    try:
        c.subscribe([TOPIC])
        tm_out = 5
        tm_cur = time.time()
        tm_tot = tm_cur + tm_out
        messages_received = 0
        l.release()
        print("%% Lock released")
        while True:
            msg = c.poll(timeout=1.0)
            if msg is None:
                tm_none = time.time()
                if tm_none > tm_tot:
                    print("%% Messages received: {}".format(messages_received))
                    print('%% No message received for {} seconds.'.format(tm_out))
                    break
                continue
            if msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    print('%% Reached end of topic {} [{}] at offset {}\n'.format(
                        msg.topic(), msg.partition(), msg.offset()))
                    continue
                else:
                    raise KafkaException(msg.error())
            msg_load = json.loads(msg.value())
            if not msg_load.get('session_id') == session_id:
                print("Session ID mismatch")
                continue
            tm_msg = msg_load['sys_ts']
            tm_tot = tm_msg + tm_out
            messages_received += 1
            if messages_received == no_requests:
                print("%% Messages received: {}".format(messages_received))
                # c_q.put(('reciever', messages_received))
                break
            msg_auth_r = (1, msg_load)
            w_q.put(msg_auth_r)
    except KeyboardInterrupt:
        sys.stderr.write('%% Aborted by user\n')
    finally:
        print("%% Closing consumer \n")
        c.close()
def on_delivery(self, err, msg):
    if err is not None:
        raise KafkaException(err)
    self.last_msg = msg
async def get_message_from_kafka_cb(self, callback_method) -> None:
    """
    Get a specific message from the kafka_broker and invoke the callback_method
    automatically with the message body passed as an argument to the callback_method.

    :param callback_method: Takes a callback_method which is automatically called on
        successfully retrieving a message from the KafkaBroker.
    """
    if self.consumer is None:
        logger.error("Kafka Consumer not initialized prior to this call")
        raise ValueError("ERROR - Consumer not initialized")

    if callback_method is None:
        logger.error("No callback_method provided for handling of fetched message")
        raise ValueError("ERROR - callback_method not provided")

    loop = get_running_loop()
    topic_partition = None
    try:
        # This automatically sets the offset to the one provided by the user if it is not None
        topic_partition = TopicPartition(self.topic_name, self.partition, self.offset)
        self.consumer.assign([topic_partition])

        # polls for exactly one record - waits for a configurable max time (seconds)
        msg = await loop.run_in_executor(None, self.consumer.poll, 5.0)

        if msg is None:
            # Handle timeout during poll
            msg = "Consumer error: timeout while polling message from Kafka"
            logger.error(msg)
            raise KafkaException(msg)

        if msg.error():
            error_msg = f"Consumer - error: {msg.error()}"
            logger.error(error_msg)
            if msg.error().code() is KafkaError.OFFSET_OUT_OF_RANGE:
                # throw a 404 at the controller
                raise KafkaMessageNotFoundError(error_msg)
            raise KafkaException(error_msg)

        headers = msg.headers()
        message = None
        if headers is None:
            message = msg.value()
        # Re-evaluate later if we will need message segmentation or have a use case where the producer
        # will chunk messages and record them with the broker
        # else:
        #     message = combine_segments(msg.value(), self._generate_header_dictionary(msg.headers()))

        if message is not None:
            logger.trace(
                f"Found message for topic_name - {self.topic_name}, partition - {self.partition} "
                f"and offset - {self.offset}. Invoking callback_method - {callback_method}",
            )
            return await callback_method(message)
        else:
            _msg_not_found_error = (
                "No message was found that could be fetched for "
                f"topic_name: {self.topic_name}, partition: {self.partition}, offset: {self.offset}"
            )
            logger.error(_msg_not_found_error)
            raise KafkaMessageNotFoundError(_msg_not_found_error)
    finally:
        self._close_consumer()
def kafka_fail_fast(err: Optional[Any], _msg: Any) -> None:
    if err is not None:
        print("Kafka producer delivery error: {}".format(err))
        print("Bailing out...")
        # TODO: should it be sys.exit(-1)?
        raise KafkaException(err)
async def _confluent_consumer_loop(q: asyncio.Queue):
    global c
    if c is None:
        raise Exception
    # wait here so that kafka has time to finish creating topics
    await asyncio.sleep(1.0)
    try:
        consumer.subscribe_to_all(c)
        _set_halted_to_zero()
        while running:
            msg = c.poll(0)
            if msg is None:
                await asyncio.sleep(1.0)
            elif msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    _LOGGER.warning("%s [%d] reached end at offset %d",
                                    msg.topic(), msg.partition(), msg.offset())
                else:
                    raise KafkaException(msg.error())
            else:
                contents = json.loads(msg.value().decode("utf-8"))  # type: dict
                v = contents.get("version", "v1")

                ############################################################
                # Choose which handler table to use based on env variables #
                ############################################################
                if ConsumerModeEnum[Configuration.CONSUMER_MODE] == ConsumerModeEnum.investigator:
                    try:
                        func = _handler_lookup(msg.topic(), v)
                    except KeyError:
                        _LOGGER.warning("No handler for version %s of %s",
                                        v, _get_class_from_topic_name(msg.topic()))
                        message_class = _get_class_from_topic_name(msg.topic())
                        missing_handler.labels(
                            base_topic_name=message_class.base_name,
                            message_version=v).set(1)
                        _message_failed(msg)
                elif ConsumerModeEnum[Configuration.CONSUMER_MODE] == ConsumerModeEnum.metrics:
                    try:
                        func = _handler_lookup(msg.topic(), v,
                                               table=metrics_handler_table,
                                               default=default_metric_handler)
                    except KeyError:
                        _LOGGER.warning(
                            "Could not find entry in metrics handler table for %s.",
                            msg.topic())
                #############################################################
                await q.put((func, msg))
            await asyncio.sleep(0)
    finally:
        c.close()
        for _ in range(Configuration.NUM_WORKERS):
            # each worker can receive this value exactly once
            await q.put(None)
def _fail_fast_produce(err: Any, msg: Any) -> None:
    if err is not None:
        print(f"Kafka producer delivery error: {err}", file=sys.stderr)
        raise KafkaException(err)
def run(self) -> None:
    def fail_fast(err: Any, _msg: Any) -> None:
        if err is not None:
            print("Kafka producer delivery error: {}".format(err))
            print("Bailing out...")
            # TODO: should it be sys.exit(-1)?
            raise KafkaException(err)

    def on_commit(err: Any, partitions: List[Any]) -> None:
        if err is not None:
            print("Kafka consumer commit error: {}".format(err))
            print("Bailing out...")
            # TODO: should it be sys.exit(-1)?
            raise KafkaException(err)
        for p in partitions:
            # check for partition-specific commit errors
            print(p)
            if p.error:
                print("Kafka consumer commit error: {}".format(p.error))
                print("Bailing out...")
                # TODO: should it be sys.exit(-1)?
                raise KafkaException(p.error)
        print("Kafka consumer commit successful")
        pass

    def on_rebalance(consumer: Consumer, partitions: List[Any]) -> None:
        for p in partitions:
            if p.error:
                raise KafkaException(p.error)
        print("Kafka partitions rebalanced: {} / {}".format(consumer, partitions))

    consumer_conf = self.kafka_config.copy()
    consumer_conf.update({
        "group.id": self.consumer_group,
        "on_commit": fail_fast,
        # messages don't have offset marked as stored until pushed to
        # elastic, but we do auto-commit stored offsets to broker
        "enable.auto.commit": True,
        "enable.auto.offset.store": False,
        # user code timeout; if no poll after this long, assume user code
        # hung and rebalance (default: 5min)
        "max.poll.interval.ms": 180000,
        "default.topic.config": {
            "auto.offset.reset": "latest",
        },
    })
    consumer = Consumer(consumer_conf)

    producer_conf = self.kafka_config.copy()
    producer_conf.update({
        "delivery.report.only.error": True,
        "default.topic.config": {
            "request.required.acks": -1,  # all brokers must confirm
        },
    })
    producer = Producer(producer_conf)

    consumer.subscribe(
        [self.consume_topic],
        on_assign=on_rebalance,
        on_revoke=on_rebalance,
    )
    print("Kafka consuming {}".format(self.consume_topic))

    while True:
        msg = consumer.poll(self.poll_interval)
        if not msg:
            print("nothing new from kafka (poll_interval: {} sec)".format(self.poll_interval))
            continue
        if msg.error():
            raise KafkaException(msg.error())

        cle = json.loads(msg.value().decode("utf-8"))
        # print(cle)
        print("processing changelog index {}".format(cle["index"]))
        release_ids = []
        new_release_ids = []
        file_ids = []
        fileset_ids = []
        webcapture_ids = []
        container_ids = []
        work_ids = []
        release_edits = cle["editgroup"]["edits"]["releases"]
        for re in release_edits:
            release_ids.append(re["ident"])
            # filter to direct release edits which are not updates
            if not re.get("prev_revision") and not re.get("redirect_ident"):
                new_release_ids.append(re["ident"])
        file_edits = cle["editgroup"]["edits"]["files"]
        for e in file_edits:
            file_ids.append(e["ident"])
        fileset_edits = cle["editgroup"]["edits"]["filesets"]
        for e in fileset_edits:
            fileset_ids.append(e["ident"])
        webcapture_edits = cle["editgroup"]["edits"]["webcaptures"]
        for e in webcapture_edits:
            webcapture_ids.append(e["ident"])
        container_edits = cle["editgroup"]["edits"]["containers"]
        for e in container_edits:
            container_ids.append(e["ident"])
        work_edits = cle["editgroup"]["edits"]["works"]
        for e in work_edits:
            work_ids.append(e["ident"])

        # TODO: do these fetches in parallel using a thread pool?
        for ident in set(file_ids):
            file_entity = self.api.get_file(ident, expand=None)
            # update release when a file changes
            # TODO: also fetch old version of file and update any *removed*
            # release idents (and same for filesets, webcapture updates)
            release_ids.extend(file_entity.release_ids or [])
            file_dict = self.api.api_client.sanitize_for_serialization(file_entity)
            producer.produce(
                self.file_topic,
                json.dumps(file_dict).encode("utf-8"),
                key=ident.encode("utf-8"),
                on_delivery=fail_fast,
            )

        # TODO: topic for fileset updates
        for ident in set(fileset_ids):
            fileset_entity = self.api.get_fileset(ident, expand=None)
            # update release when a fileset changes
            release_ids.extend(fileset_entity.release_ids or [])

        # TODO: topic for webcapture updates
        for ident in set(webcapture_ids):
            webcapture_entity = self.api.get_webcapture(ident, expand=None)
            # update release when a webcapture changes
            release_ids.extend(webcapture_entity.release_ids or [])

        for ident in set(container_ids):
            container = self.api.get_container(ident)
            container_dict = self.api.api_client.sanitize_for_serialization(container)
            producer.produce(
                self.container_topic,
                json.dumps(container_dict).encode("utf-8"),
                key=ident.encode("utf-8"),
                on_delivery=fail_fast,
            )

        for ident in set(release_ids):
            release = self.api.get_release(
                ident, expand="files,filesets,webcaptures,container,creators")
            if release.work_id:
                work_ids.append(release.work_id)
            release_dict = self.api.api_client.sanitize_for_serialization(release)
            producer.produce(
                self.release_topic,
                json.dumps(release_dict).encode("utf-8"),
                key=ident.encode("utf-8"),
                on_delivery=fail_fast,
            )
            # for ingest requests, filter to "new" active releases with no matched files
            if release.ident in new_release_ids:
                ir = release_ingest_request(release, ingest_request_source="fatcat-changelog")
                if ir and not release.files and self.want_live_ingest(release, ir):
                    producer.produce(
                        self.ingest_file_request_topic,
                        json.dumps(ir).encode("utf-8"),
                        # key=None,
                        on_delivery=fail_fast,
                    )

        # send work updates (just ident and changelog metadata) to scholar for re-indexing
        for ident in set(work_ids):
            assert ident
            key = f"work_{ident}"
            work_ident_dict = dict(
                key=key,
                type="fatcat_work",
                work_ident=ident,
                updated=cle["timestamp"],
                fatcat_changelog_index=cle["index"],
            )
            producer.produce(
                self.work_ident_topic,
                json.dumps(work_ident_dict).encode("utf-8"),
                key=key.encode("utf-8"),
                on_delivery=fail_fast,
            )

        producer.flush()
        # TODO: publish updated 'work' entities to a topic
        consumer.store_offsets(message=msg)
def consumer(args, poll_timeout=3.0):
    """ Consumes packets from a Kafka topic. """
    # setup the signal handler
    signal.signal(signal.SIGINT, signal_handler)

    # where to start consuming messages from
    kafka_offset_options = {
        "begin": seek_to_begin,
        "end": seek_to_end,
        "stored": seek_to_stored
    }
    on_assign_cb = kafka_offset_options[args.kafka_offset]

    # connect to kafka
    logging.debug("Connecting to Kafka; %s", args.kafka_configs)
    kafka_consumer = Consumer(args.kafka_configs)
    kafka_consumer.subscribe([args.kafka_topic], on_assign=on_assign_cb)

    # if 'pretty-print' not set, write libpcap global header
    if args.pretty_print == 0:
        sys.stdout.write(global_header(args))
        sys.stdout.flush()

    try:
        pkts_in = 0
        while not finished.is_set() and (args.max_packets <= 0 or pkts_in < args.max_packets):
            # consume a message from kafka
            msg = kafka_consumer.poll(timeout=poll_timeout)
            if msg is None:
                # no message received
                continue
            elif msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    if args.pretty_print > 0:
                        print("Reached end of topic/partition: topic=%s, partition=%d, offset=%s" % (
                            msg.topic(), msg.partition(), msg.offset()))
                else:
                    raise KafkaException(msg.error())
            else:
                pkts_in += 1
                logging.debug("Packet received: pkts_in=%d", pkts_in)

                if args.pretty_print == 0:
                    # write the packet header and packet
                    # AT: We are just sending over the results of the scan -- a list of macs/rssi's -- where this code
                    # was dealing with network packet sniffers --
                    sys.stdout.write(json.dumps(msg.value(), indent=2))
                    # sys.stdout.write(packet_header(msg))
                    # sys.stdout.write(msg.value())
                    sys.stdout.flush()
                elif pkts_in % args.pretty_print == 0:
                    # pretty print
                    print('Packet[%s]: date=%s topic=%s partition=%s offset=%s len=%s' % (
                        pkts_in, to_date(unpack_ts(msg.key())), args.kafka_topic,
                        msg.partition(), msg.offset(), len(msg.value())))
    finally:
        sys.stdout.close()
        kafka_consumer.close()
def collect_image(topic: str, kafka_session: Consumer):
    """Collect an image from the respective image topic

    Arguments:
        topic {str} -- topic (ex. images)
        kafka_session {Consumer} -- Kafka consumer client
    """
    def print_assignment(consumer, partitions):
        print('Assignment:', partitions)

    kafka_session.subscribe([topic], on_assign=print_assignment)

    while True:
        msg = kafka_session.poll(timeout=1.0)
        if msg is None:
            logs.info("No messages available within topic : %s", topic)
            continue
        if msg.error():
            if msg.error().code() == KafkaError._PARTITION_EOF:
                logs.info('%% %s [%d] reached end of offset %d' %
                          (msg.topic(), msg.partition(), msg.offset()))
            else:
                logs.debug("Kafka Exception : %s", msg.error())
                raise KafkaException(msg.error())
        else:
            # Well formed message
            logs.info('%% %s [%d] at offset %d with key %s: ' %
                      (msg.topic(), msg.partition(), msg.offset(), str(msg.key())))

            # image transform
            image_array, orig_image_array = image_transform(msg)
            prediction, class_weights, final_conv_layer = do_inference(
                ts_server="172.23.0.9", ts_port=8500, model_input=image_array)

            # create CAM
            get_output = K.function([tf.convert_to_tensor(image_array)], [
                tf.convert_to_tensor(final_conv_layer),
                tf.convert_to_tensor(prediction)
            ])
            [conv_outputs, predictions] = get_output([image_array[0]])
            conv_outputs = conv_outputs[0, :, :, :]

            # TODO: Receiving variable results across CAMs generated by this
            # method. Needs further investigation and comparison to original
            # CAM paper found here : http://cnnlocalization.csail.mit.edu/
            cam = np.zeros(dtype=np.float32, shape=(conv_outputs.shape[:2]))
            for i, w in enumerate(class_weights[0]):
                cam += w * conv_outputs[:, :, i]
            cam = cam - np.min(cam)
            cam /= np.max(cam)
            # h, w = orig_image_array.shape[:2]
            cam = cv2.resize(cam, orig_image_array.shape[:2])
            # TODO : Investigate why the cv2.resize() function transposes
            # the height and width of the orig_image_array
            # cam = cv2.resize(cam, (orig_image_array.shape[:2][1], orig_image_array.shape[:2][0]), interpolation=cv2.INTER_CUBIC)
            cam = np.uint8(255 * cam)
            heatmap = cv2.applyColorMap(cam, cv2.COLORMAP_JET)
            # heatmap[np.where(cam < 0.2)] = 0
            img = heatmap * 0.3 + orig_image_array
            logs.info("Class Activation Map (CAM) Created!")

            # This is complete hackery and will need to be replaced
            # I don't know why a numpy array (see `img` array above)
            # would be 25MB when all constituent arrays are ~ 7MB total.
            # Let alone when saving an image to disk the image is only 1MB total.
            cv2.imwrite("inflight_img.png", img)
            new_img = Image.open("inflight_img.png", mode='r')
            img_bytes = io.BytesIO()
            new_img.save(img_bytes, format='PNG')
            img_bytes = img_bytes.getvalue()
            message = marshall_message(img_bytes, prediction.tolist())
            os.remove("inflight_img.png")

            p = kafka_producer()
            p.poll(0)
            p.produce(results_kafka_topic, value=message, callback=kafka_delivery_report)
            p.flush()
def memberin2():
    props = {
        'bootstrap.servers': '10.1.1.133:9092',  # Kafka cluster
        'group.id': 'peter',                     # ConsumerGroup name
        'auto.offset.reset': 'latest',
        'enable.auto.commit': False,
        'error_cb': error_cb                     # callback for receiving error events
    }
    # Step 2. Create a Kafka Consumer instance
    consumer = Consumer(props)
    # Step 3. The topic name to subscribe to
    topicName = 'logs'
    # Step 4. Subscribe the consumer to the topic
    consumer.subscribe([topicName])
    # Step 5. Keep pulling incoming messages from Kafka
    try:
        while True:
            records_pulled = False  # used to check whether any valid record was pulled
            # Ask Kafka for new messages (batch read)
            records = consumer.consume(num_messages=500, timeout=1.0)
            if records is None:
                continue
            for record in records:
                # Check for errors
                if record is None:
                    continue
                if record.error():
                    # Error or event
                    if record.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
                                         (record.topic(), record.partition(), record.offset()))
                    else:
                        # Error
                        raise KafkaException(record.error())
                else:
                    records_pulled = True
                    # ** Business logic and message handling go here **
                    # Extract the related metadata
                    # topic = record.topic()
                    # partition = record.partition()
                    # offset = record.offset()
                    # timestamp = record.timestamp()
                    # Extract msgKey and msgValue
                    msgKey = try_decode_utf8(record.key())
                    msgValue = try_decode_utf8(record.value())
                    sendmsg = {msgKey: msgValue}
                    qq = {}
                    # Print the metadata together with msgKey & msgValue
                    # print('%s-%d-%d : (%s , %s)' % (topic, partition, offset, msgKey, msgValue))
                    # test_msg = ('%s : %s' % (msgKey, msgValue))
                    print(sendmsg)
                    print("A member entering the store has been detected")
                    a = sendmsg['login'].split("'")
                    qq.setdefault("Name", a[5])
                    print(qq)
                    client = pymongo.MongoClient(
                        "mongodb+srv://peter:[email protected]/ceb101?retryWrites=true&w=majority"
                    )
                    mydb = client.wow
                    mycol = mydb['fit']
                    mycol.insert_many([qq])
                    consumer.close()
            # Synchronous commit (Sync commit)
            # if records_pulled:
            #     offsets = consumer.commit(asynchronous=False)
            #     print_sync_commit_result(offsets)
    except KeyboardInterrupt as e:
        sys.stderr.write('Aborted by user\n')
    except Exception as e:
        sys.stderr.write(str(e))
    finally:
        print("Take your time browsing")
def update(consumer, df, df2, df3, df4, df5, df6, df7):
    i = 0
    while i < 50:
        i = i + 1
        msg = consumer.poll(timeout=1.0)
        if msg is None:
            continue
        if msg.error():
            # Error or event
            if msg.error().code() == KafkaError._PARTITION_EOF:
                # End of partition event
                sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
                                 (msg.topic(), msg.partition(), msg.offset()))
            elif msg.error():
                # Error
                raise KafkaException(msg.error())
        else:
            # Proper message
            sys.stderr.write('%% %s [%d] at offset %d with key %s:\n' %
                             (msg.topic(), msg.partition(), msg.offset(), str(msg.key())))
            print(msg.value())

            # parse the message
            val = msg.value()
            result = json.loads(val)
            # t = datetime.strptime(result['time'], "%H:%M:%S.%f")
            t = float(result['timestamp'])
            vINX = float(result['INX_perChange'])
            vMSFT = float(result['MSFT_perChange'])
            vBA = float(result['BA_perChange'])
            volMSFT = float(result['MSFT_vol'])

            # add to the dataframe
            df.loc[len(df)] = [t, vINX]
            df2.loc[len(df2)] = [t, vMSFT]
            df3.loc[len(df3)] = [t, vBA]
            df4.loc[len(df4)] = [t, vMSFT / vINX]
            df5.loc[len(df5)] = [t, vBA / vINX]
            if len(df6['value']) > 0:
                df6.loc[len(df6)] = [t, (vMSFT - df6['value'].iloc[-1]) / DELTAT]
                df7.loc[len(df7)] = [t, (volMSFT - df7['value'].iloc[-1]) / DELTAT]
                # df6.loc[len(df6)] = 1
                # df7.loc[len(df7)] = 1
            else:
                df6.loc[len(df6)] = 0
                df7.loc[len(df7)] = 0

            # Sliding window of the WINDOW_SIZE most recent values
            if len(df['value']) > WINDOW_SIZE:
                r.data_source.data['y'] = list(df['value'])[-WINDOW_SIZE:]
                r.data_source.data['x'] = range(len(list(df['value'])))[-WINDOW_SIZE:]
                dots.data_source.data['y'] = list(df['value'])[-WINDOW_SIZE:]
                dots.data_source.data['x'] = range(len(list(df['value'])))[-WINDOW_SIZE:]
            else:
                r.data_source.data['y'] = list(df['value'])
                r.data_source.data['x'] = range(len(list(df['value'])))
                dots.data_source.data['y'] = list(df['value'])
                dots.data_source.data['x'] = range(len(list(df['value'])))

            # Sliding window of the WINDOW_SIZE most recent values
            if len(df2['value']) > WINDOW_SIZE:
                r2.data_source.data['y'] = list(df2['value'])[-WINDOW_SIZE:]
                r2.data_source.data['x'] = range(len(list(df2['value'])))[-WINDOW_SIZE:]
                dots2.data_source.data['y'] = list(df2['value'])[-WINDOW_SIZE:]
                dots2.data_source.data['x'] = range(len(list(df2['value'])))[-WINDOW_SIZE:]
            else:
                r2.data_source.data['y'] = list(df2['value'])
                r2.data_source.data['x'] = range(len(list(df2['value'])))
                dots2.data_source.data['y'] = list(df2['value'])
                dots2.data_source.data['x'] = range(len(list(df2['value'])))

            # Sliding window of the WINDOW_SIZE most recent values
            if len(df3['value']) > WINDOW_SIZE:
                r3.data_source.data['y'] = list(df3['value'])[-WINDOW_SIZE:]
                r3.data_source.data['x'] = range(len(list(df3['value'])))[-WINDOW_SIZE:]
                dots3.data_source.data['y'] = list(df3['value'])[-WINDOW_SIZE:]
                dots3.data_source.data['x'] = range(len(list(df3['value'])))[-WINDOW_SIZE:]
            else:
                r3.data_source.data['y'] = list(df3['value'])
                r3.data_source.data['x'] = range(len(list(df3['value'])))
                dots3.data_source.data['y'] = list(df3['value'])
                dots3.data_source.data['x'] = range(len(list(df3['value'])))

            # Sliding window of the WINDOW_SIZE most recent values
            if len(df4['value']) > WINDOW_SIZE:
                r4.data_source.data['y'] = list(df4['value'])[-WINDOW_SIZE:]
                r4.data_source.data['x'] = range(len(list(df4['value'])))[-WINDOW_SIZE:]
                dots4.data_source.data['y'] = list(df4['value'])[-WINDOW_SIZE:]
                dots4.data_source.data['x'] = range(len(list(df4['value'])))[-WINDOW_SIZE:]
            else:
                r4.data_source.data['y'] = list(df4['value'])
                r4.data_source.data['x'] = range(len(list(df4['value'])))
                dots4.data_source.data['y'] = list(df4['value'])
                dots4.data_source.data['x'] = range(len(list(df4['value'])))

            # Sliding window of the WINDOW_SIZE most recent values
            if len(df5['value']) > WINDOW_SIZE:
                r5.data_source.data['y'] = list(df5['value'])[-WINDOW_SIZE:]
                r5.data_source.data['x'] = range(len(list(df5['value'])))[-WINDOW_SIZE:]
                dots5.data_source.data['y'] = list(df5['value'])[-WINDOW_SIZE:]
                dots5.data_source.data['x'] = range(len(list(df5['value'])))[-WINDOW_SIZE:]
            else:
                r5.data_source.data['y'] = list(df5['value'])
                r5.data_source.data['x'] = range(len(list(df5['value'])))
                dots5.data_source.data['y'] = list(df5['value'])
                dots5.data_source.data['x'] = range(len(list(df5['value'])))

            # Sliding window of the WINDOW_SIZE most recent values
            if len(df6['value']) > WINDOW_SIZE:
                r6.data_source.data['y'] = list(df6['value'])[-WINDOW_SIZE:]
                r6.data_source.data['x'] = range(len(list(df6['value'])))[-WINDOW_SIZE:]
                dots6.data_source.data['y'] = list(df6['value'])[-WINDOW_SIZE:]
                dots6.data_source.data['x'] = range(len(list(df6['value'])))[-WINDOW_SIZE:]
            else:
                r6.data_source.data['y'] = list(df6['value'])
                r6.data_source.data['x'] = range(len(list(df6['value'])))
                dots6.data_source.data['y'] = list(df6['value'])
                dots6.data_source.data['x'] = range(len(list(df6['value'])))

            # Sliding window of the WINDOW_SIZE most recent values
            if len(df7['value']) > WINDOW_SIZE:
                r7.data_source.data['y'] = list(df7['value'])[-WINDOW_SIZE:]
                r7.data_source.data['x'] = range(len(list(df7['value'])))[-WINDOW_SIZE:]
                dots7.data_source.data['y'] = list(df7['value'])[-WINDOW_SIZE:]
                dots7.data_source.data['x'] = range(len(list(df7['value'])))[-WINDOW_SIZE:]
            else:
                r7.data_source.data['y'] = list(df7['value'])
                r7.data_source.data['x'] = range(len(list(df7['value'])))
                dots7.data_source.data['y'] = list(df7['value'])
                dots7.data_source.data['x'] = range(len(list(df7['value'])))
def get_trans():
    props = {
        'bootstrap.servers': '10.1.1.133:9092',  # Where is the Kafka cluster? (replace with the cluster to connect to)
        'group.id': 'peter',                     # ConsumerGroup name (replace with your own student ID)
        'auto.offset.reset': 'latest',           # whether to start from offsets this ConsumerGroup has not read yet (earliest)
        'enable.auto.commit': False,             # whether to enable auto commit
        'error_cb': error_cb                     # callback for receiving error events
    }
    # Step 2. Create a Kafka Consumer instance
    consumer = Consumer(props)
    # Step 3. The topic name to subscribe to
    topicName = 'transaction'
    # Step 4. Subscribe the consumer to the topic
    consumer.subscribe([topicName])
    # Step 5. Keep pulling incoming messages from Kafka
    try:
        while True:
            records_pulled = False  # used to check whether any valid record was pulled
            # Ask Kafka for new messages (batch read)
            records = consumer.consume(num_messages=500, timeout=1.0)
            if records is None:
                continue
            for record in records:
                # Check for errors
                if record is None:
                    continue
                if record.error():
                    # Error or event
                    if record.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
                                         (record.topic(), record.partition(), record.offset()))
                    else:
                        # Error
                        raise KafkaException(record.error())
                else:
                    records_pulled = True
                    # ** Business logic and message handling go here **
                    # Extract the related metadata
                    # topic = record.topic()
                    # partition = record.partition()
                    # offset = record.offset()
                    # timestamp = record.timestamp()
                    # Extract msgKey and msgValue
                    try:
                        msgKey2 = try_decode_utf8(record.key())
                        msgValue2 = try_decode_utf8(record.value())
                        sendmsg_trans = {msgKey2: msgValue2}
                        print(sendmsg_trans)
                        return sendmsg_trans
                    finally:
                        client = pymongo.MongoClient(
                            "mongodb+srv://peter:[email protected]/ceb101?retryWrites=true&w=majority"
                        )
                        mydb = client.wow
                        mycol = mydb['fit']
                        mycol.insert_many([sendmsg_trans])
                    # Print the metadata together with msgKey & msgValue
                    # print('%s-%d-%d : (%s , %s)' % (topic, partition, offset, msgKey, msgValue))
                    # test1_msg = ("%s", "%s" % (msgKey, msgValue))  # ('%s : %s' % (msgKey, msgValue))
            # Synchronous commit (Sync commit)
            # if records_pulled:
            #     offsets = consumer.commit(asynchronous=False)
            #     print_sync_commit_result(offsets)
    except KeyboardInterrupt as e:
        sys.stderr.write('Aborted by user\n')
    except Exception as e:
        sys.stderr.write(str(e))
    finally:
        consumer.commit(asynchronous=False)
        # Step 6. Close the Consumer connection
        consumer.close()
def _error_cb(self, error):
    if error.fatal():
        raise KafkaException(error)
    logger.info("Received non-fatal kafka error: %s", error)
def handle(self, *args, **kwargs):
    c = Consumer(**CONFIG)
    topic = django.conf.settings.CLOUDKARAFKA_TOPIC_GEONAMES
    c.subscribe([topic])
    logger.info('Subscribed to {0} topic \n'.format(topic))
    try:
        while True:
            msg = c.poll(timeout=1.0)
            if msg is None:
                continue
            if msg.error():
                # Error or event
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write('{0} [{1}] reached end at offset {2}\n'
                                     ''.format(msg.topic(), msg.partition(), msg.offset()))
                elif msg.error():
                    # Error
                    raise KafkaException(msg.error())
            else:
                geoname_item = msg.value()
                geoname_item = geoname_item.decode()
                geoname_item = json.loads(geoname_item)
                geoname_values = {}
                for key, value in geoname_item.items():
                    if key == 'osm_id':
                        try:
                            osm_rel = models.OpenStreetMap.objects.get(osm_id=value)
                            value = osm_rel.id
                        except models.OpenStreetMap.DoesNotExist:
                            value = None
                    if key == 'osm_id' and not value:
                        continue
                    geoname_values[key] = value
                try:
                    geoname = models.GeoName.objects.get(
                        geoname_id=geoname_values['geoname_id'])
                    for attr, attr_val in geoname_values.items():
                        attr_val = attr_val or ''
                        if attr != 'geoname_id':
                            setattr(geoname, attr, attr_val)
                    geoname.save()
                except models.GeoName.DoesNotExist:
                    models.GeoName.objects.create(**geoname_values)
                sys.stderr.write('{0} [{1}] at offset {2}\n'
                                 ''.format(msg.topic(), msg.partition(), msg.offset()))
                logger.info(geoname_item)
    except KeyboardInterrupt:
        logger.warning('Aborted by user\n')
    # Close down consumer to commit final offsets.
    c.close()
def fail_fast(err, msg):
    if err is not None:
        print("Kafka producer delivery error: {}".format(err), file=sys.stderr)
        print("Bailing out...", file=sys.stderr)
        # TODO: should it be sys.exit(-1)?
        raise KafkaException(err)
def run(self):
    logger.debug("Starting snuba query subscriber")
    self.offsets.clear()

    def on_assign(consumer, partitions):
        updated_partitions = []
        for partition in partitions:
            if self.resolve_partition_force_offset:
                partition = self.resolve_partition_force_offset(partition)
                updated_partitions.append(partition)
            if partition.offset == OFFSET_INVALID:
                updated_offset = None
            else:
                updated_offset = partition.offset
            self.offsets[partition.partition] = updated_offset
        if updated_partitions:
            self.consumer.assign(updated_partitions)
        logger.info(
            "query-subscription-consumer.on_assign",
            extra={
                "offsets": six.text_type(self.offsets),
                "partitions": six.text_type(partitions),
            },
        )

    def on_revoke(consumer, partitions):
        partition_numbers = [partition.partition for partition in partitions]
        self.commit_offsets(partition_numbers)
        for partition_number in partition_numbers:
            self.offsets.pop(partition_number, None)
        logger.info(
            "query-subscription-consumer.on_revoke",
            extra={
                "offsets": six.text_type(self.offsets),
                "partitions": six.text_type(partitions),
            },
        )

    self.consumer = Consumer(self.cluster_options)

    if settings.KAFKA_CONSUMER_AUTO_CREATE_TOPICS:
        # This is required for confluent-kafka>=1.5.0, otherwise the topics will
        # not be automatically created.
        admin_client = AdminClient(self.admin_cluster_options)
        wait_for_topics(admin_client, [self.topic])

    self.consumer.subscribe([self.topic], on_assign=on_assign, on_revoke=on_revoke)

    try:
        i = 0
        while True:
            message = self.consumer.poll(0.1)
            if message is None:
                continue

            error = message.error()
            if error is not None:
                raise KafkaException(error)

            i = i + 1

            with sentry_sdk.start_transaction(
                op="handle_message",
                name="query_subscription_consumer_process_message",
                sampled=True,
            ), metrics.timer("snuba_query_subscriber.handle_message"):
                self.handle_message(message)

            # Track latest completed message here, for use in `shutdown` handler.
            self.offsets[message.partition()] = message.offset() + 1

            if i % self.commit_batch_size == 0:
                logger.debug("Committing offsets")
                self.commit_offsets()
    except KeyboardInterrupt:
        pass

    self.shutdown()
def on_delivery(err, msg):
    if err is not None:
        raise KafkaException(err)
def kafkaMysqlRedisLinebot():
    # Step 1. Configuration for connecting to the Kafka cluster
    # Consumer configuration
    # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
    props = {
        'bootstrap.servers': 'kafka:29092',  # Where is the Kafka cluster? (replace with the cluster to connect to)
        'group.id': 'iii',                   # ConsumerGroup name (replace with your own student ID)
        'auto.offset.reset': 'earliest',     # start from the earliest offset
        'error_cb': error_cb                 # callback for receiving error events
    }
    # Step 2. Create a Kafka Consumer instance
    consumer = Consumer(props)
    # Step 3. The topic name to subscribe to
    topicName = 'Shopping_list2'
    # Step 4. Subscribe the consumer to the topic
    consumer.subscribe([topicName], on_assign=print_assignment, on_revoke=print_revoke)
    # Step 5. Keep pulling incoming messages from Kafka
    try:
        while True:
            # Ask Kafka for new messages (batch read)
            records = consumer.consume(num_messages=500, timeout=1.0)
            if records is None:
                continue
            for record in records:
                # Check for errors
                if record is None:
                    continue
                if record.error():
                    # Error or event
                    if record.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        print('')
                        # sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
                        #                  (record.topic(), record.partition(), record.offset()))
                    else:
                        # Error
                        raise KafkaException(record.error())
                else:
                    # ** Business logic and message handling go here **
                    # Extract the related metadata
                    topic = record.topic()
                    partition = record.partition()
                    offset = record.offset()
                    timestamp = record.timestamp()
                    # Extract msgKey and msgValue
                    msgKey = try_decode_utf8(record.key())
                    msgValue = try_decode_utf8(record.value())
                    if msgKey != 'end':
                        # After pulling the data from Kafka, first shape it so it can be looked up in MySQL
                        msgValue2 = msgValue.split(' ')
                        # Look up the basic data of the cart items in Redis and build the 'value'
                        message = ''
                        for i in range(len(msgValue2)):
                            price = redis.get('{}'.format(msgValue2[i]))
                            price = int(price.decode('utf-8'))
                            message += '{}:{}:{}:{}'.format(msgValue2[i], price, 1, price * 1)
                            if i < len(msgValue2) - 1:
                                message += ','
                        # Store the user's cart data in Redis (key: userID, value: cart data)
                        redis.set(msgKey, message)
                        redis.expire(msgKey, 600)
                        # Write the userID to a txt file so checkout knows which user the Redis data belongs to
                        with open('trade_user.txt', 'w', encoding='utf-8') as f:
                            f.write(msgKey)
                    else:
                        # When the user leaves the checkout area, find out which user the Redis data belongs to
                        with open('trade_user.txt', 'r', encoding='utf-8') as f:
                            userID = f.read()
                        # Convert to a time format that can be stored in MySQL
                        timestamp = timestamp[1] / 1000
                        timestamp = time.localtime(timestamp)
                        timestamp = time.strftime("%Y-%m-%d %H:%M:%S", timestamp)
                        # Fetch and arrange the user's cart purchase info from Redis
                        content = redis.get('{}'.format(userID))
                        trade_info = content.decode('utf-8').split(',')
                        # Store the user's purchase info in MySQL
                        for i in trade_info:
                            sql = '''select productID from product where productName = '{}';'''.format(
                                i.split(':')[0])
                            cursor.execute(sql)
                            productID = cursor.fetchall()[0][0]
                            sql2 = '''INSERT INTO shoppinglist (userID,shoppingdate,productID,quantity) VALUE ('{}','{}','{}',1);'''.format(
                                userID, timestamp, productID)
                            cursor.execute(sql2)
                            conn.commit()
                        # After the transaction ends, send the member a "transaction finished" message via an HTTP request
                        url = 'http://localhost:5000/thank/{}'.format(userID)
                        headers = {
                            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'
                        }
                        requests.get(url=url, headers=headers)
                    # Print the metadata together with msgKey & msgValue
                    print('%s-%d-%d : (%s , %s)' % (topic, partition, offset, msgKey, msgValue))
    except KeyboardInterrupt as e:
        sys.stderr.write('Aborted by user\n')
    except Exception as e:
        sys.stderr.write(str(e))
    finally:
        # Step 6. Close the Consumer connection
        consumer.close()
        # MySQL
        cursor.close()
        conn.close()
def ack(err, msg):
    if err:
        self._loop.call_soon_threadsafe(result.set_exception, KafkaException(err))
    else:
        self._loop.call_soon_threadsafe(result.set_result, msg)
def delivery_callback(err, msg):
    if err:
        raise KafkaException(err)
    else:
        pass
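# A hedged usage sketch for a delivery callback such as delivery_callback above.
# The broker address and topic name are placeholder assumptions. The callback is
# invoked from poll()/flush() once the broker acknowledges (or rejects) the message.
from confluent_kafka import Producer

example_producer = Producer({'bootstrap.servers': 'localhost:9092'})
example_producer.produce('example-topic', value=b'hello', callback=delivery_callback)
example_producer.flush()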
if records is None:
    continue
for record in records:
    # Check for errors
    if record is None:
        continue
    if record.error():
        # Detect whether we have already reached the end of the partition
        if record.error().code() == KafkaError._PARTITION_EOF:
            # End of partition event
            sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
                             (record.topic(), record.partition(), record.offset()))
        else:
            # Error
            raise KafkaException(record.error())
    else:
        record_counter += 1
        # ** Business logic and message handling go here **
        # Extract the related metadata
        topic = record.topic()
        partition = record.partition()
        offset = record.offset()
        timestamp = record.timestamp()
        # Extract msgKey and msgValue
        msgKey = try_decode_utf8(record.key())      # << this is the row_id
        msgValue = try_decode_utf8(record.value())  # << this is the taxidata payload
        taxidata = json.loads(msgValue)
        # As the exercise requires, extract pickup_datetime below
def poll(self, group_id, timeout=1, max_records=1, poll_attempts=10, only_value=True,
         auto_create_topics=True, decode_format=None, fail_on_deserialization=False):
    """Fetch and return messages from assigned topics / partitions as a list.

    - ``timeout`` (int): Seconds spent waiting in poll if data is not available in the buffer.
    - ``max_records`` (int): Maximum number of messages to get from poll. If 0, returns
      immediately with any records that are currently available in the buffer, else returns
      empty. Must not be negative. Default: `1`.
    - ``poll_attempts`` (int): Attempts to consume messages and endless-looping prevention.
      Sometimes the first messages are None or the topic could be empty. Default: `10`.
    - ``only_value`` (bool): Return only message.value(). Default: `True`.
    - ``decode_format`` (str): If you need to decode data to a specific format
      (see https://docs.python.org/3/library/codecs.html#standard-encodings). Default: `None`.
    - ``auto_create_topics`` (bool): Consumers no longer trigger auto creation of topics;
      this will be removed in a future release. If True, the error UNKNOWN_TOPIC_OR_PART
      is ignored. Default: `True`.
    - ``fail_on_deserialization`` (bool): If True and message deserialization fails, a
      SerializerError exception is raised; if False, the current poll just stops and the
      messages collected so far are returned. Default: `False`.
    """
    messages = []
    while poll_attempts > 0:
        msg = None
        try:
            msg = self.consumers[group_id].poll(timeout=timeout)
        except SerializerError as err:
            error = 'Message deserialization failed for {}: {}'.format(msg, err)
            if fail_on_deserialization:
                raise SerializerError(error)
            else:
                print(error)
                break

        if msg is None:
            poll_attempts -= 1
            continue

        if msg.error():
            # Workaround due to new message return + deprecation of
            # "Consumers no longer trigger auto creation of topics"
            if int(msg.error().code()) == KafkaError.UNKNOWN_TOPIC_OR_PART and auto_create_topics == True:
                continue
            else:
                raise KafkaException(msg.error())

        if only_value:
            messages.append(msg.value())
        else:
            messages.append(msg)

        if len(messages) == max_records:
            break

    if decode_format:
        messages = self._decode_data(data=messages, decode_format=decode_format)

    return messages
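# A hedged usage sketch for the poll() helper above, assuming it is attached to a
# small wrapper object whose `consumers` dict maps a group id to an already-subscribed
# confluent_kafka Consumer. The wrapper class, broker address, and topic name are
# assumptions for illustration, not part of the original snippet.
from confluent_kafka import Consumer


class ExampleConsumerWrapper:
    def __init__(self):
        self.consumers = {}


# Reuse the helper defined above as a method of the hypothetical wrapper.
ExampleConsumerWrapper.poll = poll

wrapper = ExampleConsumerWrapper()
wrapper.consumers['example-group'] = Consumer({
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'example-group',
    'auto.offset.reset': 'earliest',
})
wrapper.consumers['example-group'].subscribe(['example-topic'])
# Pull up to five raw message values, retrying at most ten empty polls.
values = wrapper.poll(group_id='example-group', max_records=5)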