def test_murmur2_java_compatibility():
    p = Murmur2Partitioner(range(1000))
    # compare with output from Kafka's org.apache.kafka.clients.producer.Partitioner
    assert p.partition(b'') == 681
    assert p.partition(b'a') == 524
    assert p.partition(b'ab') == 434
    assert p.partition(b'abc') == 107
    assert p.partition(b'123456789') == 566
    assert p.partition(b'\x00 ') == 742
def test_murmur2_java_compatibility(self):
    p = Murmur2Partitioner(range(1000))
    # compare with output from Kafka's org.apache.kafka.clients.producer.Partitioner
    self.assertEqual(681, p.partition(b''))
    self.assertEqual(524, p.partition(b'a'))
    self.assertEqual(434, p.partition(b'ab'))
    self.assertEqual(107, p.partition(b'abc'))
    self.assertEqual(566, p.partition(b'123456789'))
    self.assertEqual(742, p.partition(b'\x00 '))
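# The expected partition numbers in the Java-compatibility tests come from taking
# Kafka's murmur2 hash of the key, masking it positive, and taking it modulo the
# number of partitions. A minimal sketch of that mapping, assuming the murmur2
# helper exposed by kafka-python's kafka.partitioner.hashed module (where these
# partitioners live in older kafka-python releases); the explicit key encoding
# below mirrors the str/bytes equivalence checked in the hash tests:
from kafka.partitioner.hashed import murmur2


def partition_for(key, partitions):
    """Pick a partition the way the Java producer's default partitioner does."""
    if not isinstance(key, (bytes, bytearray)):
        key = str(key).encode('utf-8')
    idx = (murmur2(key) & 0x7fffffff) % len(partitions)
    return partitions[idx]

# Example: partition_for(b'abc', list(range(1000))) is expected to return 107,
# matching the assertion against org.apache.kafka.clients.producer.Partitioner.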
def run(self): """Publish video frames as json objects, timestamped, marked with camera number. Source: self.video_path: URL for streaming video self.kwargs["use_cv2"]: use raw cv2 streaming, set to false to use smart fast streaming --> not every frame is sent. Publishes: A dict {"frame": string(base64encodedarray), "dtype": obj.dtype.str, "shape": obj.shape, "timestamp": time.time(), "camera": camera, "frame_num": frame_num} """ if self.rr_distribute: partitioner = RoundRobinPartitioner(partitions= [TopicPartition(topic=self.frame_topic, partition=i) for i in range(self.topic_partitions)]) else: partitioner = Murmur2Partitioner(partitions= [TopicPartition(topic=self.frame_topic, partition=i) for i in range(self.topic_partitions)]) # Producer object, set desired partitioner frame_producer = KafkaProducer(bootstrap_servers=["kafka1-kafka-brokers:9092"], key_serializer=lambda key: str(key).encode(), value_serializer=lambda value: json.dumps(value).encode(), partitioner=partitioner) print("[CAM {}] URL: {}, SET PARTITIONS FOR FRAME TOPIC: {}".format(self.camera_num, self.video_path, frame_producer.partitions_for( self.frame_topic))) # Use either option video = cv2.VideoCapture(self.video_path) if self.use_cv2 else FileVideoStream(self.video_path).start() #video.set(cv2.CAP_PROP_FPS,30) # Track frame number frame_num = 0 start_time = time.time() print("[CAM {}] START TIME {}: ".format(self.camera_num, start_time)) # Read URL, Transform, Publish while True: # using raw cv2, frame by frame if self.use_cv2: success, image = video.read() # check if the file has read if not success: if self.verbose: print("[CAM {}] URL: {}, END FRAME: {}".format(self.name, self.video_path, frame_num)) break # using smart, only unique frames, skips frames, faster fps else: image = video.read() # check if the file has read if image is None: if self.verbose: print("[CAM {}] URL: {}, END FRAME: {}".format(self.name, self.video_path, frame_num)) break # Attach metadata to frame, transform into JSON message = self.transform(frame=image, frame_num=frame_num, object_key=self.object_key, camera=self.camera_num, verbose=self.verbose) # Partition to be sent to part = frame_num % self.topic_partitions # Logging if self.verbose: print("\r[PRODUCER][Cam {}] FRAME: {} TO PARTITION: {}".format(message["camera"], frame_num, part)) # Publish to specific partition frame_producer.send(self.frame_topic, key="{}-{}".format(self.camera_num, frame_num), value=message) # if frame_num % 1000 == 0: frame_producer.flush() frame_num += 1 # clear the capture if self.use_cv2: video.release() else: video.stop() if self.verbose: print("[CAM {}] FINISHED. STREAM TIME {}: ".format(self.camera_num, time.time() - start_time)) return True if frame_num > 0 else False
def test_hash_encoding():
    p = Murmur2Partitioner(range(1000))
    assert p.partition('test') == p.partition(u'test')
def test_hash_bytes():
    p = Murmur2Partitioner(range(1000))
    assert p.partition(bytearray(b'test')) == p.partition(b'test')
def run(self): """Publish video frames as json objects, timestamped, marked with camera number. Source: self.video_path: URL for streaming video self.kwargs["use_cv2"]: use raw cv2 streaming, set to false to use smart fast streaming --> not every frame is sent. Publishes: A dict {"frame": string(base64encodedarray), "dtype": obj.dtype.str, "shape": obj.shape, "timestamp": time.time(), "camera": camera, "frame_num": frame_num} """ if self.rr_distribute: partitioner = RoundRobinPartitioner(partitions=[ TopicPartition(topic=self.frame_topic, partition=i) for i in range(self.topic_partitions) ]) else: partitioner = Murmur2Partitioner(partitions=[ TopicPartition(topic=self.frame_topic, partition=i) for i in range(self.topic_partitions) ]) # Producer object, set desired partitioner frame_producer = KafkaProducer( bootstrap_servers=[params.KAFKA_BROKER], key_serializer=lambda key: str(key).encode(), value_serializer=lambda value: json.dumps(value).encode(), partitioner=partitioner, max_request_size=134217728) print("[CAM {}] URL: {}, SET PARTITIONS FOR FRAME TOPIC: {}".format( self.camera_num, self.video_path, frame_producer.partitions_for(self.frame_topic))) # Use either option if self.use_cv2: # video = cv2.VideoCapture(self.video_path) # Here we use sampler to read all videos from a folder self.sampler.add_video(self.video_path) else: video = VideoStream(self.video_path).start() # Track frame number frame_num = 0 start_time = time.time() print("[CAM {}] START TIME {}: ".format(self.camera_num, start_time)) while True: if self.use_cv2: success, image, self.location = self.sampler.read() if not success: if self.verbose: print("[CAM {}] URL: {}, END FRAME: {}".format( self.name, self.video_path, frame_num)) break else: image = video.read() if image is None: if self.verbose: print("[CAM {}] URL: {}, END FRAME: {}".format( self.name, self.video_path, frame_num)) break # Attach metadata to frame, transform into JSON message = self.transform(frame=image, frame_num=frame_num, location=self.location, object_key=self.object_key, camera=self.camera_num, verbose=self.verbose) self.sizecnt += 1 if time.time() - self.timer > self.report_range: acc = self.sizecnt #if self.verbose: print("[Cam {}]Minute {} send out size {}".format( self.camera_num, int(self.timer - self.zerotime) // self.report_range, acc)) self.sizecnt = 0 self.timer = time.time() # Callback function def on_send_success(record_metadata): print(record_metadata.topic) print(record_metadata.partition) print(record_metadata.offset) def on_send_error(excp): print(excp) # log.error('I am an errback', exc_info=excp) # Partition to be sent to part = frame_num % self.topic_partitions # Logging # Publish to specific partition if self.verbose: print("\r[PRODUCER][Cam {}] FRAME: {} TO PARTITION: {}".format( message["camera"], frame_num, part)) frame_producer.send( self.frame_topic, key="{}_{}".format(self.camera_num, frame_num), value=message).add_callback(on_send_success).add_errback( on_send_error) else: frame_producer.send(self.frame_topic, key="{}_{}".format(self.camera_num, frame_num), value=message) # if frame_num % 1000 == 0: frame_producer.flush() frame_num += 1 if self.use_cv2: self.sampler.release() else: video.stop() if self.verbose: print("[CAM {}] FINISHED. STREAM TIME {}: ".format( self.camera_num, time.time() - start_time)) return True if frame_num > 0 else False
def run(self): """Consume raw frames, detects faces, finds their encoding [PRE PROCESS], predictions Published to processed_frame_topic fro face matching.""" # Connect to kafka, Consume frame obj bytes deserialize to json partition_assignment_strategy = [ RoundRobinPartitionAssignor ] if self.rr_distribute else [ RangePartitionAssignor, RoundRobinPartitionAssignor ] frame_consumer = KafkaConsumer( group_id="consume", client_id=self.iam, bootstrap_servers=["kafka1-kafka-brokers:9092"], key_deserializer=lambda key: key.decode(), value_deserializer=lambda value: json.loads(value.decode()), partition_assignment_strategy=partition_assignment_strategy, auto_offset_reset="earliest") frame_consumer.subscribe([self.frame_topic]) # partitioner for processed frame topic if self.rr_distribute: partitioner = RoundRobinPartitioner(partitions=[ TopicPartition(topic=self.frame_topic, partition=i) for i in range(self.topic_partitions) ]) else: partitioner = Murmur2Partitioner(partitions=[ TopicPartition(topic=self.frame_topic, partition=i) for i in range(self.topic_partitions) ]) # Produces prediction object processed_frame_producer = KafkaProducer( bootstrap_servers=["kafka1-kafka-brokers:9092"], key_serializer=lambda key: str(key).encode(), value_serializer=lambda value: json.dumps(value).encode(), partitioner=partitioner) try: while True: if self.verbose: print( "[ConsumeFrames {}] WAITING FOR NEXT FRAMES..".format( socket.gethostname())) raw_frame_messages = frame_consumer.poll(timeout_ms=10, max_records=2000) for topic_partition, msgs in raw_frame_messages.items(): # Get the predicted Object, JSON with frame and meta info about the frame for msg in msgs: # get pre processing result result = self.get_processed_frame_object( msg.value, self.scale) tp = TopicPartition(msg.topic, msg.partition) offsets = {tp: OffsetAndMetadata(msg.offset, None)} frame_consumer.commit(offsets=offsets) # Partition to be sent to processed_frame_producer.send( self.processed_frame_topic, key="{}-{}".format(result["camera"], result["frame_num"]), value=result) processed_frame_producer.flush() except KeyboardInterrupt as e: print(e) pass finally: print("Closing Stream") frame_consumer.close()
def run(self):
    # Connect to kafka, consume frame obj bytes, deserialize to json
    partition_assignment_strategy = [RoundRobinPartitionAssignor] if self.rr_distribute else [
        RangePartitionAssignor, RoundRobinPartitionAssignor
    ]

    porter_consumer = KafkaConsumer(
        group_id=self.group_id,
        client_id=self.iam,
        bootstrap_servers=[params.KAFKA_BROKER],
        key_deserializer=lambda key: key.decode(),
        value_deserializer=lambda value: json.loads(value.decode()),
        partition_assignment_strategy=partition_assignment_strategy,
        auto_offset_reset="earliest")

    porter_consumer.subscribe([self.frame_topic])

    # partitioner for processed frame topic
    if self.rr_distribute:
        partitioner = RoundRobinPartitioner(partitions=[
            TopicPartition(topic=self.frame_topic, partition=i)
            for i in range(self.topic_partitions)
        ])
    else:
        partitioner = Murmur2Partitioner(partitions=[
            TopicPartition(topic=self.frame_topic, partition=i)
            for i in range(self.topic_partitions)
        ])

    # Produces prediction object
    url_producer = KafkaProducer(
        bootstrap_servers=[params.KAFKA_BROKER],
        key_serializer=lambda key: str(key).encode(),
        value_serializer=lambda value: json.dumps(value).encode(),
        partitioner=partitioner)

    try:
        while True:
            # if self.verbose:
            #     print("[ConsumeFrames {}] WAITING FOR NEXT FRAMES..".format(socket.gethostname()))

            raw_frame_messages = porter_consumer.poll(timeout_ms=10, max_records=10)

            for topic_partition, msgs in raw_frame_messages.items():
                for msg in msgs:
                    result = self.store_tmp_frame(msg.value)
                    if self.verbose:
                        print(result["frame_num"])
                        print(result['s3_key'])

                    tp = TopicPartition(msg.topic, msg.partition)
                    offsets = {tp: OffsetAndMetadata(msg.offset, None)}
                    porter_consumer.commit(offsets=offsets)

                    # Partition to be sent to
                    url_producer.send(self.url_topic,
                                      key="{}_{}".format(result["camera"], result["frame_num"]),
                                      value=result)

                    url_producer.flush()

    except KeyboardInterrupt as e:
        print(e)
        pass

    finally:
        print("Closing Stream")
        porter_consumer.close()
def run(self): """Consume raw frames, detects faces, finds their encoding [PRE PROCESS], predictions Published to processed_frame_topic fro face matching.""" # Connect to kafka, Consume frame obj bytes deserialize to json partition_assignment_strategy = [ RoundRobinPartitionAssignor ] if self.rr_distribute else [ RangePartitionAssignor, RoundRobinPartitionAssignor ] meta_consumer = KafkaConsumer( group_id=self.group_id, client_id=self.iam, bootstrap_servers=[params.KAFKA_BROKER], key_deserializer=lambda key: key.decode(), value_deserializer=lambda value: json.loads(value.decode()), partition_assignment_strategy=partition_assignment_strategy, auto_offset_reset="earliest") meta_consumer.subscribe([self.meta_topic]) # partitioner for processed frame topic if self.rr_distribute: partitioner = RoundRobinPartitioner(partitions=[ TopicPartition(topic=self.value_topic, partition=i) for i in range(self.topic_partitions) ]) else: partitioner = Murmur2Partitioner(partitions=[ TopicPartition(topic=self.value_topic, partition=i) for i in range(self.topic_partitions) ]) # Produces prediction object value_producer = KafkaProducer( bootstrap_servers=[params.KAFKA_BROKER], key_serializer=lambda key: str(key).encode(), value_serializer=lambda value: json.dumps(value).encode(), partitioner=partitioner) history_cnt = None try: while True: meta_messages = meta_consumer.poll(timeout_ms=10, max_records=10) for topic_partition, msgs in meta_messages.items(): # Get the predicted Object, JSON with frame and meta info about the frame for msg in msgs: # get pre processing result if self.verbose: print("TEST0 {}".format(msg.value)) result = parse_mapper(msg.value) self.transfer_scene_type(result) # Using the buffer to keep time order heappush(self.buffer, (result['frame_num'], json.dumps(result))) if len(self.buffer) < self.uppersize: tp = TopicPartition(msg.topic, msg.partition) offsets = {tp: OffsetAndMetadata(msg.offset, None)} meta_consumer.commit(offsets=offsets) continue result = json.loads(heappop(self.buffer)[1]) if self.verbose: print("TEST1 {}".format(result)) # Extract keyframe new_cnt = Counter(result['counts']) if history_cnt is None: result['is_keyframe'] = True else: result['is_keyframe'] = (new_cnt != history_cnt) history_cnt = new_cnt if self.verbose: print("TEST2 {}".format(result)) # Scene statistic scenecnt = Counter(result['scenes']) self.counter += scenecnt self.frame_cnt += 1 if result['is_keyframe']: self.keyframe_cnt += 1 # Need to be refined later result['valuable'] = self.is_valuable(result) # Update some statistic informations every minute if time.time() - self.timer > 10: self.update_acc_table(result) print('Extractor {}'.format(result)) if self.verbose: print("[Extractor done]") print(result) tp = TopicPartition(msg.topic, msg.partition) offsets = {tp: OffsetAndMetadata(msg.offset, None)} meta_consumer.commit(offsets=offsets) # Partition to be sent to value_producer.send(self.value_topic, value=result) value_producer.flush() except KeyboardInterrupt as e: print(e) pass finally: while self.buffer: result = json.loads(heappop(self.buffer)[1]) if self.verbose: print("TEST1 {}".format(result)) # Extract keyframe new_cnt = Counter(result['counts']) if history_cnt is None: result['is_keyframe'] = True else: result['is_keyframe'] = (new_cnt != history_cnt) history_cnt = new_cnt if self.verbose: print("TEST2 {}".format(result)) # Scene statistic scenecnt = Counter(result['scenes']) self.counter += scenecnt self.frame_cnt += 1 if result['is_keyframe']: self.keyframe_cnt += 1 # Need to be refined 
later result['valuable'] = self.is_valuable(result) # Update some statistic informations every minute if len(self.buffer) == 0: self.update_acc_table(result) print('Extractor {}'.format(result)) if self.verbose: print("[Extractor done]") print(result) # Partition to be sent to value_producer.send(self.value_topic, value=result) value_producer.flush() print("Closing Stream") meta_consumer.close()
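# The heappush/heappop buffer above is what restores frame order when metadata
# messages arrive interleaved from different partitions. A standalone sketch of the
# same idea under the same uppersize threshold; the function and variable names are
# illustrative, not the repo's:
import json
from heapq import heappush, heappop


def reorder(messages, uppersize=4):
    """Yield messages in frame_num order using a bounded min-heap, mirroring run()."""
    buffer = []
    for msg in messages:
        heappush(buffer, (msg['frame_num'], json.dumps(msg)))
        if len(buffer) < uppersize:
            continue  # keep filling until the buffer reaches uppersize
        yield json.loads(heappop(buffer)[1])
    while buffer:  # drain the remainder at end of stream
        yield json.loads(heappop(buffer)[1])

# Out-of-order input comes back sorted by frame_num, e.g.
# list(reorder([{'frame_num': 2}, {'frame_num': 1}, {'frame_num': 3}], uppersize=2))
# yields frames 1, 2, 3.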
@pytest.mark.parametrize("bytes_payload,partition_number", [
    (b'', 681), (b'a', 524), (b'ab', 434), (b'abc', 107),
    (b'123456789', 566), (b'\x00 ', 742)
])
def test_murmur2_java_compatibility(bytes_payload, partition_number):
    p = Murmur2Partitioner(range(1000))
    # compare with output from Kafka's org.apache.kafka.clients.producer.Partitioner
    assert p.partition(bytes_payload) == partition_number
def run(self): """Consume raw frames, detects faces, finds their encoding [PRE PROCESS], predictions Published to processed_frame_topic fro face matching.""" # Connect to kafka, Consume frame obj bytes deserialize to json partition_assignment_strategy = [ RoundRobinPartitionAssignor ] if self.rr_distribute else [ RangePartitionAssignor, RoundRobinPartitionAssignor ] url_consumer = KafkaConsumer( group_id=self.group_id, client_id=self.iam, bootstrap_servers=[params.KAFKA_BROKER], key_deserializer=lambda key: key.decode(), value_deserializer=lambda value: json.loads(value.decode()), partition_assignment_strategy=partition_assignment_strategy, auto_offset_reset="earliest") url_consumer.subscribe([self.url_topic]) # partitioner for processed frame topic if self.rr_distribute: partitioner = RoundRobinPartitioner(partitions=[ TopicPartition(topic=self.obj_topic, partition=i) for i in range(self.topic_partitions) ]) else: partitioner = Murmur2Partitioner(partitions=[ TopicPartition(topic=self.obj_topic, partition=i) for i in range(self.topic_partitions) ]) # Produces prediction object obj_producer = KafkaProducer( bootstrap_servers=[params.KAFKA_BROKER], key_serializer=lambda key: str(key).encode(), value_serializer=lambda value: json.dumps(value).encode(), partitioner=partitioner) try: while True: raw_frame_messages = url_consumer.poll(timeout_ms=10, max_records=10) for topic_partition, msgs in raw_frame_messages.items(): # Get the predicted Object, JSON with frame and meta info about the frame for msg in msgs: # get pre processing result if self.cnt < 0: self.timer = time.time() self.cnt = 0 result = self.get_processed_frame_object(msg.value) # Calculate latency: self.cnt += 1 if self.cnt == self.latency_period: latency = (time.time() - self.timer) / float( self.latency_period) self.timer = time.time() self.cnt = 0 print("[Detection] Latency {}".format(latency)) if self.verbose: print(result) tp = TopicPartition(msg.topic, msg.partition) offsets = {tp: OffsetAndMetadata(msg.offset, None)} url_consumer.commit(offsets=offsets) # Partition to be sent to send_topic = self.obj_topic + '_' + str( result['camera']) obj_producer.send(send_topic, value=result) obj_producer.flush() except KeyboardInterrupt as e: print(e) pass finally: print("Closing Stream") url_consumer.close()
def test_hash_bytes(self):
    p = Murmur2Partitioner(range(1000))
    self.assertEqual(p.partition(bytearray(b'test')), p.partition(b'test'))
def test_hash_encoding(self):
    p = Murmur2Partitioner(range(1000))
    self.assertEqual(p.partition('test'), p.partition(u'test'))