def main(brokers, topic):
    """Continuously print histogram messages from a Kafka topic.

    Starts one message from the end of partition 0 so the most recent
    histogram is shown immediately, then polls forever.

    :param brokers: The Kafka broker addresses to connect to.
    :param topic: The name of the topic to read histograms from.
    """
    consumer = KafkaConsumer(bootstrap_servers=brokers)
    print(f"Topics = {consumer.topics()}")
    tp = TopicPartition(topic, 0)
    consumer.assign([tp])
    # Move to one from the end
    consumer.seek_to_end(tp)
    end = consumer.position(tp)
    # max() guards against seeking to -1 when the partition is empty,
    # which would be an invalid (negative) offset.
    consumer.seek(tp, max(end - 1, 0))

    while True:
        data = []
        # poll() returns an empty dict when nothing arrived within the
        # timeout, so keep polling until there is data.
        while not data:
            data = consumer.poll(5)

        # Only one partition is assigned, so all records are under `tp`.
        for message in data[tp]:
            print("%s %s:%d:%d: key=%s value=%s" % (
                message.timestamp,
                message.topic,
                message.partition,
                message.offset,
                message.key,
                message.value[0:20],
            ))
            ans = deserialise_hs00(message.value)
            print(f"\nHistogram data:\n{ans}")
            print(f"Total events: {ans['data'].sum()}")
def test_if_histogram_has_id_then_that_is_added_to_the_info_field(self):
    """The id supplied in the configuration must appear in the info field."""
    hist = create_histogrammer(self.hist_sink, START_CONFIG)
    hist.add_data(EVENT_DATA)
    hist.publish_histograms()

    published = deserialise_hs00(self.spy_producer.messages[0][1])
    info_field = json.loads(published["info"])

    assert info_field["id"] == "abcdef"
def test_published_histogram_has_non_default_timestamp_set(self):
    """A timestamp passed to publish_histograms must appear in the message."""
    hist = create_histogrammer(self.hist_sink, STOP_CONFIG)
    hist.add_data(EVENT_DATA)

    publish_time = 1234567890
    hist.publish_histograms(publish_time)

    published = deserialise_hs00(self.spy_producer.messages[0][1])
    assert published["timestamp"] == publish_time
def test_before_counting_published_histogram_is_labelled_to_indicate_not_started(
        self):
    """With no data added yet, the published state must be INITIALISED."""
    hist = create_histogrammer(self.hist_sink, START_CONFIG)
    hist.publish_histograms()

    published = deserialise_hs00(self.spy_producer.messages[0][1])
    state = json.loads(published["info"])["state"]

    assert state == HISTOGRAM_STATES["INITIALISED"]
def test_serialises_hs00_message_with_info_field_filled_out_correctly(self):
    """
    Sanity check: checks the combination of libraries work as expected.
    """
    info_message = "info_message"

    round_tripped = deserialise_hs00(
        serialise_hs00(self.hist_1d, info_message=info_message)
    )

    assert round_tripped["info"] == info_message
def test_after_stop_published_histogram_is_labelled_to_indicate_finished(
        self):
    """With a stop config, the published state must be FINISHED."""
    hist = create_histogrammer(self.hist_sink, STOP_CONFIG)
    hist.add_data(EVENT_DATA)
    hist.publish_histograms()

    published = deserialise_hs00(self.spy_producer.messages[0][1])
    state = json.loads(published["info"])["state"]

    assert state == HISTOGRAM_STATES["FINISHED"]
def test_while_counting_published_histogram_is_labelled_to_indicate_counting(
        self):
    """While data is being collected, the published state must be COUNTING."""
    hist = create_histogrammer(self.hist_sink, START_CONFIG)
    hist.add_data(EVENT_DATA)
    hist.publish_histograms()

    published = deserialise_hs00(self.spy_producer.messages[0][1])
    state = json.loads(published["info"])["state"]

    assert state == HISTOGRAM_STATES["COUNTING"]
def get_hist_data_from_kafka(self):
    """Read and deserialise the most recent histogram message from Kafka.

    Seeks to one message before the end of the partition so the final
    published histogram can be read back.

    :return: The deserialised hs00 histogram data.
    """
    data = []
    # Move it to one from the end so we can read the final histogram
    self.consumer.seek_to_end(self.topic_part)
    end_pos = self.consumer.position(self.topic_part)
    # max() guards against seeking to -1 when the partition is empty,
    # which would be an invalid (negative) offset.
    self.consumer.seek(self.topic_part, max(end_pos - 1, 0))

    # poll() returns an empty dict on timeout, so retry until data arrives.
    while not data:
        data = self.consumer.poll(5)

    msg = data[self.topic_part][-1]
    return deserialise_hs00(msg.value)
def test_if_timestamp_not_supplied_then_it_is_zero(self):
    """
    Sanity check: checks the combination of libraries work as expected.
    """
    result = deserialise_hs00(serialise_hs00(self.hist_1d))

    assert result["source"] == "just-bin-it"
    assert result["timestamp"] == 0
    assert result["current_shape"] == [self.hist_1d.num_bins]

    dim = result["dim_metadata"][0]
    assert np.array_equal(dim["bin_boundaries"], self.hist_1d.x_edges.tolist())
    assert dim["length"] == self.hist_1d.num_bins
    assert np.array_equal(result["data"], self.hist_1d.data)
def test_serialises_hs00_message_correctly_for_1d(self):
    """
    Sanity check: checks the combination of libraries work as expected.
    """
    serialise_time = 1234567890

    result = deserialise_hs00(serialise_hs00(self.hist_1d, serialise_time))

    assert result["source"] == "just-bin-it"
    assert result["timestamp"] == serialise_time
    assert result["current_shape"] == [self.hist_1d.num_bins]

    dim = result["dim_metadata"][0]
    assert np.array_equal(dim["bin_boundaries"], self.hist_1d.x_edges.tolist())
    assert dim["length"] == self.hist_1d.num_bins
    assert np.array_equal(result["data"], self.hist_1d.data)
def test_deserialises_hs00_message_correctly(self):
    """
    Sanity check: checks the combination of libraries work as expected.
    """
    result = deserialise_hs00(self.buf)

    assert result["source"] == "just-bin-it"
    assert result["timestamp"] == 987_654_321
    assert result["current_shape"] == [50]
    assert len(result["data"]) == 50
    assert len(result["dim_metadata"]) == 1
    assert result["info"] == "hello"

    dim = result["dim_metadata"][0]
    assert dim["length"] == 50

    boundaries = dim["bin_boundaries"]
    assert len(boundaries) == 51
    assert boundaries[0] == 0.0
    assert boundaries[50] == 100_000_000.0
def _process_record(self, record):
    """Deserialise a raw hs00 record.

    :param record: The raw message bytes to deserialise.
    :return: The deserialised histogram data.
    :raises SourceException: If deserialisation fails for any reason.
    """
    try:
        return deserialise_hs00(record)
    except Exception as error:
        # Chain with `from` so the original traceback is preserved
        # rather than being reported as "another exception occurred".
        raise SourceException(error) from error
def test_if_schema_is_incorrect_then_throws(self):
    """Corrupting the schema identifier bytes must raise JustBinItException."""
    # Overwrite the 4-byte schema id (offset 4..8) with an invalid one.
    corrupted = self.buf[:4] + b"na12" + self.buf[8:]

    with pytest.raises(JustBinItException):
        deserialise_hs00(corrupted)