def make_and_start_dispatcher(document_queue):
    """Forward documents received from Kafka into ``document_queue``.

    A ``RemoteDispatcher`` is created for ``kafka_topic`` on
    ``bootstrap_servers`` (both names are resolved from the surrounding
    scope, not from this function's parameters), a forwarding callback is
    subscribed to it, and the dispatcher is started.

    Parameters
    ----------
    document_queue
        Any object with a ``put`` method; every dispatched document is
        handed to it as a ``(name, doc)`` tuple.
    """

    def enqueue_document(name, doc):
        # Keep name and document together so the consumer of the queue
        # can tell document types apart.
        document_queue.put((name, doc))

    consumer_settings = {
        "auto.offset.reset": "latest",
        "auto.commit.interval.ms": 100,
    }
    # msgpack decoding with the msgpack-numpy object hook.
    decode_message = partial(msgpack.loads, object_hook=mpn.decode)

    kafka_dispatcher = RemoteDispatcher(
        topics=[kafka_topic],
        bootstrap_servers=bootstrap_servers,
        group_id="test_kafka_publisher",
        consumer_config=consumer_settings,
        polling_duration=1.0,
        deserializer=decode_message,
    )
    kafka_dispatcher.subscribe(enqueue_document)
    # NOTE(review): start() presumably polls until stopped, so this call
    # is not expected to return promptly -- confirm against the caller.
    kafka_dispatcher.start()
def echo(name, doc):
    """Print the current time, the document's own timestamp and its name."""
    # Documents without a "time" key are reported with timestamp 0.
    timestamp = doc.get("time", 0)
    received_at = datetime.datetime.now().isoformat()
    document_time = datetime.datetime.fromtimestamp(timestamp)
    print(f"{received_at}: ({document_time}) {name} ")


# `parser` is created before this fragment; only the broker option is
# registered here.
parser.add_argument(
    "--kafka_server",
    type=str,
    help="bootstrap server to connect to.",
    default="127.0.0.1:9092",
)
args = parser.parse_args()
bootstrap_servers = args.kafka_server

kafka_dispatcher = RemoteDispatcher(
    topics=["mad.bluesky.documents"],
    bootstrap_servers=bootstrap_servers,
    group_id="kafka-unit-test-group-id",
    # "latest" should always work but
    # has been failing on Linux, passing on OSX
    consumer_config={"auto.offset.reset": "latest"},
    polling_duration=1.0,
    # msgpack decoding with the msgpack-numpy object hook
    deserializer=partial(msgpack.loads, object_hook=mpn.decode),
)

# Echo every dispatched document to stdout.
kafka_dispatcher.subscribe(echo)
kafka_dispatcher.start()
def export(*args):
    """Export every figure of ``view`` into ``directory`` and close the view.

    The quoted path of each exported file is printed, one per line.
    Positional arguments are accepted and ignored so the function can be
    used both as an event callback and called directly with no arguments.
    """
    filenames = view.export_all(directory)
    print("\n".join(f'"{filename}"' for filename in filenames))
    view.close()


# NOTE(review): `view`, `directory` and `run` come from a surrounding scope
# that is not visible in this fragment -- confirm the nesting when applying.
if run_is_live_and_not_completed(run):
    # The run is still in progress: defer the export to the run's event.
    run.events.new_data.connect(export)
else:
    # The run is not live (or already completed): export right away.
    export()


if __name__ == "__main__":
    bootstrap_servers = "127.0.0.1:9092"
    # msgpack decoding with the msgpack-numpy object hook.
    kafka_deserializer = partial(msgpack.loads, object_hook=mpn.decode)
    topics = ["widgets_test.bluesky.documents"]
    consumer_config = {
        "auto.commit.interval.ms": 100,
        "auto.offset.reset": "latest",
    }

    dispatcher = RemoteDispatcher(
        topics=topics,
        bootstrap_servers=bootstrap_servers,
        group_id="widgets_test",
        consumer_config=consumer_config,
        # The deserializer was previously built but never handed to the
        # dispatcher, so the numpy-aware decoder was silently unused.
        deserializer=kafka_deserializer,
    )

    dispatcher.subscribe(
        stream_documents_into_runs(export_thumbnails_when_complete)
    )
    dispatcher.start()
class RedisQueue:
    """Fake just enough of the queue.Queue API on top of redis."""

    def __init__(self, client):
        # Only the client's lpush() method is used by this class.
        self.client = client

    def put(self, value):
        """Print ``value``, then push its JSON encoding onto the "adaptive" list."""
        print(f"pushing {value}")
        encoded = json.dumps(value)
        self.client.lpush("adaptive", encoded)


rq = RedisQueue(redis.StrictRedis(host="localhost", port=6379, db=0))
queue = rq

adaptive_obj = recommendations.StepRecommender(1.5)
independent_keys = ["motor"]
dependent_keys = ["det"]
max_count = 15

# The factory returns two values; only the first is used here, as the
# callback subscribed to the dispatcher.
rr, _ = per_start.recommender_factory(
    adaptive_obj,
    independent_keys,
    dependent_keys,
    max_count=max_count,
    queue=queue,
)

# `d` is created before this fragment.
d.subscribe(rr)
print("REMOTE IS READY TO START")
d.start()
def test_publisher_and_remote_dispatcher(
    kafka_bootstrap_servers,
    temporary_topics,
    publisher_factory,
    hw,
    serializer,
    deserializer,
):
    """Test publishing and dispatching bluesky documents in Kafka messages.

    Messages will be "dispatched" by a `bluesky_kafka.RemoteDispatcher`.

    Parameters
    ----------
    kafka_bootstrap_servers: str (pytest fixture)
        comma-delimited string of Kafka broker host:port, for example
        "localhost:9092"
    temporary_topics: context manager (pytest fixture)
        creates and cleans up temporary Kafka topics for testing
    publisher_factory: pytest fixture
        fixture-as-a-factory for creating Publishers
    hw: pytest fixture
        ophyd simulated hardware objects
    serializer: function (pytest test parameter)
        function used to serialize bluesky documents in Kafka messages
    deserializer: function (pytest test parameter)
        function used to deserialize bluesky documents from Kafka messages
    """
    with temporary_topics(
        topics=[f"test.publisher.and.remote.dispatcher.{serializer.__module__}"]
    ) as (topic,):
        bluesky_publisher = publisher_factory(
            topic=topic,
            key=f"{topic}.key",
            flush_on_stop_doc=True,
            serializer=serializer,
        )

        published_bluesky_documents = []

        # this function will store all documents
        # published by the RunEngine in a list
        def store_published_document(name, document):
            published_bluesky_documents.append((name, document))

        RE = RunEngine()
        RE.subscribe(bluesky_publisher)
        RE.subscribe(store_published_document)

        # include some metadata in the count plan
        # to test numpy serialization
        md = {
            "numpy_data": {"nested": np.array([1, 2, 3])},
            "numpy_scalar": np.float64(3),
            "numpy_array": np.ones((3, 3)),
        }

        # the metadata must actually be handed to the plan; it was
        # previously built but never used, so no numpy data was published
        RE(count([hw.det], md=md))

        # it is known that RE(count()) will produce four
        # documents: start, descriptor, event, stop
        assert len(published_bluesky_documents) == 4

        remote_dispatcher = RemoteDispatcher(
            topics=[topic],
            bootstrap_servers=kafka_bootstrap_servers,
            group_id=f"{topic}.consumer.group",
            consumer_config={
                # it is important to set a short time interval
                # for automatic commits or the Kafka broker may
                # not be notified by the consumer that messages
                # were received before the test ends; the result
                # is that the Kafka broker will try to re-deliver
                # those messages to the next consumer that subscribes
                # to the same topic(s)
                "auto.commit.interval.ms": 100,
                # this consumer is intended to read messages that
                # have already been published, so it is necessary
                # to specify "earliest" here
                "auto.offset.reset": "earliest",
            },
            polling_duration=1.0,
            deserializer=deserializer,
        )

        dispatched_bluesky_documents = []

        # this function stores all documents remote_dispatcher
        # gets from the Kafka broker in a list
        def store_dispatched_document(name, document):
            dispatched_bluesky_documents.append((name, document))

        remote_dispatcher.subscribe(store_dispatched_document)

        # this function returns False to end the remote_dispatcher polling loop
        def until_first_stop_document():
            # the dispatcher must never deliver more than was published
            assert len(dispatched_bluesky_documents) <= len(
                published_bluesky_documents
            )
            # keep polling only while no "stop" document has arrived
            return all(name != "stop" for name, _ in dispatched_bluesky_documents)

        # start() will return when 'until_first_stop_document' returns False
        remote_dispatcher.start(continue_polling=until_first_stop_document)

        assert len(published_bluesky_documents) == len(dispatched_bluesky_documents)

        # sanitize_doc normalizes some document data, such as numpy arrays, that are
        # problematic for direct comparison of documents by 'assert'
        sanitized_published_bluesky_documents = [
            sanitize_doc(doc) for doc in published_bluesky_documents
        ]
        sanitized_dispatched_bluesky_documents = [
            sanitize_doc(doc) for doc in dispatched_bluesky_documents
        ]

        assert len(sanitized_dispatched_bluesky_documents) == len(
            sanitized_published_bluesky_documents
        )
        assert (
            sanitized_dispatched_bluesky_documents
            == sanitized_published_bluesky_documents
        )