def _dispatcher_factory(
    topics,
    group_id="pytest",
    bootstrap_servers=None,
    consumer_config=None,
    **kwargs,
):
    """
    Build a bluesky_kafka.RemoteDispatcher from the given arguments.

    Parameters
    ----------
    topics: list of str, required
        topics the underlying Kafka consumer subscribes to
    group_id: str, optional
        group_id of the underlying Kafka consumer, "pytest" by default
    bootstrap_servers : str, optional
        comma-delimited str of Kafka bootstrap server host:port;
        when omitted, the value of the pytest command line parameter
        --kafka-bootstrap-servers is used
    consumer_config: dict, optional
        configuration parameters for the underlying Kafka consumer;
        it is recommended that the configuration include
        "auto.commit.interval.ms": 100
    kwargs
        forwarded unchanged to bluesky_kafka.RemoteDispatcher()

    Returns
    -------
    remote_dispatcher : bluesky_kafka.RemoteDispatcher
        a RemoteDispatcher constructed with the specified arguments
    """
    servers = (
        kafka_bootstrap_servers if bootstrap_servers is None else bootstrap_servers
    )

    if consumer_config is not None:
        config = consumer_config
    else:
        # A short automatic-commit interval matters here: otherwise the
        # consumer may not have told the Kafka broker that messages were
        # received by the time the test ends, and the broker will try to
        # re-deliver those messages to the next consumer that subscribes
        # to the same topic(s).
        config = {"auto.commit.interval.ms": 100}

    return RemoteDispatcher(
        topics=topics,
        bootstrap_servers=servers,
        group_id=group_id,
        consumer_config=config,
        **kwargs,
    )
def make_and_start_dispatcher(document_queue):
    """Start a RemoteDispatcher that forwards every document to a queue.

    Parameters
    ----------
    document_queue
        object with a ``put`` method; each received document is handed to it
        as a ``(name, doc)`` tuple

    Notes
    -----
    ``kafka_topic`` and ``bootstrap_servers`` are taken from the enclosing
    scope. The call to ``start()`` is the last statement, so nothing is
    returned to the caller.
    """

    def enqueue(name, doc):
        # hand each (name, doc) pair over to the caller's queue
        document_queue.put((name, doc))

    consumer_settings = {
        "auto.offset.reset": "latest",
        "auto.commit.interval.ms": 100,
    }
    numpy_aware_loads = partial(msgpack.loads, object_hook=mpn.decode)

    kafka_dispatcher = RemoteDispatcher(
        topics=[kafka_topic],
        bootstrap_servers=bootstrap_servers,
        group_id="test_kafka_publisher",
        consumer_config=consumer_settings,
        polling_duration=1.0,
        deserializer=numpy_aware_loads,
    )
    kafka_dispatcher.subscribe(enqueue)
    kafka_dispatcher.start()
# Command-line consumer: subscribes to the "mad.bluesky.documents" topic and
# prints one line per received document.
parser = argparse.ArgumentParser(description="monogo consumer process")
parser.add_argument(
    "--kafka_server",
    default="127.0.0.1:9092",
    type=str,
    help="bootstrap server to connect to.",
)
args = parser.parse_args()
bootstrap_servers = args.kafka_server

kafka_dispatcher = RemoteDispatcher(
    topics=["mad.bluesky.documents"],
    bootstrap_servers=bootstrap_servers,
    group_id="kafka-unit-test-group-id",
    # "latest" should always work but
    # has been failing on Linux, passing on OSX
    consumer_config={"auto.offset.reset": "latest"},
    polling_duration=1.0,
    deserializer=partial(msgpack.loads, object_hook=mpn.decode),
)


def echo(name, doc):
    """Print the current time, the document's own "time" value and its name.

    A document without a "time" key is reported with timestamp 0.
    """
    stamp = doc.get("time", 0)
    received_at = datetime.datetime.now().isoformat()
    document_time = datetime.datetime.fromtimestamp(stamp)
    print(f"{received_at}: ({document_time}) {name} ")


kafka_dispatcher.subscribe(echo)
kafka_dispatcher.start()
import json
from functools import partial

import msgpack
import msgpack_numpy as mpn
import redis
from bluesky_adaptive import per_start
from bluesky_adaptive import recommendations
from bluesky_kafka import RemoteDispatcher

# Dispatcher for the "adaptive" topic; messages are decoded with msgpack
# using the msgpack_numpy object hook.
d = RemoteDispatcher(
    topics=["adaptive"],
    bootstrap_servers="127.0.0.1:9092",
    group_id="kafka-unit-test-group-id",
    # "latest" should always work but
    # has been failing on Linux, passing on OSX
    consumer_config={"auto.offset.reset": "latest"},
    polling_duration=1.0,
    deserializer=partial(msgpack.loads, object_hook=mpn.decode),
)


class RedisQueue:
    """Fake just enough of the queue.Queue API on top of redis."""

    def __init__(self, client):
        # client: object exposing ``lpush(key, value)``
        # (presumably a redis client -- confirm against the caller)
        self.client = client

    def put(self, value):
        """JSON-encode ``value`` and push it onto the "adaptive" list.

        ``json`` was not imported by the original snippet, so this method
        raised NameError on its first call; the import is now at the top.
        """
        print(f"pushing {value}")
        self.client.lpush("adaptive", json.dumps(value))
# NOTE(review): `view`, `directory` and `run` are not defined in the visible
# code. `export` and the branch below look like the tail of an enclosing
# per-run callback (presumably `export_thumbnails_when_complete`, which is
# referenced further down) -- confirm nesting against the full file.
def export(*args):
    """Export everything from ``view``, print the file names, close the view.

    Positional arguments are accepted and ignored, so the function can be
    used both as an event callback and called directly with no arguments.
    Each exported file name is printed on its own line in double quotes.
    """
    filenames = view.export_all(directory)
    print("\n".join(f'"{filename}"' for filename in filenames))
    view.close()


# A run that is still live exports when new data arrives; otherwise the
# export happens immediately.
if run_is_live_and_not_completed(run):
    run.events.new_data.connect(export)
else:
    export()


if __name__ == "__main__":
    bootstrap_servers = "127.0.0.1:9092"
    kafka_deserializer = partial(msgpack.loads, object_hook=mpn.decode)
    topics = ["widgets_test.bluesky.documents"]
    consumer_config = {
        "auto.commit.interval.ms": 100,
        "auto.offset.reset": "latest",
    }

    dispatcher = RemoteDispatcher(
        topics=topics,
        bootstrap_servers=bootstrap_servers,
        group_id="widgets_test",
        consumer_config=consumer_config,
        # The deserializer was built above but never handed to the
        # dispatcher, so the msgpack_numpy object hook had no effect.
        deserializer=kafka_deserializer,
    )
    dispatcher.subscribe(
        stream_documents_into_runs(export_thumbnails_when_complete)
    )
    dispatcher.start()
# NOTE(review): this line begins part-way through a handler method; the
# method header and the `if` that the `elif` below belongs to are not visible
# here, so the code is left exactly as found. What the visible code does, in
# order:
#   * method tail: in the external-trigger branch a file name is built from
#     self._template with (self._path, self._filename, start, point_number),
#     loaded through self.get_data() and appended to `ret`; the method then
#     returns np.array(ret).squeeze().
#   * `db`: the Broker named "lix", with PilatusCBFHandler registered under
#     "AD_CBF" (overwrite=True).
#   * `d`: a RemoteDispatcher for topic "lix.bluesky.documents", group
#     "lix.export.worker", decoding messages with msgpack.unpackb and
#     object_hook=mpn.decode.
#   * multi_file_packer_factory(name, doc): creates a MultiFilePacker that
#     targets /tmp/export_worker/multi_file/ with max_frames_per_file=2 and
#     PilatusCBFHandler, prints a notice, and returns ([packer], []).
#     `name` and `doc` are not used in the body.
ret.append(self.get_data(fn)) elif pilatus_trigger_mode == triggerMode.external_trigger: fn = self._template % (self._path, self._filename, start, point_number) ret.append(self.get_data(fn)) return np.array(ret).squeeze() db = Broker.named("lix") db.reg.register_handler("AD_CBF", PilatusCBFHandler, overwrite=True) d = RemoteDispatcher( topics=["lix.bluesky.documents"], bootstrap_servers="10.0.137.8:9092", group_id="lix.export.worker", consumer_config={"auto.offset.reset": "latest"}, polling_duration=1.0, deserializer=partial(msgpack.unpackb, object_hook=mpn.decode), ) def multi_file_packer_factory(name, doc): packer = MultiFilePacker( directory="/tmp/export_worker/multi_file/", max_frames_per_file=2, handler_class=PilatusCBFHandler, ) print("created a MultiFilePacker") return [packer], []
def test_publisher_and_remote_dispatcher(
    kafka_bootstrap_servers,
    temporary_topics,
    publisher_factory,
    hw,
    serializer,
    deserializer,
):
    """Test publishing and dispatching bluesky documents in Kafka messages.

    Messages will be "dispatched" by a `bluesky_kafka.RemoteDispatcher`.

    Parameters
    ----------
    kafka_bootstrap_servers: str (pytest fixture)
        comma-delimited string of Kafka broker host:port, for example "localhost:9092"
    temporary_topics: context manager (pytest fixture)
        creates and cleans up temporary Kafka topics for testing
    publisher_factory: pytest fixture
        fixture-as-a-factory for creating Publishers
    hw: pytest fixture
        ophyd simulated hardware objects
    serializer: function (pytest test parameter)
        function used to serialize bluesky documents in Kafka messages
    deserializer: function (pytest test parameter)
        function used to deserialize bluesky documents from Kafka messages
    """
    with temporary_topics(
        topics=[f"test.publisher.and.remote.dispatcher.{serializer.__module__}"]
    ) as (topic,):
        bluesky_publisher = publisher_factory(
            topic=topic,
            key=f"{topic}.key",
            flush_on_stop_doc=True,
            serializer=serializer,
        )

        published_bluesky_documents = []

        # this function will store all documents
        # published by the RunEngine in a list
        def store_published_document(name, document):
            published_bluesky_documents.append((name, document))

        RE = RunEngine()
        RE.subscribe(bluesky_publisher)
        RE.subscribe(store_published_document)

        # include some metadata in the count plan
        # to test numpy serialization
        md = {
            "numpy_data": {"nested": np.array([1, 2, 3])},
            "numpy_scalar": np.float64(3),
            "numpy_array": np.ones((3, 3)),
        }

        # md must actually be handed to the plan; it was previously built
        # but never used, so no numpy data went through the serializer
        RE(count([hw.det], md=md))

        # it is known that RE(count()) will produce four
        # documents: start, descriptor, event, stop
        assert len(published_bluesky_documents) == 4

        remote_dispatcher = RemoteDispatcher(
            topics=[topic],
            bootstrap_servers=kafka_bootstrap_servers,
            group_id=f"{topic}.consumer.group",
            consumer_config={
                # it is important to set a short time interval
                # for automatic commits or the Kafka broker may
                # not be notified by the consumer that messages
                # were received before the test ends; the result
                # is that the Kafka broker will try to re-deliver
                # those messages to the next consumer that subscribes
                # to the same topic(s)
                "auto.commit.interval.ms": 100,
                # this consumer is intended to read messages that
                # have already been published, so it is necessary
                # to specify "earliest" here
                "auto.offset.reset": "earliest",
            },
            polling_duration=1.0,
            deserializer=deserializer,
        )

        dispatched_bluesky_documents = []

        # this function stores all documents remote_dispatcher
        # gets from the Kafka broker in a list
        def store_dispatched_document(name, document):
            dispatched_bluesky_documents.append((name, document))

        remote_dispatcher.subscribe(store_dispatched_document)

        # this function returns False to end the remote_dispatcher polling loop
        def until_first_stop_document():
            assert len(dispatched_bluesky_documents) <= len(
                published_bluesky_documents
            )
            # keep polling only while no stop document has been dispatched
            return all(name != "stop" for name, _ in dispatched_bluesky_documents)

        # start() will return when 'until_first_stop_document' returns False
        remote_dispatcher.start(continue_polling=until_first_stop_document)

        assert len(published_bluesky_documents) == len(dispatched_bluesky_documents)

        # sanitize_doc normalizes some document data, such as numpy arrays, that are
        # problematic for direct comparison of documents by 'assert'
        sanitized_published_bluesky_documents = [
            sanitize_doc(doc) for doc in published_bluesky_documents
        ]
        sanitized_dispatched_bluesky_documents = [
            sanitize_doc(doc) for doc in dispatched_bluesky_documents
        ]

        assert len(sanitized_dispatched_bluesky_documents) == len(
            sanitized_published_bluesky_documents
        )
        assert (
            sanitized_dispatched_bluesky_documents
            == sanitized_published_bluesky_documents
        )