def start_output_stream_and_listen(self, ctd_stream_id, data_product_stream_ids, message_count_per_stream=10): assertions = self.assertTrue exchange_name = 'workflow_test' ### ### Make a subscriber in the test to listen for transformed data ### salinity_subscription_id = self.pubsubclient.create_subscription( name='test workflow transformations', exchange_name=exchange_name, stream_ids=data_product_stream_ids) result = gevent.event.AsyncResult() results = [] message_count = len(data_product_stream_ids) * message_count_per_stream def message_received(message, stream_route, stream_id): # Heads results.append(message) if len( results ) >= message_count: #Only wait for so many messages - per stream result.set(True) subscriber = StandaloneStreamSubscriber(exchange_name='workflow_test', callback=message_received) subscriber.xn.purge() self.addCleanup(subscriber.xn.delete) subscriber.start() # after the queue has been created it is safe to activate the subscription self.pubsubclient.activate_subscription( subscription_id=salinity_subscription_id) #Start the input stream process if ctd_stream_id is not None: ctd_sim_pid = self.start_simple_input_stream_process(ctd_stream_id) # Assert that we have received data assertions(result.get(timeout=30)) # stop the flow parse the messages... if ctd_stream_id is not None: self.process_dispatcher.cancel_process( ctd_sim_pid ) # kill the ctd simulator process - that is enough data self.pubsubclient.deactivate_subscription( subscription_id=salinity_subscription_id) subscriber.stop() return results
def test_full_pubsub(self): self.sub1_sat = Event() self.sub2_sat = Event() def subscriber1(m, r, s): self.sub1_sat.set() def subscriber2(m, r, s): self.sub2_sat.set() sub1 = StandaloneStreamSubscriber("sub1", subscriber1) self.queue_cleanup.append(sub1.xn.queue) sub1.start() sub2 = StandaloneStreamSubscriber("sub2", subscriber2) self.queue_cleanup.append(sub2.xn.queue) sub2.start() log_topic = self.pubsub_management.create_topic("instrument_logs", exchange_point="instruments") science_topic = self.pubsub_management.create_topic("science_data", exchange_point="instruments") events_topic = self.pubsub_management.create_topic("notifications", exchange_point="events") log_stream, route = self.pubsub_management.create_stream( "instrument1-logs", topic_ids=[log_topic], exchange_point="instruments" ) ctd_stream, route = self.pubsub_management.create_stream( "instrument1-ctd", topic_ids=[science_topic], exchange_point="instruments" ) event_stream, route = self.pubsub_management.create_stream( "notifications", topic_ids=[events_topic], exchange_point="events" ) raw_stream, route = self.pubsub_management.create_stream("temp", exchange_point="global.data") self.exchange_cleanup.extend(["instruments", "events", "global.data"]) subscription1 = self.pubsub_management.create_subscription( "subscription1", stream_ids=[log_stream, event_stream], exchange_name="sub1" ) subscription2 = self.pubsub_management.create_subscription( "subscription2", exchange_points=["global.data"], stream_ids=[ctd_stream], exchange_name="sub2" ) self.pubsub_management.activate_subscription(subscription1) self.pubsub_management.activate_subscription(subscription2) self.publish_on_stream(log_stream, 1) self.assertTrue(self.sub1_sat.wait(4)) self.assertFalse(self.sub2_sat.is_set()) self.publish_on_stream(raw_stream, 1) self.assertTrue(self.sub1_sat.wait(4)) sub1.stop() sub2.stop()
def test_demux(self): self.stream0, self.route0 = self.pubsub_client.create_stream( 'stream0', exchange_point='test') self.stream1, self.route1 = self.pubsub_client.create_stream( 'stream1', exchange_point='main_data') self.stream2, self.route2 = self.pubsub_client.create_stream( 'stream2', exchange_point='alt_data') self.r_stream1 = gevent.event.Event() self.r_stream2 = gevent.event.Event() def process(msg, stream_route, stream_id): if stream_id == self.stream1: self.r_stream1.set() elif stream_id == self.stream2: self.r_stream2.set() self.container.spawn_process( 'demuxer', 'ion.processes.data.transforms.mux', 'DemuxTransform', {'process': { 'out_streams': [self.stream1, self.stream2] }}, 'demuxer_pid') self.queue_cleanup.append('demuxer_pid') sub1 = StandaloneStreamSubscriber('sub1', process) sub2 = StandaloneStreamSubscriber('sub2', process) sub1.xn.bind(self.route1.routing_key, self.container.ex_manager.create_xp('main_data')) sub2.xn.bind(self.route2.routing_key, self.container.ex_manager.create_xp('alt_data')) sub1.start() sub2.start() self.queue_cleanup.append(sub1.xn) self.queue_cleanup.append(sub2.xn) xn = self.container.ex_manager.create_xn_queue('demuxer_pid') xn.bind( self.route0.routing_key, self.container.ex_manager.create_xp(self.route0.exchange_point)) domino = StandaloneStreamPublisher(self.stream0, self.route0) domino.publish('test') self.assertTrue(self.r_stream1.wait(2)) self.assertTrue(self.r_stream2.wait(2)) self.container.proc_manager.terminate_process('demuxer_pid') sub1.stop() sub2.stop()
def start_output_stream_and_listen(self, ctd_stream_id, data_product_stream_ids, message_count_per_stream=10): assertions = self.assertTrue exchange_name = 'workflow_test' ### ### Make a subscriber in the test to listen for transformed data ### salinity_subscription_id = self.pubsubclient.create_subscription( name = 'test workflow transformations', exchange_name = exchange_name, stream_ids = data_product_stream_ids ) result = gevent.event.AsyncResult() results = [] message_count = len(data_product_stream_ids) * message_count_per_stream def message_received(message, stream_route, stream_id): # Heads results.append(message) if len(results) >= message_count: #Only wait for so many messages - per stream result.set(True) subscriber = StandaloneStreamSubscriber(exchange_name='workflow_test', callback=message_received) subscriber.xn.purge() self.addCleanup(subscriber.xn.delete) subscriber.start() # after the queue has been created it is safe to activate the subscription self.pubsubclient.activate_subscription(subscription_id=salinity_subscription_id) #Start the input stream process if ctd_stream_id is not None: ctd_sim_pid = self.start_simple_input_stream_process(ctd_stream_id) # Assert that we have received data assertions(result.get(timeout=30)) # stop the flow parse the messages... if ctd_stream_id is not None: self.process_dispatcher.cancel_process(ctd_sim_pid) # kill the ctd simulator process - that is enough data self.pubsubclient.deactivate_subscription(subscription_id=salinity_subscription_id) subscriber.stop() return results
def test_full_pubsub(self): self.sub1_sat = Event() self.sub2_sat = Event() def subscriber1(m,r,s): self.sub1_sat.set() def subscriber2(m,r,s): self.sub2_sat.set() sub1 = StandaloneStreamSubscriber('sub1', subscriber1) self.queue_cleanup.append(sub1.xn.queue) sub1.start() sub2 = StandaloneStreamSubscriber('sub2', subscriber2) self.queue_cleanup.append(sub2.xn.queue) sub2.start() log_topic = self.pubsub_management.create_topic('instrument_logs', exchange_point='instruments') science_topic = self.pubsub_management.create_topic('science_data', exchange_point='instruments') events_topic = self.pubsub_management.create_topic('notifications', exchange_point='events') log_stream, route = self.pubsub_management.create_stream('instrument1-logs', topic_ids=[log_topic], exchange_point='instruments') ctd_stream, route = self.pubsub_management.create_stream('instrument1-ctd', topic_ids=[science_topic], exchange_point='instruments') event_stream, route = self.pubsub_management.create_stream('notifications', topic_ids=[events_topic], exchange_point='events') raw_stream, route = self.pubsub_management.create_stream('temp', exchange_point='global.data') self.exchange_cleanup.extend(['instruments','events','global.data']) subscription1 = self.pubsub_management.create_subscription('subscription1', stream_ids=[log_stream,event_stream], exchange_name='sub1') subscription2 = self.pubsub_management.create_subscription('subscription2', exchange_points=['global.data'], stream_ids=[ctd_stream], exchange_name='sub2') self.pubsub_management.activate_subscription(subscription1) self.pubsub_management.activate_subscription(subscription2) self.publish_on_stream(log_stream, 1) self.assertTrue(self.sub1_sat.wait(4)) self.assertFalse(self.sub2_sat.is_set()) self.publish_on_stream(raw_stream,1) self.assertTrue(self.sub1_sat.wait(4)) sub1.stop() sub2.stop()
def test_activation_and_deactivation(self): stream_id, route = self.pubsub_management.create_stream( 'stream1', 'xp1') subscription_id = self.pubsub_management.create_subscription( 'sub1', stream_ids=[stream_id]) self.check1 = Event() def verifier(m, r, s): self.check1.set() subscriber = StandaloneStreamSubscriber('sub1', verifier) subscriber.start() publisher = StandaloneStreamPublisher(stream_id, route) publisher.publish('should not receive') self.assertFalse(self.check1.wait(0.25)) self.pubsub_management.activate_subscription(subscription_id) publisher.publish('should receive') self.assertTrue(self.check1.wait(2)) self.check1.clear() self.assertFalse(self.check1.is_set()) self.pubsub_management.deactivate_subscription(subscription_id) publisher.publish('should not receive') self.assertFalse(self.check1.wait(0.5)) self.pubsub_management.activate_subscription(subscription_id) publisher.publish('should receive') self.assertTrue(self.check1.wait(2)) subscriber.stop() self.pubsub_management.deactivate_subscription(subscription_id) self.pubsub_management.delete_subscription(subscription_id) self.pubsub_management.delete_stream(stream_id)
def test_activation_and_deactivation(self): stream_id, route = self.pubsub_management.create_stream('stream1','xp1') subscription_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id]) self.check1 = Event() def verifier(m,r,s): self.check1.set() subscriber = StandaloneStreamSubscriber('sub1',verifier) subscriber.start() publisher = StandaloneStreamPublisher(stream_id, route) publisher.publish('should not receive') self.assertFalse(self.check1.wait(0.25)) self.pubsub_management.activate_subscription(subscription_id) publisher.publish('should receive') self.assertTrue(self.check1.wait(2)) self.check1.clear() self.assertFalse(self.check1.is_set()) self.pubsub_management.deactivate_subscription(subscription_id) publisher.publish('should not receive') self.assertFalse(self.check1.wait(0.5)) self.pubsub_management.activate_subscription(subscription_id) publisher.publish('should receive') self.assertTrue(self.check1.wait(2)) subscriber.stop() self.pubsub_management.deactivate_subscription(subscription_id) self.pubsub_management.delete_subscription(subscription_id) self.pubsub_management.delete_stream(stream_id)
def test_demux(self): self.stream0, self.route0 = self.pubsub_client.create_stream('stream0', exchange_point='test') self.stream1, self.route1 = self.pubsub_client.create_stream('stream1', exchange_point='main_data') self.stream2, self.route2 = self.pubsub_client.create_stream('stream2', exchange_point='alt_data') self.r_stream1 = gevent.event.Event() self.r_stream2 = gevent.event.Event() def process(msg, stream_route, stream_id): if stream_id == self.stream1: self.r_stream1.set() elif stream_id == self.stream2: self.r_stream2.set() self.container.spawn_process('demuxer', 'ion.processes.data.transforms.mux', 'DemuxTransform', {'process':{'out_streams':[self.stream1, self.stream2]}}, 'demuxer_pid') self.queue_cleanup.append('demuxer_pid') sub1 = StandaloneStreamSubscriber('sub1', process) sub2 = StandaloneStreamSubscriber('sub2', process) sub1.xn.bind(self.route1.routing_key, self.container.ex_manager.create_xp('main_data')) sub2.xn.bind(self.route2.routing_key, self.container.ex_manager.create_xp('alt_data')) sub1.start() sub2.start() self.queue_cleanup.append(sub1.xn) self.queue_cleanup.append(sub2.xn) xn = self.container.ex_manager.create_xn_queue('demuxer_pid') xn.bind(self.route0.routing_key, self.container.ex_manager.create_xp(self.route0.exchange_point)) domino = StandaloneStreamPublisher(self.stream0, self.route0) domino.publish('test') self.assertTrue(self.r_stream1.wait(2)) self.assertTrue(self.r_stream2.wait(2)) self.container.proc_manager.terminate_process('demuxer_pid') sub1.stop() sub2.stop()
def test_replay_pause(self): # Get a precompiled parameter dictionary with basic ctd fields pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True) context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True) # Add a field that supports binary data input. bin_context = ParameterContext('binary', param_type=ArrayType()) context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump())) # Add another field that supports dictionary elements. rec_context = ParameterContext('records', param_type=RecordType()) context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump())) pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time') stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id) replay_stream, replay_route = self.pubsub_management.create_stream('replay', 'xp1', stream_definition_id=stream_def_id) dataset_id = self.create_dataset(pdict_id) scov = DatasetManagementService._get_simplex_coverage(dataset_id) bb = CoverageCraft(scov) bb.rdt['time'] = np.arange(100) bb.rdt['temp'] = np.random.random(100) + 30 bb.sync_with_granule() DatasetManagementService._persist_coverage(dataset_id, bb.coverage) # This invalidates it for multi-host configurations # Set up the subscriber to verify the data subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription) xp = self.container.ex_manager.create_xp('xp1') self.queue_buffer.append(self.exchange_space_name) subscriber.start() subscriber.xn.bind(replay_route.routing_key, xp) # Set up the replay agent and the client wrapper # 1) Define the Replay (dataset and stream to publish on) self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream) # 2) Make a client to the interact with the process (optionall provide it a process to bind with) replay_client = ReplayClient(process_id) # 3) Start the agent (launch the process) self.data_retriever.start_replay_agent(self.replay_id) # 4) Start replaying... replay_client.start_replay() # Wait till we get some granules self.assertTrue(self.event.wait(5)) # We got granules, pause the replay, clear the queue and allow the process to finish consuming replay_client.pause_replay() gevent.sleep(1) subscriber.xn.purge() self.event.clear() # Make sure there's no remaining messages being consumed self.assertFalse(self.event.wait(1)) # Resume the replay and wait until we start getting granules again replay_client.resume_replay() self.assertTrue(self.event.wait(5)) # Stop the replay, clear the queues replay_client.stop_replay() gevent.sleep(1) subscriber.xn.purge() self.event.clear() # Make sure that it did indeed stop self.assertFalse(self.event.wait(1)) subscriber.stop()
def test_dm_end_2_end(self): #-------------------------------------------------------------------------------- # Set up a stream and have a mock instrument (producer) send data #-------------------------------------------------------------------------------- self.event.clear() # Get a precompiled parameter dictionary with basic ctd fields pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True) context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True) # Add a field that supports binary data input. bin_context = ParameterContext('binary', param_type=ArrayType()) context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump())) # Add another field that supports dictionary elements. rec_context = ParameterContext('records', param_type=RecordType()) context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump())) pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time') stream_definition = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id) stream_id, route = self.pubsub_management.create_stream('producer', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition) #-------------------------------------------------------------------------------- # Start persisting the data on the stream # - Get the ingestion configuration from the resource registry # - Create the dataset # - call persist_data_stream to setup the subscription for the ingestion workers # on the stream that you specify which causes the data to be persisted #-------------------------------------------------------------------------------- ingest_config_id = self.get_ingestion_config() dataset_id = self.create_dataset(pdict_id) self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id) #-------------------------------------------------------------------------------- # Now the granules are ingesting and persisted #-------------------------------------------------------------------------------- self.launch_producer(stream_id) self.wait_until_we_have_enough_granules(dataset_id,40) #-------------------------------------------------------------------------------- # Now get the data in one chunk using an RPC Call to start_retreive #-------------------------------------------------------------------------------- replay_data = self.data_retriever.retrieve(dataset_id) self.assertIsInstance(replay_data, Granule) rdt = RecordDictionaryTool.load_from_granule(replay_data) self.assertTrue((rdt['time'][:10] == np.arange(10)).all(),'%s' % rdt['time'][:]) self.assertTrue((rdt['binary'][:10] == np.array(['hi']*10, dtype='object')).all()) #-------------------------------------------------------------------------------- # Now to try the streamed approach #-------------------------------------------------------------------------------- replay_stream_id, replay_route = self.pubsub_management.create_stream('replay_out', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition) self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream_id) log.info('Process ID: %s', process_id) replay_client = ReplayClient(process_id) #-------------------------------------------------------------------------------- # Create the listening endpoint for the the retriever to talk to #-------------------------------------------------------------------------------- xp = self.container.ex_manager.create_xp(self.exchange_point_name) subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription) self.queue_buffer.append(self.exchange_space_name) subscriber.start() subscriber.xn.bind(replay_route.routing_key, xp) self.data_retriever.start_replay_agent(self.replay_id) self.assertTrue(replay_client.await_agent_ready(5), 'The process never launched') replay_client.start_replay() self.assertTrue(self.event.wait(10)) subscriber.stop() self.data_retriever.cancel_replay_agent(self.replay_id) #-------------------------------------------------------------------------------- # Test the slicing capabilities #-------------------------------------------------------------------------------- granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={'tdoa':slice(0,5)}) rdt = RecordDictionaryTool.load_from_granule(granule) b = rdt['time'] == np.arange(5) self.assertTrue(b.all() if not isinstance(b,bool) else b) self.streams.append(stream_id) self.stop_ingestion(stream_id)
def test_topic_craziness(self): self.msg_queue = Queue() def subscriber1(m, r, s): self.msg_queue.put(m) sub1 = StandaloneStreamSubscriber('sub1', subscriber1) self.queue_cleanup.append(sub1.xn.queue) sub1.start() topic1 = self.pubsub_management.create_topic('topic1', exchange_point='xp1') topic2 = self.pubsub_management.create_topic('topic2', exchange_point='xp1', parent_topic_id=topic1) topic3 = self.pubsub_management.create_topic('topic3', exchange_point='xp1', parent_topic_id=topic1) topic4 = self.pubsub_management.create_topic('topic4', exchange_point='xp1', parent_topic_id=topic2) topic5 = self.pubsub_management.create_topic('topic5', exchange_point='xp1', parent_topic_id=topic2) topic6 = self.pubsub_management.create_topic('topic6', exchange_point='xp1', parent_topic_id=topic3) topic7 = self.pubsub_management.create_topic('topic7', exchange_point='xp1', parent_topic_id=topic3) # Tree 2 topic8 = self.pubsub_management.create_topic('topic8', exchange_point='xp2') topic9 = self.pubsub_management.create_topic('topic9', exchange_point='xp2', parent_topic_id=topic8) topic10 = self.pubsub_management.create_topic('topic10', exchange_point='xp2', parent_topic_id=topic9) topic11 = self.pubsub_management.create_topic('topic11', exchange_point='xp2', parent_topic_id=topic9) topic12 = self.pubsub_management.create_topic('topic12', exchange_point='xp2', parent_topic_id=topic11) topic13 = self.pubsub_management.create_topic('topic13', exchange_point='xp2', parent_topic_id=topic11) self.exchange_cleanup.extend(['xp1', 'xp2']) stream1_id, route = self.pubsub_management.create_stream( 'stream1', topic_ids=[topic7, topic4, topic5], exchange_point='xp1') stream2_id, route = self.pubsub_management.create_stream( 'stream2', topic_ids=[topic8], exchange_point='xp2') stream3_id, route = self.pubsub_management.create_stream( 'stream3', topic_ids=[topic10, topic13], exchange_point='xp2') stream4_id, route = self.pubsub_management.create_stream( 'stream4', topic_ids=[topic9], exchange_point='xp2') stream5_id, route = self.pubsub_management.create_stream( 'stream5', topic_ids=[topic11], exchange_point='xp2') subscription1 = self.pubsub_management.create_subscription( 'sub1', topic_ids=[topic1]) subscription2 = self.pubsub_management.create_subscription( 'sub2', topic_ids=[topic8], exchange_name='sub1') subscription3 = self.pubsub_management.create_subscription( 'sub3', topic_ids=[topic9], exchange_name='sub1') subscription4 = self.pubsub_management.create_subscription( 'sub4', topic_ids=[topic10, topic13, topic11], exchange_name='sub1') #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription1) self.publish_on_stream(stream1_id, 1) self.assertEquals(self.msg_queue.get(timeout=10), 1) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.1) self.pubsub_management.deactivate_subscription(subscription1) self.pubsub_management.delete_subscription(subscription1) #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription2) self.publish_on_stream(stream2_id, 2) self.assertEquals(self.msg_queue.get(timeout=10), 2) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.1) self.pubsub_management.deactivate_subscription(subscription2) self.pubsub_management.delete_subscription(subscription2) #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription3) self.publish_on_stream(stream2_id, 3) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.3) self.publish_on_stream(stream3_id, 4) self.assertEquals(self.msg_queue.get(timeout=10), 4) self.pubsub_management.deactivate_subscription(subscription3) self.pubsub_management.delete_subscription(subscription3) #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription4) self.publish_on_stream(stream4_id, 5) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.3) self.publish_on_stream(stream5_id, 6) self.assertEquals(self.msg_queue.get(timeout=10), 6) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.3) self.pubsub_management.deactivate_subscription(subscription4) self.pubsub_management.delete_subscription(subscription4) #-------------------------------------------------------------------------------- sub1.stop() self.pubsub_management.delete_topic(topic13) self.pubsub_management.delete_topic(topic12) self.pubsub_management.delete_topic(topic11) self.pubsub_management.delete_topic(topic10) self.pubsub_management.delete_topic(topic9) self.pubsub_management.delete_topic(topic8) self.pubsub_management.delete_topic(topic7) self.pubsub_management.delete_topic(topic6) self.pubsub_management.delete_topic(topic5) self.pubsub_management.delete_topic(topic4) self.pubsub_management.delete_topic(topic3) self.pubsub_management.delete_topic(topic2) self.pubsub_management.delete_topic(topic1) self.pubsub_management.delete_stream(stream1_id) self.pubsub_management.delete_stream(stream2_id) self.pubsub_management.delete_stream(stream3_id) self.pubsub_management.delete_stream(stream4_id) self.pubsub_management.delete_stream(stream5_id)
def test_full_pubsub(self): self.sub1_sat = Event() self.sub2_sat = Event() def subscriber1(m, r, s): self.sub1_sat.set() def subscriber2(m, r, s): self.sub2_sat.set() sub1 = StandaloneStreamSubscriber('sub1', subscriber1) self.queue_cleanup.append(sub1.xn.queue) sub1.start() sub2 = StandaloneStreamSubscriber('sub2', subscriber2) self.queue_cleanup.append(sub2.xn.queue) sub2.start() log_topic = self.pubsub_management.create_topic( 'instrument_logs', exchange_point='instruments') science_topic = self.pubsub_management.create_topic( 'science_data', exchange_point='instruments') events_topic = self.pubsub_management.create_topic( 'notifications', exchange_point='events') log_stream, route = self.pubsub_management.create_stream( 'instrument1-logs', topic_ids=[log_topic], exchange_point='instruments') ctd_stream, route = self.pubsub_management.create_stream( 'instrument1-ctd', topic_ids=[science_topic], exchange_point='instruments') event_stream, route = self.pubsub_management.create_stream( 'notifications', topic_ids=[events_topic], exchange_point='events') raw_stream, route = self.pubsub_management.create_stream( 'temp', exchange_point='global.data') self.exchange_cleanup.extend(['instruments', 'events', 'global.data']) subscription1 = self.pubsub_management.create_subscription( 'subscription1', stream_ids=[log_stream, event_stream], exchange_name='sub1') subscription2 = self.pubsub_management.create_subscription( 'subscription2', exchange_points=['global.data'], stream_ids=[ctd_stream], exchange_name='sub2') self.pubsub_management.activate_subscription(subscription1) self.pubsub_management.activate_subscription(subscription2) self.publish_on_stream(log_stream, 1) self.assertTrue(self.sub1_sat.wait(4)) self.assertFalse(self.sub2_sat.is_set()) self.publish_on_stream(raw_stream, 1) self.assertTrue(self.sub1_sat.wait(4)) sub1.stop() sub2.stop()
def test_topic_craziness(self): self.msg_queue = Queue() def subscriber1(m, r, s): self.msg_queue.put(m) sub1 = StandaloneStreamSubscriber("sub1", subscriber1) self.queue_cleanup.append(sub1.xn.queue) sub1.start() topic1 = self.pubsub_management.create_topic("topic1", exchange_point="xp1") topic2 = self.pubsub_management.create_topic("topic2", exchange_point="xp1", parent_topic_id=topic1) topic3 = self.pubsub_management.create_topic("topic3", exchange_point="xp1", parent_topic_id=topic1) topic4 = self.pubsub_management.create_topic("topic4", exchange_point="xp1", parent_topic_id=topic2) topic5 = self.pubsub_management.create_topic("topic5", exchange_point="xp1", parent_topic_id=topic2) topic6 = self.pubsub_management.create_topic("topic6", exchange_point="xp1", parent_topic_id=topic3) topic7 = self.pubsub_management.create_topic("topic7", exchange_point="xp1", parent_topic_id=topic3) # Tree 2 topic8 = self.pubsub_management.create_topic("topic8", exchange_point="xp2") topic9 = self.pubsub_management.create_topic("topic9", exchange_point="xp2", parent_topic_id=topic8) topic10 = self.pubsub_management.create_topic("topic10", exchange_point="xp2", parent_topic_id=topic9) topic11 = self.pubsub_management.create_topic("topic11", exchange_point="xp2", parent_topic_id=topic9) topic12 = self.pubsub_management.create_topic("topic12", exchange_point="xp2", parent_topic_id=topic11) topic13 = self.pubsub_management.create_topic("topic13", exchange_point="xp2", parent_topic_id=topic11) self.exchange_cleanup.extend(["xp1", "xp2"]) stream1_id, route = self.pubsub_management.create_stream( "stream1", topic_ids=[topic7, topic4, topic5], exchange_point="xp1" ) stream2_id, route = self.pubsub_management.create_stream("stream2", topic_ids=[topic8], exchange_point="xp2") stream3_id, route = self.pubsub_management.create_stream( "stream3", topic_ids=[topic10, topic13], exchange_point="xp2" ) stream4_id, route = self.pubsub_management.create_stream("stream4", topic_ids=[topic9], exchange_point="xp2") stream5_id, route = self.pubsub_management.create_stream("stream5", topic_ids=[topic11], exchange_point="xp2") subscription1 = self.pubsub_management.create_subscription("sub1", topic_ids=[topic1]) subscription2 = self.pubsub_management.create_subscription("sub2", topic_ids=[topic8], exchange_name="sub1") subscription3 = self.pubsub_management.create_subscription("sub3", topic_ids=[topic9], exchange_name="sub1") subscription4 = self.pubsub_management.create_subscription( "sub4", topic_ids=[topic10, topic13, topic11], exchange_name="sub1" ) # -------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription1) self.publish_on_stream(stream1_id, 1) self.assertEquals(self.msg_queue.get(timeout=10), 1) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.1) self.pubsub_management.deactivate_subscription(subscription1) self.pubsub_management.delete_subscription(subscription1) # -------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription2) self.publish_on_stream(stream2_id, 2) self.assertEquals(self.msg_queue.get(timeout=10), 2) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.1) self.pubsub_management.deactivate_subscription(subscription2) self.pubsub_management.delete_subscription(subscription2) # -------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription3) self.publish_on_stream(stream2_id, 3) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.3) self.publish_on_stream(stream3_id, 4) self.assertEquals(self.msg_queue.get(timeout=10), 4) self.pubsub_management.deactivate_subscription(subscription3) self.pubsub_management.delete_subscription(subscription3) # -------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription4) self.publish_on_stream(stream4_id, 5) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.3) self.publish_on_stream(stream5_id, 6) self.assertEquals(self.msg_queue.get(timeout=10), 6) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.3) self.pubsub_management.deactivate_subscription(subscription4) self.pubsub_management.delete_subscription(subscription4) # -------------------------------------------------------------------------------- sub1.stop() self.pubsub_management.delete_topic(topic13) self.pubsub_management.delete_topic(topic12) self.pubsub_management.delete_topic(topic11) self.pubsub_management.delete_topic(topic10) self.pubsub_management.delete_topic(topic9) self.pubsub_management.delete_topic(topic8) self.pubsub_management.delete_topic(topic7) self.pubsub_management.delete_topic(topic6) self.pubsub_management.delete_topic(topic5) self.pubsub_management.delete_topic(topic4) self.pubsub_management.delete_topic(topic3) self.pubsub_management.delete_topic(topic2) self.pubsub_management.delete_topic(topic1) self.pubsub_management.delete_stream(stream1_id) self.pubsub_management.delete_stream(stream2_id) self.pubsub_management.delete_stream(stream3_id) self.pubsub_management.delete_stream(stream4_id) self.pubsub_management.delete_stream(stream5_id)
def test_replay_pause(self): # Get a precompiled parameter dictionary with basic ctd fields pdict_id = self.dataset_management.read_parameter_dictionary_by_name( 'ctd_parsed_param_dict', id_only=True) context_ids = self.dataset_management.read_parameter_contexts( pdict_id, id_only=True) # Add a field that supports binary data input. bin_context = ParameterContext('binary', param_type=ArrayType()) context_ids.append( self.dataset_management.create_parameter_context( 'binary', bin_context.dump())) # Add another field that supports dictionary elements. rec_context = ParameterContext('records', param_type=RecordType()) context_ids.append( self.dataset_management.create_parameter_context( 'records', rec_context.dump())) pdict_id = self.dataset_management.create_parameter_dictionary( 'replay_pdict', parameter_context_ids=context_ids, temporal_context='time') stream_def_id = self.pubsub_management.create_stream_definition( 'replay_stream', parameter_dictionary_id=pdict_id) replay_stream, replay_route = self.pubsub_management.create_stream( 'replay', 'xp1', stream_definition_id=stream_def_id) dataset_id = self.create_dataset(pdict_id) scov = DatasetManagementService._get_coverage(dataset_id) bb = CoverageCraft(scov) bb.rdt['time'] = np.arange(100) bb.rdt['temp'] = np.random.random(100) + 30 bb.sync_with_granule() DatasetManagementService._persist_coverage( dataset_id, bb.coverage) # This invalidates it for multi-host configurations # Set up the subscriber to verify the data subscriber = StandaloneStreamSubscriber( self.exchange_space_name, self.validate_granule_subscription) xp = self.container.ex_manager.create_xp('xp1') self.queue_buffer.append(self.exchange_space_name) subscriber.start() subscriber.xn.bind(replay_route.routing_key, xp) # Set up the replay agent and the client wrapper # 1) Define the Replay (dataset and stream to publish on) self.replay_id, process_id = self.data_retriever.define_replay( dataset_id=dataset_id, stream_id=replay_stream) # 2) Make a client to the interact with the process (optionall provide it a process to bind with) replay_client = ReplayClient(process_id) # 3) Start the agent (launch the process) self.data_retriever.start_replay_agent(self.replay_id) # 4) Start replaying... replay_client.start_replay() # Wait till we get some granules self.assertTrue(self.event.wait(5)) # We got granules, pause the replay, clear the queue and allow the process to finish consuming replay_client.pause_replay() gevent.sleep(1) subscriber.xn.purge() self.event.clear() # Make sure there's no remaining messages being consumed self.assertFalse(self.event.wait(1)) # Resume the replay and wait until we start getting granules again replay_client.resume_replay() self.assertTrue(self.event.wait(5)) # Stop the replay, clear the queues replay_client.stop_replay() gevent.sleep(1) subscriber.xn.purge() self.event.clear() # Make sure that it did indeed stop self.assertFalse(self.event.wait(1)) subscriber.stop()
def test_dm_end_2_end(self): #-------------------------------------------------------------------------------- # Set up a stream and have a mock instrument (producer) send data #-------------------------------------------------------------------------------- self.event.clear() # Get a precompiled parameter dictionary with basic ctd fields pdict_id = self.dataset_management.read_parameter_dictionary_by_name( 'ctd_parsed_param_dict', id_only=True) context_ids = self.dataset_management.read_parameter_contexts( pdict_id, id_only=True) # Add a field that supports binary data input. bin_context = ParameterContext('binary', param_type=ArrayType()) context_ids.append( self.dataset_management.create_parameter_context( 'binary', bin_context.dump())) # Add another field that supports dictionary elements. rec_context = ParameterContext('records', param_type=RecordType()) context_ids.append( self.dataset_management.create_parameter_context( 'records', rec_context.dump())) pdict_id = self.dataset_management.create_parameter_dictionary( 'replay_pdict', parameter_context_ids=context_ids, temporal_context='time') stream_definition = self.pubsub_management.create_stream_definition( 'ctd data', parameter_dictionary_id=pdict_id) stream_id, route = self.pubsub_management.create_stream( 'producer', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition) #-------------------------------------------------------------------------------- # Start persisting the data on the stream # - Get the ingestion configuration from the resource registry # - Create the dataset # - call persist_data_stream to setup the subscription for the ingestion workers # on the stream that you specify which causes the data to be persisted #-------------------------------------------------------------------------------- ingest_config_id = self.get_ingestion_config() dataset_id = self.create_dataset(pdict_id) self.ingestion_management.persist_data_stream( stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id) #-------------------------------------------------------------------------------- # Now the granules are ingesting and persisted #-------------------------------------------------------------------------------- self.launch_producer(stream_id) self.wait_until_we_have_enough_granules(dataset_id, 40) #-------------------------------------------------------------------------------- # Now get the data in one chunk using an RPC Call to start_retreive #-------------------------------------------------------------------------------- replay_data = self.data_retriever.retrieve(dataset_id) self.assertIsInstance(replay_data, Granule) rdt = RecordDictionaryTool.load_from_granule(replay_data) self.assertTrue((rdt['time'][:10] == np.arange(10)).all(), '%s' % rdt['time'][:]) self.assertTrue((rdt['binary'][:10] == np.array(['hi'] * 10, dtype='object')).all()) #-------------------------------------------------------------------------------- # Now to try the streamed approach #-------------------------------------------------------------------------------- replay_stream_id, replay_route = self.pubsub_management.create_stream( 'replay_out', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition) self.replay_id, process_id = self.data_retriever.define_replay( dataset_id=dataset_id, stream_id=replay_stream_id) log.info('Process ID: %s', process_id) replay_client = ReplayClient(process_id) #-------------------------------------------------------------------------------- # Create the listening endpoint for the the retriever to talk to #-------------------------------------------------------------------------------- xp = self.container.ex_manager.create_xp(self.exchange_point_name) subscriber = StandaloneStreamSubscriber( self.exchange_space_name, self.validate_granule_subscription) self.queue_buffer.append(self.exchange_space_name) subscriber.start() subscriber.xn.bind(replay_route.routing_key, xp) self.data_retriever.start_replay_agent(self.replay_id) self.assertTrue(replay_client.await_agent_ready(5), 'The process never launched') replay_client.start_replay() self.assertTrue(self.event.wait(10)) subscriber.stop() self.data_retriever.cancel_replay_agent(self.replay_id) #-------------------------------------------------------------------------------- # Test the slicing capabilities #-------------------------------------------------------------------------------- granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={'tdoa': slice(0, 5)}) rdt = RecordDictionaryTool.load_from_granule(granule) b = rdt['time'] == np.arange(5) self.assertTrue(b.all() if not isinstance(b, bool) else b) self.streams.append(stream_id) self.stop_ingestion(stream_id)
def test_topic_craziness(self): self.msg_queue = Queue() def subscriber1(m,r,s): self.msg_queue.put(m) sub1 = StandaloneStreamSubscriber('sub1', subscriber1) self.queue_cleanup.append(sub1.xn.queue) sub1.start() topic1 = self.pubsub_management.create_topic('topic1', exchange_point='xp1') topic2 = self.pubsub_management.create_topic('topic2', exchange_point='xp1', parent_topic_id=topic1) topic3 = self.pubsub_management.create_topic('topic3', exchange_point='xp1', parent_topic_id=topic1) topic4 = self.pubsub_management.create_topic('topic4', exchange_point='xp1', parent_topic_id=topic2) topic5 = self.pubsub_management.create_topic('topic5', exchange_point='xp1', parent_topic_id=topic2) topic6 = self.pubsub_management.create_topic('topic6', exchange_point='xp1', parent_topic_id=topic3) topic7 = self.pubsub_management.create_topic('topic7', exchange_point='xp1', parent_topic_id=topic3) # Tree 2 topic8 = self.pubsub_management.create_topic('topic8', exchange_point='xp2') topic9 = self.pubsub_management.create_topic('topic9', exchange_point='xp2', parent_topic_id=topic8) topic10 = self.pubsub_management.create_topic('topic10', exchange_point='xp2', parent_topic_id=topic9) topic11 = self.pubsub_management.create_topic('topic11', exchange_point='xp2', parent_topic_id=topic9) topic12 = self.pubsub_management.create_topic('topic12', exchange_point='xp2', parent_topic_id=topic11) topic13 = self.pubsub_management.create_topic('topic13', exchange_point='xp2', parent_topic_id=topic11) self.exchange_cleanup.extend(['xp1','xp2']) stream1_id, route = self.pubsub_management.create_stream('stream1', topic_ids=[topic7, topic4, topic5], exchange_point='xp1') stream2_id, route = self.pubsub_management.create_stream('stream2', topic_ids=[topic8], exchange_point='xp2') stream3_id, route = self.pubsub_management.create_stream('stream3', topic_ids=[topic10,topic13], exchange_point='xp2') stream4_id, route = self.pubsub_management.create_stream('stream4', topic_ids=[topic9], exchange_point='xp2') stream5_id, route = self.pubsub_management.create_stream('stream5', topic_ids=[topic11], exchange_point='xp2') subscription1 = self.pubsub_management.create_subscription('sub1', topic_ids=[topic1]) subscription2 = self.pubsub_management.create_subscription('sub2', topic_ids=[topic8], exchange_name='sub1') subscription3 = self.pubsub_management.create_subscription('sub3', topic_ids=[topic9], exchange_name='sub1') subscription4 = self.pubsub_management.create_subscription('sub4', topic_ids=[topic10,topic13, topic11], exchange_name='sub1') #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription1) self.publish_on_stream(stream1_id,1) self.assertEquals(self.msg_queue.get(timeout=10), 1) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.1) self.pubsub_management.deactivate_subscription(subscription1) self.pubsub_management.delete_subscription(subscription1) #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription2) self.publish_on_stream(stream2_id,2) self.assertEquals(self.msg_queue.get(timeout=10), 2) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.1) self.pubsub_management.deactivate_subscription(subscription2) self.pubsub_management.delete_subscription(subscription2) #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription3) self.publish_on_stream(stream2_id, 3) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.3) self.publish_on_stream(stream3_id, 4) self.assertEquals(self.msg_queue.get(timeout=10),4) self.pubsub_management.deactivate_subscription(subscription3) self.pubsub_management.delete_subscription(subscription3) #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription4) self.publish_on_stream(stream4_id, 5) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.3) self.publish_on_stream(stream5_id, 6) self.assertEquals(self.msg_queue.get(timeout=10),6) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.3) self.pubsub_management.deactivate_subscription(subscription4) self.pubsub_management.delete_subscription(subscription4) #-------------------------------------------------------------------------------- sub1.stop() self.pubsub_management.delete_topic(topic13) self.pubsub_management.delete_topic(topic12) self.pubsub_management.delete_topic(topic11) self.pubsub_management.delete_topic(topic10) self.pubsub_management.delete_topic(topic9) self.pubsub_management.delete_topic(topic8) self.pubsub_management.delete_topic(topic7) self.pubsub_management.delete_topic(topic6) self.pubsub_management.delete_topic(topic5) self.pubsub_management.delete_topic(topic4) self.pubsub_management.delete_topic(topic3) self.pubsub_management.delete_topic(topic2) self.pubsub_management.delete_topic(topic1) self.pubsub_management.delete_stream(stream1_id) self.pubsub_management.delete_stream(stream2_id) self.pubsub_management.delete_stream(stream3_id) self.pubsub_management.delete_stream(stream4_id) self.pubsub_management.delete_stream(stream5_id)