def test_read_and_update_subscription(self):
    # Mocks
    subscription_obj = Subscription()
    subscription_obj.query = StreamQuery(['789'])
    subscription_obj.is_active = False
    subscription_obj.subscription_type = SubscriptionTypeEnum.STREAM_QUERY
    self.mock_read.return_value = subscription_obj
    self.mock_find_objects.return_value = (['789'], ['This here is an association'])
    self.mock_update.return_value = ('not important', 'even less so')

    # Execution
    query = StreamQuery(['123'])
    retval = self.pubsub_service.update_subscription('subscription_id', query)

    # Assertions
    self.mock_read.assert_called_once_with('subscription_id', '')
    self.mock_find_objects.assert_called_once_with('subscription_id', PRED.hasStream, '', True)
    self.mock_delete_association.assert_called_once_with('This here is an association')
    self.mock_create_association.assert_called_once_with('subscription_id', PRED.hasStream, '123', None)
    self.assertTrue(self.mock_update.call_count == 1, 'update was not called')
def _start_output_stream_listener(self, data_product_stream_ids, message_count_per_stream=10):
    cc = self.container
    assertions = self.assertTrue

    ###
    ### Make a subscriber in the test to listen for transformed data
    ###
    salinity_subscription_id = self.pubsubclient.create_subscription(
        query=StreamQuery(data_product_stream_ids),
        exchange_name='workflow_test',
        name="test workflow transformations",
    )

    pid = cc.spawn_process(name='dummy_process_for_test',
                           module='pyon.ion.process',
                           cls='SimpleProcess',
                           config={})
    dummy_process = cc.proc_manager.procs[pid]

    subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, node=cc.node)

    result = gevent.event.AsyncResult()
    results = []

    def message_received(message, headers):
        log.warn(' data received!')
        results.append(message)
        # Only wait for so many messages - per stream
        if len(results) >= len(data_product_stream_ids) * message_count_per_stream:
            result.set(True)

    subscriber = subscriber_registrar.create_subscriber(exchange_name='workflow_test',
                                                        callback=message_received)
    subscriber.start()

    # After the queue has been created it is safe to activate the subscription
    self.pubsubclient.activate_subscription(subscription_id=salinity_subscription_id)

    # Assert that we have received data
    assertions(result.get(timeout=30))

    self.pubsubclient.deactivate_subscription(subscription_id=salinity_subscription_id)

    subscriber.stop()

    return results
def setUp(self):
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2dm.yml')

    self.pubsub_cli = PubsubManagementServiceClient(node=self.container.node)

    self.ctd_stream1_id = self.pubsub_cli.create_stream(name="SampleStream1",
                                                        description="Sample Stream 1 Description")

    self.ctd_stream2_id = self.pubsub_cli.create_stream(name="SampleStream2",
                                                        description="Sample Stream 2 Description")

    # Make a subscription to two input streams
    exchange_name = "a_queue"
    query = StreamQuery([self.ctd_stream1_id, self.ctd_stream2_id])

    self.ctd_subscription_id = self.pubsub_cli.create_subscription(query,
                                                                   exchange_name,
                                                                   "SampleSubscription",
                                                                   "Sample Subscription Description")

    # Make a subscription to all streams on an exchange point
    exchange_name = "another_queue"
    query = ExchangeQuery()

    self.exchange_subscription_id = self.pubsub_cli.create_subscription(query,
                                                                        exchange_name,
                                                                        "SampleExchangeSubscription",
                                                                        "Sample Exchange Subscription Description")

    pid = self.container.spawn_process(name='dummy_process_for_test',
                                       module='pyon.ion.process',
                                       cls='SimpleProcess',
                                       config={})
    dummy_process = self.container.proc_manager.procs[pid]

    # Normally the user does not see or create the publisher; this is part of the container's business.
    # For the test we need to set it up explicitly.
    publisher_registrar = StreamPublisherRegistrar(process=dummy_process, node=self.container.node)

    self.ctd_stream1_publisher = publisher_registrar.create_publisher(stream_id=self.ctd_stream1_id)
    self.ctd_stream2_publisher = publisher_registrar.create_publisher(stream_id=self.ctd_stream2_id)

    # Cheat and use the dummy process as the subscriber's process - I don't think it is used for anything...
    self.stream_subscriber = StreamSubscriberRegistrar(process=dummy_process, node=self.container.node)
def setUp(self):
    # Set up the container
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2dm.yml')

    self.pubsub_cli = PubsubManagementServiceClient(node=self.container.node)
    self.tms_cli = TransformManagementServiceClient(node=self.container.node)
    self.rr_cli = ResourceRegistryServiceClient(node=self.container.node)
    self.procd_cli = ProcessDispatcherServiceClient(node=self.container.node)

    self.input_stream_id = self.pubsub_cli.create_stream(name='input_stream', original=True)

    self.input_subscription_id = self.pubsub_cli.create_subscription(
        query=StreamQuery(stream_ids=[self.input_stream_id]),
        exchange_name='transform_input',
        name='input_subscription')

    self.output_stream_id = self.pubsub_cli.create_stream(name='output_stream', original=True)

    self.process_definition = ProcessDefinition(name='basic_transform_definition')
    self.process_definition.executable = {
        'module': 'ion.processes.data.transforms.transform_example',
        'class': 'TransformExample'
    }
    self.process_definition_id = self.procd_cli.create_process_definition(
        process_definition=self.process_definition)
def _start_data_subscribers(self):
    """
    Create streams, subscriptions, and subscribers for each data stream named in the driver.
    """
    # Create a pubsub client to create streams.
    pubsub_client = PubsubManagementServiceClient(node=self.container.node)

    # A callback for processing subscribed-to data.
    def consume_data(message, headers):
        log.info('Subscriber received data message: %s.', str(message))
        self._samples_received.append(message)
        if self._no_samples and self._no_samples == len(self._samples_received):
            self._async_data_result.set()

    # Create a stream subscriber registrar to create subscribers.
    subscriber_registrar = StreamSubscriberRegistrar(process=self.container,
                                                     node=self.container.node)

    # Create streams and subscriptions for each stream named in driver.
    self._stream_config = {}
    self._data_subscribers = []
    for (stream_name, val) in PACKET_CONFIG.iteritems():
        stream_def = ctd_stream_definition(stream_id=None)
        stream_def_id = pubsub_client.create_stream_definition(container=stream_def)
        stream_id = pubsub_client.create_stream(name=stream_name,
                                                stream_definition_id=stream_def_id,
                                                original=True,
                                                encoding='ION R2')
        self._stream_config[stream_name] = stream_id

        # Create subscriptions for each stream.
        exchange_name = '%s_queue' % stream_name
        sub = subscriber_registrar.create_subscriber(exchange_name=exchange_name,
                                                     callback=consume_data)
        self._listen(sub)
        self._data_subscribers.append(sub)
        query = StreamQuery(stream_ids=[stream_id])
        sub_id = pubsub_client.create_subscription(query=query, exchange_name=exchange_name)
        pubsub_client.activate_subscription(sub_id)
def on_start(self):
    pubsub_cli = PubsubManagementServiceProcessClient(process=self, node=self.container.node)

    # Get the stream(s)
    stream_id = self.CFG.get_safe('process.stream_id', '')

    query = StreamQuery(stream_ids=[stream_id, ])

    exchange_name = 'dispatcher_%s' % self.id

    subscription_id = pubsub_cli.create_subscription(query=query,
                                                     exchange_name=exchange_name,
                                                     name="SampleSubscription",
                                                     description="Sample Subscription Description")

    stream_subscriber = StreamSubscriberRegistrar(process=self, node=self.container.node)

    def message_received(granule, h):
        rdt = RecordDictionaryTool.load_from_granule(granule)
        log.warn('Logging Record Dictionary received in logger subscription \n%s',
                 rdt.pretty_print())

    subscriber = stream_subscriber.create_subscriber(exchange_name=exchange_name,
                                                     callback=message_received)
    subscriber.start()

    pubsub_cli.activate_subscription(subscription_id)
def on_start(self):
    rr_cli = ResourceRegistryServiceProcessClient(process=self, node=self.container.node)
    pubsub_cli = PubsubManagementServiceProcessClient(process=self, node=self.container.node)

    # Get the stream(s)
    data_product_id = self.CFG.get_safe('dispatcher.data_product_id', '')

    stream_ids, _ = rr_cli.find_objects(subject=data_product_id,
                                        predicate=PRED.hasStream,
                                        id_only=True)
    log.info('Got Stream Ids: "%s"', stream_ids)
    assert stream_ids, 'No streams found for this data product!'

    query = StreamQuery(stream_ids=stream_ids)

    exchange_name = 'dispatcher_%s' % str(os.getpid())

    subscription_id = pubsub_cli.create_subscription(query=query,
                                                     exchange_name=exchange_name,
                                                     name="SampleSubscription",
                                                     description="Sample Subscription Description")

    stream_subscriber = StreamSubscriberRegistrar(process=self, node=self.container.node)

    stream_defs = {}

    def message_received(granule, h):
        stream_id = granule.stream_resource_id

        data_stream_id = granule.data_stream_id
        data_stream = granule.identifiables[data_stream_id]

        tstamp = get_datetime(data_stream.timestamp.value)

        records = granule.identifiables['record_count'].value

        log.info('Received a message from stream %s with time stamp %s and %d records'
                 % (stream_id, tstamp, records))

        if stream_id not in stream_defs:
            stream_defs[stream_id] = pubsub_cli.find_stream_definition(stream_id,
                                                                       id_only=False).container
        stream_def = stream_defs.get(stream_id)

        sp = PointSupplementStreamParser(stream_definition=stream_def, stream_granule=granule)

        last_data = {}
        for field in sp.list_field_names():
            last_data[field] = sp.get_values(field)[-1]

        log.info('Last values in the message: %s' % str(last_data))

    subscriber = stream_subscriber.create_subscriber(exchange_name=exchange_name,
                                                     callback=message_received)
    subscriber.start()

    pubsub_cli.activate_subscription(subscription_id)
def on_start(self):
    # The data dictionary object holds a copy of all the viz products created by the service.
    # The viz products are indexed by the viz_product_type and data_product_id (which could be
    # google_datatables or mpl_graphs).
    self.viz_data_dictionary = {}
    self.viz_data_dictionary['google_dt'] = {}
    self.viz_data_dictionary['google_realtime_dt'] = {}
    self.viz_data_dictionary['matplotlib_graphs'] = {}
    # Kind of redundant, but we will maintain a separate list of data product_ids registered with the viz_service
    self.data_products = []

    # Create clients to interface with PubSub, Transform Management Service and Resource Registry
    self.pubsub_cli = self.clients.pubsub_management
    self.tms_cli = self.clients.transform_management
    self.rr_cli = self.clients.resource_registry
    self.dr_cli = self.clients.data_retriever
    self.dsm_cli = self.clients.dataset_management

    """
    # Create process definitions which will be used to spawn off the transform processes
    self.matplotlib_proc_def = IonObject(RT.ProcessDefinition,
                                         name='viz_transform_process' + '.' + self.random_id_generator())
    self.matplotlib_proc_def.executable = {
        'module': 'ion.services.ans.visualization_service',
        'class': 'VizTransformProcForMatplotlibGraphs'
    }
    self.matplotlib_proc_def_id, _ = self.rr_cli.create(self.matplotlib_proc_def)

    self.google_dt_proc_def = IonObject(RT.ProcessDefinition,
                                        name='viz_transform_process' + '.' + self.random_id_generator())
    self.google_dt_proc_def.executable = {
        'module': 'ion.services.ans.visualization_service',
        'class': 'VizTransformProcForGoogleDT'
    }
    self.google_dt_proc_def_id, _ = self.rr_cli.create(self.google_dt_proc_def)
    """

    # Query resource registry to get process definitions and stream ids made by the bootstrap
    proc_def_ids, _ = self.rr_cli.find_resources(restype=RT.ProcessDefinition,
                                                 lcstate=None,
                                                 name="viz_matplotlib_transform_process",
                                                 id_only=True)
    self.matplotlib_proc_def_id = proc_def_ids[0]

    proc_def_ids, _ = self.rr_cli.find_resources(restype=RT.ProcessDefinition,
                                                 lcstate=None,
                                                 name="viz_google_dt_transform_process",
                                                 id_only=True)
    self.google_dt_proc_def_id = proc_def_ids[0]

    # Create a stream that all the transform processes will use to submit data back to the viz service
    self.viz_service_submit_stream_id = self.pubsub_cli.create_stream(
        name="visualization_service_submit_stream." + self.random_id_generator())

    # Subscribe to this stream since all the results from transforms will be submitted here
    query = StreamQuery(stream_ids=[self.viz_service_submit_stream_id, ])
    self.viz_service_submit_stream_sub_id = self.pubsub_cli.create_subscription(
        query=query, exchange_name="visualization_service_submit_queue")

    submit_stream_subscriber_registrar = StreamSubscriberRegistrar(process=self.container,
                                                                   node=self.container.node)
    submit_stream_subscriber = submit_stream_subscriber_registrar.create_subscriber(
        exchange_name='visualization_service_submit_queue',
        callback=self.process_submission)
    submit_stream_subscriber.start()

    self.pubsub_cli.activate_subscription(self.viz_service_submit_stream_sub_id)

    # Discover the existing data_product_ids active in the system
    sys_prod_ids, _ = self.rr_cli.find_resources(RT.DataProduct, None, None, True)

    # Register all the streams in the system, which will in turn start transform processes
    for dp_id in sys_prod_ids:
        self.register_new_data_product(dp_id)

    # Listen for events when new data_products show up
    self.event_subscriber = EventSubscriber(event_type="ResourceModifiedEvent",
                                            origin_type="DataProduct",
                                            sub_type="UPDATE",
                                            callback=self.receive_new_dataproduct_event)
    self.event_subscriber.activate()

    return
def register_new_data_product(self, data_product_id=''):
    """Apprise the Visualization service of a new data product in the system. This function
    inits transform processes for generating the matplotlib graphs of the new data product.
    It also creates transform processes which generate Google data-tables for the real-time
    streams (sliding window) coming in from the instruments.

    @param data_product_id    str
    @throws BadRequest    check data_product_id for duplicates
    """

    # Check to see if the DP has already been registered. If yes, do nothing
    if (data_product_id in self.viz_data_dictionary['matplotlib_graphs']) or \
            (data_product_id in self.viz_data_dictionary['google_realtime_dt']):
        log.warn("Data Product has already been registered with Visualization service. Ignoring.")
        return

    # Extract the stream_id associated with the data_product_id
    viz_stream_id, _ = self.rr_cli.find_objects(data_product_id, PRED.hasStream, None, True)

    if viz_stream_id == []:
        log.warn("Visualization_service: viz_stream_id is empty")
        return

    viz_stream_def_id = self.pubsub_cli.find_stream_definition(stream_id=viz_stream_id[0],
                                                               id_only=True)

    # Go ahead only if the data product is unique
    if data_product_id in self.data_products:
        raise BadRequest
    self.data_products.append(data_product_id)

    # Init the space needed to store matplotlib_graphs and realtime Google data tables.
    # For the matplotlib graphs, list_of_images stores the names of the image files. The actual
    # binary data for the images is also stored in the same dictionary as
    # {img_name1: binary_data1, img_name2: binary_data2, ...}
    self.viz_data_dictionary['matplotlib_graphs'][data_product_id] = {'transform_proc': "",
                                                                      'list_of_images': []}

    # The 'data_table' key points to a JSON string
    self.viz_data_dictionary['google_realtime_dt'][data_product_id] = {'transform_proc': "",
                                                                       'data_table': []}

    ###############################################################################
    # Create transform process for the matplotlib graphs.
    ###############################################################################

    # Create the subscription to the stream. This will be passed as parameter to the transform worker
    #query1 = StreamQuery(stream_ids=[viz_stream_id,])
    query1 = StreamQuery(viz_stream_id)
    viz_subscription_id1 = self.pubsub_cli.create_subscription(
        query=query1, exchange_name='viz_data_exchange.' + self.random_id_generator())

    # Maybe this is a good place to pass the couch DB table to use and other parameters
    configuration1 = {"stream_def_id": viz_stream_def_id, "data_product_id": data_product_id}

    # Launch the viz transform process
    viz_transform_id1 = self.tms_cli.create_transform(
        name='viz_transform_matplotlib_' + self.random_id_generator() + '.' + data_product_id,
        in_subscription_id=viz_subscription_id1,
        out_streams={"visualization_service_submit_stream_id": self.viz_service_submit_stream_id},
        process_definition_id=self.matplotlib_proc_def_id,
        configuration=configuration1)
    self.tms_cli.activate_transform(viz_transform_id1)

    # Keep a record of the viz_transform_id
    self.viz_data_dictionary['matplotlib_graphs'][data_product_id]['transform_proc'] = viz_transform_id1

    ###############################################################################
    # Create transform process for the Google realtime datatables
    ###############################################################################

    # Create the subscription to the stream. This will be passed as parameter to the transform worker
    #query2 = StreamQuery(stream_ids=[viz_stream_id,])
    query2 = StreamQuery(viz_stream_id)
    viz_subscription_id2 = self.pubsub_cli.create_subscription(
        query=query2, exchange_name='viz_data_exchange.' + self.random_id_generator())

    # Maybe this is a good place to pass the couch DB table to use and other parameters
    configuration2 = {"stream_def_id": viz_stream_def_id,
                      "data_product_id": data_product_id,
                      "realtime_flag": "True"}

    # Launch the viz transform process
    viz_transform_id2 = self.tms_cli.create_transform(
        name='viz_transform_realtime_google_dt_' + self.random_id_generator() + '.' + data_product_id,
        in_subscription_id=viz_subscription_id2,
        out_streams={"visualization_service_submit_stream_id": self.viz_service_submit_stream_id},
        process_definition_id=self.google_dt_proc_def_id,
        configuration=configuration2)
    self.tms_cli.activate_transform(viz_transform_id2)

    # Keep a record of the viz_transform_id
    self.viz_data_dictionary['google_realtime_dt'][data_product_id]['transform_proc'] = viz_transform_id2
def start_google_dt_transform(self, data_product_id='', query=''):
    """Request to fetch the datatable for a data product as specified in the query. The query will
    also specify whether it is a realtime view or one-shot.

    @param data_product_id    str
    @param query    str
    @retval datatable    str
    @throws NotFound    object with specified id, query does not exist
    """

    # Generate a token unique for this request
    data_product_id_token = data_product_id + "." + self.random_id_generator()

    # Get object associated with data_product_id
    dp_obj = self.rr_cli.read(data_product_id)

    if dp_obj.dataset_id == '':
        return None

    # Define replay. If no filters are passed the entire ingested dataset is returned
    replay_id, replay_stream_id = self.dr_cli.define_replay(dataset_id=dp_obj.dataset_id)

    replay_stream_def_id = self.pubsub_cli.find_stream_definition(stream_id=replay_stream_id,
                                                                  id_only=True)

    # Set up the transform to handle the data coming back from the replay.
    # Init storage for the resulting data_table
    self.viz_data_dictionary['google_dt'][data_product_id_token] = {'data_table': [],
                                                                    'ready_flag': False}

    # Create the subscription to the stream. This will be passed as parameter to the transform worker
    query = StreamQuery(stream_ids=[replay_stream_id, ])
    replay_subscription_id = self.pubsub_cli.create_subscription(
        query=query, exchange_name='viz_data_exchange.' + self.random_id_generator())

    # Maybe this is a good place to pass the couch DB table to use and other parameters
    configuration = {"stream_def_id": replay_stream_def_id,
                     "data_product_id": data_product_id,
                     "realtime_flag": "False",
                     "data_product_id_token": data_product_id_token}

    # Launch the viz transform process
    viz_transform_id = self.tms_cli.create_transform(
        name='viz_transform_google_dt_' + self.random_id_generator() + '.' + data_product_id,
        in_subscription_id=replay_subscription_id,
        out_streams={"visualization_service_submit_stream_id": self.viz_service_submit_stream_id},
        process_definition_id=self.google_dt_proc_def_id,
        configuration=configuration)
    self.tms_cli.activate_transform(viz_transform_id)

    # Keep a record of the viz_transform_id
    self.viz_data_dictionary['google_dt'][data_product_id_token]['transform_proc'] = viz_transform_id

    # Start the replay and return the token
    self.dr_cli.start_replay(replay_id=replay_id)

    return "google_dt_transform_cb(\"" + data_product_id_token + "\")"
def test_dm_integration(self):
    '''
    test_salinity_transform
    Test full DM Services Integration
    '''
    cc = self.container
    assertions = self.assertTrue

    #-----------------------------
    # Copy below here to run as a script (don't forget the imports of course!)
    #-----------------------------

    # Create some service clients...
    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    data_retriever_service = DataRetrieverServiceClient(node=cc.node)
    transform_management_service = TransformManagementServiceClient(node=cc.node)
    process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

    # Declare some handy variables
    datastore_name = 'test_dm_integration'

    ###
    ### In the beginning there were two stream definitions...
    ###
    # Create a stream definition for the data from the ctd simulator
    ctd_stream_def = SBE37_CDM_stream_definition()
    ctd_stream_def_id = pubsub_management_service.create_stream_definition(
        container=ctd_stream_def, name='Simulated CTD data')

    # Create a stream definition for the data from the salinity Transform
    sal_stream_def_id = pubsub_management_service.create_stream_definition(
        container=SalinityTransform.outgoing_stream_def, name='Scalar Salinity data stream')

    ###
    ### And two process definitions...
    ###
    # One for the ctd simulator...
    producer_definition = ProcessDefinition()
    producer_definition.executable = {
        'module': 'ion.processes.data.ctd_stream_publisher',
        'class': 'SimpleCtdPublisher'
    }
    ctd_sim_procdef_id = process_dispatcher.create_process_definition(
        process_definition=producer_definition)

    # One for the salinity transform
    producer_definition = ProcessDefinition()
    producer_definition.executable = {
        'module': 'ion.processes.data.transforms.ctd.ctd_L2_salinity',
        'class': 'SalinityTransform'
    }
    salinity_transform_procdef_id = process_dispatcher.create_process_definition(
        process_definition=producer_definition)

    #---------------------------
    # Set up ingestion - this is an operator concern - not done by SA in a deployed system
    #---------------------------
    # Configure ingestion using one worker, ingesting to the test_dm_integration datastore with the SCIDATA profile
    log.debug('Calling create_ingestion_configuration')
    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id='science_data',
        couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'),
        number_of_workers=1)
    #
    ingestion_management_service.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id)

    #---------------------------
    # Set up the producer (CTD Simulator)
    #---------------------------

    # Create the stream
    ctd_stream_id = pubsub_management_service.create_stream(stream_definition_id=ctd_stream_def_id)

    # Set up the datasets
    ctd_dataset_id = dataset_management_service.create_dataset(
        stream_id=ctd_stream_id,
        datastore_name=datastore_name,
        view_name='datasets/stream_join_granule')

    # Configure ingestion of this dataset
    ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration(
        dataset_id=ctd_dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id,  # you need to know the ingestion configuration id!
    )
    # Hold onto ctd_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

    #---------------------------
    # Set up the salinity transform
    #---------------------------

    # Create the stream
    sal_stream_id = pubsub_management_service.create_stream(stream_definition_id=sal_stream_def_id)

    # Set up the datasets
    sal_dataset_id = dataset_management_service.create_dataset(
        stream_id=sal_stream_id,
        datastore_name=datastore_name,
        view_name='datasets/stream_join_granule')

    # Configure ingestion of the salinity as a dataset
    sal_dataset_config_id = ingestion_management_service.create_dataset_configuration(
        dataset_id=sal_dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id,  # you need to know the ingestion configuration id!
    )
    # Hold onto sal_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

    # Create a subscription as input to the transform
    sal_transform_input_subscription_id = pubsub_management_service.create_subscription(
        query=StreamQuery(stream_ids=[ctd_stream_id, ]),
        exchange_name='salinity_transform_input')  # how do we make these names??? i.e. Should they be anonymous?

    # Create the salinity transform
    sal_transform_id = transform_management_service.create_transform(
        name='example salinity transform',
        in_subscription_id=sal_transform_input_subscription_id,
        out_streams={'output': sal_stream_id, },
        process_definition_id=salinity_transform_procdef_id,
        # no configuration needed at this time...
    )
    # Start the transform - for a test case it makes sense to do it before starting the producer, but it is not required
    transform_management_service.activate_transform(transform_id=sal_transform_id)

    # Start the ctd simulator to produce some data
    configuration = {
        'process': {
            'stream_id': ctd_stream_id,
        }
    }
    ctd_sim_pid = process_dispatcher.schedule_process(
        process_definition_id=ctd_sim_procdef_id, configuration=configuration)

    ###
    ### Make a subscriber in the test to listen for salinity data
    ###
    salinity_subscription_id = pubsub_management_service.create_subscription(
        query=StreamQuery([sal_stream_id, ]),
        exchange_name='salinity_test',
        name="test salinity subscription",
    )

    pid = cc.spawn_process(name='dummy_process_for_test',
                           module='pyon.ion.process',
                           cls='SimpleProcess',
                           config={})
    dummy_process = cc.proc_manager.procs[pid]

    subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, node=cc.node)

    result = gevent.event.AsyncResult()
    results = []

    def message_received(message, headers):
        log.warn('Salinity data received!')
        results.append(message)
        if len(results) > 3:
            result.set(True)

    subscriber = subscriber_registrar.create_subscriber(exchange_name='salinity_test',
                                                        callback=message_received)
    subscriber.start()

    # After the queue has been created it is safe to activate the subscription
    pubsub_management_service.activate_subscription(subscription_id=salinity_subscription_id)

    # Assert that we have received data
    assertions(result.get(timeout=10))

    # Stop the flow and parse the messages...
    process_dispatcher.cancel_process(ctd_sim_pid)  # kill the ctd simulator process - that is enough data

    for message in results:
        psd = PointSupplementStreamParser(
            stream_definition=SalinityTransform.outgoing_stream_def,
            stream_granule=message)

        # Test the handy info method for the names of fields in the stream def
        assertions('salinity' in psd.list_field_names())

        # You have to know the name of the coverage in the stream def
        salinity = psd.get_values('salinity')

        import numpy
        assertions(isinstance(salinity, numpy.ndarray))

        assertions(numpy.nanmin(salinity) > 0.0)  # salinity should always be greater than 0
def test_usgs_integration(self):
    '''
    test_usgs_integration
    Test full DM Services Integration using usgs
    '''
    cc = self.container
    assertions = self.assertTrue

    #-----------------------------
    # Copy below here
    #-----------------------------
    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    data_retriever_service = DataRetrieverServiceClient(node=cc.node)
    transform_management_service = TransformManagementServiceClient(node=cc.node)
    process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

    process_list = []
    datasets = []

    datastore_name = 'test_usgs_integration'

    #---------------------------
    # Set up ingestion
    #---------------------------
    # Configure ingestion using eight workers, ingesting to the test_usgs_integration datastore with the SCIDATA profile
    log.debug('Calling create_ingestion_configuration')
    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id='science_data',
        couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'),
        number_of_workers=8
    )
    #
    ingestion_management_service.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id)

    usgs_stream_def = USGS_stream_definition()

    stream_def_id = pubsub_management_service.create_stream_definition(
        container=usgs_stream_def, name='Junk definition')

    #---------------------------
    # Set up the producers (CTD Simulators)
    #---------------------------
    # Launch two simulated CTD producers
    for iteration in xrange(2):
        # Make a stream to output on
        stream_id = pubsub_management_service.create_stream(stream_definition_id=stream_def_id)

        #---------------------------
        # Set up the datasets
        #---------------------------
        dataset_id = dataset_management_service.create_dataset(
            stream_id=stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule'
        )

        # Keep track of the datasets
        datasets.append(dataset_id)

        stream_policy_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=ingestion_configuration_id
        )

        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': 'eoi.agent.handler.usgs_stream_publisher',
            'class': 'UsgsPublisher'
        }
        configuration = {
            'process': {
                'stream_id': stream_id,
            }
        }
        procdef_id = process_dispatcher.create_process_definition(
            process_definition=producer_definition)
        log.debug('LUKE_DEBUG: procdef_id: %s', procdef_id)
        pid = process_dispatcher.schedule_process(process_definition_id=procdef_id,
                                                  configuration=configuration)

        # Keep track, we'll kill 'em later.
        process_list.append(pid)

    # Get about 4 seconds of data
    time.sleep(4)

    #---------------------------
    # Stop producing data
    #---------------------------
    for process in process_list:
        process_dispatcher.cancel_process(process)

    #----------------------------------------------
    # The replay and the transform, a love story.
    #----------------------------------------------
    # Happy Valentines to the clever coder who catches the above!
    transform_definition = ProcessDefinition()
    transform_definition.executable = {
        'module': 'ion.processes.data.transforms.transform_example',
        'class': 'TransformCapture'
    }
    transform_definition_id = process_dispatcher.create_process_definition(
        process_definition=transform_definition)

    dataset_id = datasets.pop()  # Just need one for now
    replay_id, stream_id = data_retriever_service.define_replay(dataset_id=dataset_id)

    #--------------------------------------------
    # I'm selling magazine subscriptions here!
    #--------------------------------------------
    subscription = pubsub_management_service.create_subscription(
        query=StreamQuery(stream_ids=[stream_id]),
        exchange_name='transform_capture_point')

    #--------------------------------------------
    # Start the transform (capture)
    #--------------------------------------------
    transform_id = transform_management_service.create_transform(
        name='capture_transform',
        in_subscription_id=subscription,
        process_definition_id=transform_definition_id
    )

    transform_management_service.activate_transform(transform_id=transform_id)

    #--------------------------------------------
    # BEGIN REPLAY!
    #--------------------------------------------
    data_retriever_service.start_replay(replay_id=replay_id)

    #--------------------------------------------
    # Let's get some boundaries
    #--------------------------------------------
    bounds = dataset_management_service.get_dataset_bounds(dataset_id=dataset_id)
def setUp(self):
    """
    Set up the test environment to exercise use of the instrument agent, including:
    * define driver_config parameters.
    * create container with required services and container client.
    * create publication stream ids for each driver data stream.
    * create stream_config parameters.
    * create and activate subscriptions for agent data streams.
    * spawn instrument agent process and create agent client.
    * add cleanup functions to cause subscribers to get stopped.
    """

    # params = { ('CTD', 'TA2'): -1.9434316e-05,
    #            ('CTD', 'PTCA1'): 1.3206866,
    #            ('CTD', 'TCALDATE'): [8, 11, 2006] }

    # for tup in params:
    #     print tup

    self.addCleanup(self.customCleanUp)

    # Names of agent data streams to be configured.
    parsed_stream_name = 'ctd_parsed'
    raw_stream_name = 'ctd_raw'

    # Driver configuration.
    # Simulator
    self.driver_config = {
        'svr_addr': 'localhost',
        'cmd_port': 5556,
        'evt_port': 5557,
        'dvr_mod': 'ion.agents.instrument.drivers.sbe37.sbe37_driver',
        'dvr_cls': 'SBE37Driver',
        'comms_config': {
            SBE37Channel.CTD: {
                'method': 'ethernet',
                'device_addr': CFG.device.sbe37.host,
                'device_port': CFG.device.sbe37.port,
                'server_addr': 'localhost',
                'server_port': 8888
            }
        }
    }

    # Hardware
    '''
    self.driver_config = {
        'svr_addr': 'localhost',
        'cmd_port': 5556,
        'evt_port': 5557,
        'dvr_mod': 'ion.agents.instrument.drivers.sbe37.sbe37_driver',
        'dvr_cls': 'SBE37Driver',
        'comms_config': {
            SBE37Channel.CTD: {
                'method': 'ethernet',
                'device_addr': '137.110.112.119',
                'device_port': 4001,
                'server_addr': 'localhost',
                'server_port': 8888
            }
        }
    }
    '''

    # Start container.
    self._start_container()

    # Establish endpoint with container (used in tests below).
    self._container_client = ContainerAgentClient(node=self.container.node,
                                                  name=self.container.name)

    # Bring up services in a deploy file (no need to message).
    self.container.start_rel_from_url('res/deploy/r2dm.yml')

    # Create a pubsub client to create streams.
    self._pubsub_client = PubsubManagementServiceClient(node=self.container.node)

    # A callback for processing subscribed-to data.
    def consume(message, headers):
        log.info('Subscriber received message: %s', str(message))

    # Create a stream subscriber registrar to create subscribers.
    subscriber_registrar = StreamSubscriberRegistrar(process=self.container,
                                                     node=self.container.node)

    self.subs = []

    # Create streams for each stream named in driver.
    self.stream_config = {}
    for (stream_name, val) in PACKET_CONFIG.iteritems():
        stream_def = ctd_stream_definition(stream_id=None)
        stream_def_id = self._pubsub_client.create_stream_definition(container=stream_def)
        stream_id = self._pubsub_client.create_stream(name=stream_name,
                                                      stream_definition_id=stream_def_id,
                                                      original=True,
                                                      encoding='ION R2')
        self.stream_config[stream_name] = stream_id

        # Create subscriptions for each stream.
        exchange_name = '%s_queue' % stream_name
        sub = subscriber_registrar.create_subscriber(exchange_name=exchange_name,
                                                     callback=consume)
        sub.start()
        query = StreamQuery(stream_ids=[stream_id])
        sub_id = self._pubsub_client.create_subscription(query=query,
                                                         exchange_name=exchange_name)
        self._pubsub_client.activate_subscription(sub_id)
        self.subs.append(sub)

    # Add cleanup function to stop subscribers.
    def stop_subscriber(sub_list):
        for sub in sub_list:
            sub.stop()
    self.addCleanup(stop_subscriber, self.subs)

    # Create agent config.
    self.agent_resource_id = '123xyz'
    self.agent_config = {
        'driver_config': self.driver_config,
        'stream_config': self.stream_config,
        'agent': {'resource_id': self.agent_resource_id}
    }

    # Launch an instrument agent process.
    self._ia_name = 'agent007'
    self._ia_mod = 'ion.agents.instrument.instrument_agent'
    self._ia_class = 'InstrumentAgent'
    self._ia_pid = self._container_client.spawn_process(name=self._ia_name,
                                                        module=self._ia_mod,
                                                        cls=self._ia_class,
                                                        config=self.agent_config)
    log.info('got pid=%s', str(self._ia_pid))

    self._ia_client = None
    # Start a resource agent client to talk with the instrument agent.
    self._ia_client = ResourceAgentClient(self.agent_resource_id, process=FakeProcess())
    log.info('got ia client %s', str(self._ia_client))
def run_even_odd_transform(self):
    '''
    This example script runs a chained three-way transform:

            B
    A <
            C

    Where A is the even_odd transform (generates a stream of even and odd numbers from input)
    and B and C are the basic transforms that receive even and odd input
    '''
    pubsub_cli = PubsubManagementServiceClient(node=self.container.node)
    tms_cli = TransformManagementServiceClient(node=self.container.node)
    procd_cli = ProcessDispatcherServiceClient(node=self.container.node)

    #-------------------------------
    # Process Definition
    #-------------------------------
    # Create the process definition for the basic transform
    process_definition = IonObject(RT.ProcessDefinition, name='basic_transform_definition')
    process_definition.executable = {
        'module': 'ion.processes.data.transforms.transform_example',
        'class': 'TransformExample'
    }
    basic_transform_definition_id = procd_cli.create_process_definition(
        process_definition=process_definition)

    # Create the process definition for the TransformEvenOdd
    process_definition = IonObject(RT.ProcessDefinition, name='evenodd_transform_definition')
    process_definition.executable = {
        'module': 'ion.processes.data.transforms.transform_example',
        'class': 'TransformEvenOdd'
    }
    evenodd_transform_definition_id = procd_cli.create_process_definition(
        process_definition=process_definition)

    #-------------------------------
    # Streams
    #-------------------------------
    input_stream_id = pubsub_cli.create_stream(name='input_stream', original=True)
    even_stream_id = pubsub_cli.create_stream(name='even_stream', original=True)
    odd_stream_id = pubsub_cli.create_stream(name='odd_stream', original=True)

    #-------------------------------
    # Subscriptions
    #-------------------------------
    query = StreamQuery(stream_ids=[input_stream_id])
    input_subscription_id = pubsub_cli.create_subscription(query=query,
                                                           exchange_name='input_queue')

    query = StreamQuery(stream_ids=[even_stream_id])
    even_subscription_id = pubsub_cli.create_subscription(query=query,
                                                          exchange_name='even_queue')

    query = StreamQuery(stream_ids=[odd_stream_id])
    odd_subscription_id = pubsub_cli.create_subscription(query=query,
                                                         exchange_name='odd_queue')

    #-------------------------------
    # Launch the EvenOdd Transform
    #-------------------------------
    evenodd_id = tms_cli.create_transform(
        name='even_odd',
        in_subscription_id=input_subscription_id,
        out_streams={'even': even_stream_id, 'odd': odd_stream_id},
        process_definition_id=evenodd_transform_definition_id,
        configuration={})
    tms_cli.activate_transform(evenodd_id)

    #-------------------------------
    # Launch the Even Processing Transform
    #-------------------------------
    even_transform_id = tms_cli.create_transform(
        name='even_transform',
        in_subscription_id=even_subscription_id,
        process_definition_id=basic_transform_definition_id,
        configuration={})
    tms_cli.activate_transform(even_transform_id)

    #-------------------------------
    # Launch the Odd Processing Transform
    #-------------------------------
    odd_transform_id = tms_cli.create_transform(
        name='odd_transform',
        in_subscription_id=odd_subscription_id,
        process_definition_id=basic_transform_definition_id,
        configuration={})
    tms_cli.activate_transform(odd_transform_id)

    #-------------------------------
    # Spawn the Streaming Producer
    #-------------------------------
    id_p = self.container.spawn_process(
        'myproducer',
        'ion.processes.data.transforms.transform_example',
        'TransformExampleProducer',
        {
            'process': {
                'type': 'stream_process',
                'publish_streams': {'out_stream': input_stream_id}
            },
            'stream_producer': {'interval': 4000}
        })
    self.container.proc_manager.procs[id_p].start()
def test_update_stream_subscription(self):

    q = gevent.queue.Queue()

    def message_received(message, headers):
        q.put(message)

    subscriber = self.stream_subscriber.create_subscriber(exchange_name='a_queue',
                                                          callback=message_received)
    subscriber.start()

    self.pubsub_cli.activate_subscription(self.ctd_subscription_id)

    # Both publishers are received by the subscriber
    self.ctd_stream1_publisher.publish('message1')
    self.assertEqual(q.get(timeout=5), 'message1')
    self.assertTrue(q.empty())

    self.ctd_stream2_publisher.publish('message2')
    self.assertEqual(q.get(timeout=5), 'message2')
    self.assertTrue(q.empty())

    # Update the subscription by removing a stream...
    subscription = self.pubsub_cli.read_subscription(self.ctd_subscription_id)
    stream_ids = list(subscription.query.stream_ids)
    stream_ids.remove(self.ctd_stream2_id)
    self.pubsub_cli.update_subscription(
        subscription_id=subscription._id,
        query=StreamQuery(stream_ids=stream_ids)
    )

    # Stream 2 is no longer received
    self.ctd_stream2_publisher.publish('message2')
    p = None
    with self.assertRaises(gevent.queue.Empty) as cm:
        p = q.get(timeout=1)
    ex = cm.exception
    self.assertEqual(str(ex), '')
    self.assertEqual(p, None)

    # Stream 1 is as before
    self.ctd_stream1_publisher.publish('message1')
    self.assertEqual(q.get(timeout=5), 'message1')
    self.assertTrue(q.empty())

    # Now switch the active streams...

    # Update the subscription by removing a stream...
    self.pubsub_cli.update_subscription(
        subscription_id=self.ctd_subscription_id,
        query=StreamQuery([self.ctd_stream2_id])
    )

    # Stream 1 is no longer received
    self.ctd_stream1_publisher.publish('message1')
    p = None
    with self.assertRaises(gevent.queue.Empty) as cm:
        p = q.get(timeout=1)
    ex = cm.exception
    self.assertEqual(str(ex), '')
    self.assertEqual(p, None)

    # Stream 2 is received
    self.ctd_stream2_publisher.publish('message2')
    self.assertEqual(q.get(timeout=5), 'message2')
    self.assertTrue(q.empty())

    subscriber.stop()
def create_data_process(self,
                        data_process_definition_id=None,
                        in_data_product_ids='',
                        out_data_products=None,
                        configuration=None):
    """
    @param  data_process_definition_id: Object with definition of the transform to apply to the input data product
    @param  in_data_product_ids: IDs of the input data products
    @param  out_data_products: list of IDs of the output data products
    @retval data_process_id: ID of the newly created data process object
    """

    inform = "Input Data Product: " + str(in_data_product_ids) + \
             " Transformed by: " + str(data_process_definition_id) + \
             " To create output Product: " + str(out_data_products)
    log.debug("DataProcessManagementService:create_data_process()\n" + inform)

    if configuration is None:
        configuration = {}

    # Create and store a new DataProcess with the resource registry
    log.debug("DataProcessManagementService:create_data_process - Create and store a new DataProcess with the resource registry")
    data_process_def_obj = self.read_data_process_definition(data_process_definition_id)

    data_process_name = "process_" + data_process_def_obj.name + time.ctime()
    self.data_process = IonObject(RT.DataProcess, name=data_process_name)
    data_process_id, version = self.clients.resource_registry.create(self.data_process)
    log.debug("DataProcessManagementService:create_data_process - Create and store a new DataProcess with the resource registry data_process_id: %s",
              str(data_process_id))

    # Register the data process instance as a data producer with DataAcquisitionMgmtSvc
    #TODO: should this be outside this method? Called by orchestration?
    data_producer_id = self.clients.data_acquisition_management.register_process(data_process_id)
    log.debug("DataProcessManagementService:create_data_process register process with DataAcquisitionMgmtSvc: data_producer_id: %s (L4-CI-SA-RQ-181)",
              str(data_producer_id))

    self.output_stream_dict = {}
    #TODO: should this be outside this method? Called by orchestration?
    if out_data_products is None:
        raise BadRequest("Data Process must have output product(s) specified %s",
                         str(data_process_definition_id))
    for name, out_data_product_id in out_data_products.iteritems():

        # Check that the product is not already associated with a producer
        producer_ids, _ = self.clients.resource_registry.find_objects(
            out_data_product_id, PRED.hasDataProducer, RT.DataProducer, True)
        if producer_ids:
            raise BadRequest("Data Product should not already be associated to a DataProducer %s hasDataProducer %s",
                             str(data_process_id), str(producer_ids[0]))

        # Assign each output Data Product to this producer resource
        out_data_product_obj = self.clients.resource_registry.read(out_data_product_id)
        if not out_data_product_obj:
            raise NotFound("Output Data Product %s does not exist" % out_data_product_id)

        # Associate with DataProcess: register as an output product for this process
        log.debug("DataProcessManagementService:create_data_process link data process %s and output data product: %s (L4-CI-SA-RQ-260)",
                  str(data_process_id), str(out_data_product_id))
        self.clients.data_acquisition_management.assign_data_product(
            data_process_id, out_data_product_id, create_stream=False)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.clients.resource_registry.find_objects(
            out_data_product_id, PRED.hasStream, RT.Stream, True)
        log.debug("DataProcessManagementService:create_data_process retrieve out data prod streams: %s",
                  str(stream_ids))
        if not stream_ids:
            raise NotFound("No Stream created for output Data Product " + str(out_data_product_id))
        if len(stream_ids) != 1:
            raise BadRequest("Data Product should only have ONE stream at this time" + str(out_data_product_id))
        self.output_stream_dict[name] = stream_ids[0]
        log.debug("DataProcessManagementService:create_data_process - Register the data process instance as a data producer with DataAcquisitionMgmtSvc, then retrieve the id of the OUTPUT stream out_stream_id: "
                  + str(self.output_stream_dict[name]))

    # Associate with dataProcess
    self.clients.resource_registry.create_association(
        data_process_id, PRED.hasProcessDefinition, data_process_definition_id)

    # Check if data process has lookup tables attached
    self._find_lookup_tables(data_process_definition_id, configuration)

    #Todo: currently this is handled explicitly after creating the data product; that code then calls DMAS:assign_data_product
    log.debug("DataProcessManagementService:create_data_process associate data process workflows with source data products %s hasInputProducts %s (L4-CI-SA-RQ-260)",
              str(data_process_id), str(in_data_product_ids))
    for in_data_product_id in in_data_product_ids:
        self.clients.resource_registry.create_association(
            data_process_id, PRED.hasInputProduct, in_data_product_id)

        # Check if in data product is attached to an instrument; check InstrumentDevice and InstrumentModel for lookup table attachments
        instdevice_ids, _ = self.clients.resource_registry.find_subjects(
            RT.InstrumentDevice, PRED.hasOutputProduct, in_data_product_id, True)
        for instdevice_id in instdevice_ids:
            log.debug("DataProcessManagementService:create_data_process instrument device_id assoc to the input data product of this data process: %s (L4-CI-SA-RQ-231)",
                      str(instdevice_id))
            self._find_lookup_tables(instdevice_id, configuration)
            instmodel_ids, _ = self.clients.resource_registry.find_objects(
                instdevice_id, PRED.hasModel, RT.InstrumentModel, True)
            for instmodel_id in instmodel_ids:
                log.debug("DataProcessManagementService:create_data_process instmodel_id assoc to the instDevice: %s",
                          str(instmodel_id))
                self._find_lookup_tables(instmodel_id, configuration)

    #-------------------------------
    # Create subscription from in_data_product, which should already be associated with a stream via the Data Producer
    #-------------------------------

    # # first - get the data producer associated with this IN data product
    # log.debug("DataProcessManagementService:create_data_process - get the data producer associated with this IN data product")
    # producer_ids, _ = self.clients.resource_registry.find_objects(in_data_product_id, PRED.hasDataProducer, RT.DataProducer, True)
    # if not producer_ids:
    #     raise NotFound("No Data Producer created for this Data Product " + str(in_data_product_id))
    # if len(producer_ids) != 1:
    #     raise BadRequest("Data Product should only have ONE Data Producer at this time" + str(in_data_product_id))
    # in_product_producer = producer_ids[0]
    # log.debug("DataProcessManagementService:create_data_process - get the data producer associated with this IN data product in_product_producer: " + str(in_product_producer))

    # second - get the streams associated with these IN data products
    self.in_stream_ids = []
    for in_data_product_id in in_data_product_ids:
        log.debug("DataProcessManagementService:create_data_process - get the stream associated with this IN data product")
        stream_ids, _ = self.clients.resource_registry.find_objects(
            in_data_product_id, PRED.hasStream, RT.Stream, True)
        if not stream_ids:
            raise NotFound("No Stream created for this IN Data Product " + str(in_data_product_id))
        if len(stream_ids) != 1:
            raise BadRequest("IN Data Product should only have ONE stream at this time" + str(in_data_product_id))
        log.debug("DataProcessManagementService:create_data_process - get the stream associated with this IN data product: %s in_stream_id: %s",
                  str(in_data_product_id), str(stream_ids[0]))
        self.in_stream_ids.append(stream_ids[0])

    # Finally - create a subscription to the input stream
    log.debug("DataProcessManagementService:create_data_process - Finally - create a subscription to the input stream")
    in_data_product_obj = self.clients.data_product_management.read_data_product(in_data_product_id)
    query = StreamQuery(stream_ids=self.in_stream_ids)
    #self.input_subscription_id = self.clients.pubsub_management.create_subscription(query=query, exchange_name=in_data_product_obj.name)
    self.input_subscription_id = self.clients.pubsub_management.create_subscription(
        query=query, exchange_name=data_process_name)
    log.debug("DataProcessManagementService:create_data_process - Finally - create a subscription to the input stream input_subscription_id"
              + str(self.input_subscription_id))

    # Add the subscription id to the resource for clean up later
    data_process_obj = self.clients.resource_registry.read(data_process_id)
    data_process_obj.input_subscription_id = self.input_subscription_id
    self.clients.resource_registry.update(data_process_obj)

    procdef_ids, _ = self.clients.resource_registry.find_objects(
        data_process_definition_id, PRED.hasProcessDefinition, RT.ProcessDefinition, id_only=True)
    if not procdef_ids:
        raise BadRequest("Cannot find associated ProcessDefinition for DataProcessDefinition id=%s"
                         % data_process_definition_id)
    process_definition_id = procdef_ids[0]

    # Launch the transform process
    log.debug("DataProcessManagementService:create_data_process - Launch the first transform process:")
    log.debug("DataProcessManagementService:create_data_process - input_subscription_id: " + str(self.input_subscription_id))
    log.debug("DataProcessManagementService:create_data_process - out_stream_id: " + str(self.output_stream_dict))
    log.debug("DataProcessManagementService:create_data_process - process_definition_id: " + str(process_definition_id))
    log.debug("DataProcessManagementService:create_data_process - data_process_id: " + str(data_process_id))

    transform_id = self.clients.transform_management.create_transform(
        name=data_process_id,
        description=data_process_id,
        in_subscription_id=self.input_subscription_id,
        out_streams=self.output_stream_dict,
        process_definition_id=process_definition_id,
        configuration=configuration)

    log.debug("DataProcessManagementService:create_data_process - transform_id: " + str(transform_id))

    self.clients.resource_registry.create_association(data_process_id, PRED.hasTransform, transform_id)
    log.debug("DataProcessManagementService:create_data_process - Launch the first transform process transform_id"
              + str(transform_id))

    # TODO: Flesh out details of transform mgmt svc schedule method
    # self.clients.transform_management_service.schedule_transform(transform_id)

    return data_process_id
def setUp(self):
    mock_clients = self._create_service_mock('pubsub_management')
    self.pubsub_service = PubsubManagementService()
    self.pubsub_service.clients = mock_clients
    self.pubsub_service.container = DotDict()
    self.pubsub_service.container.node = Mock()

    # Save some typing
    self.mock_create = mock_clients.resource_registry.create
    self.mock_update = mock_clients.resource_registry.update
    self.mock_delete = mock_clients.resource_registry.delete
    self.mock_read = mock_clients.resource_registry.read
    self.mock_create_association = mock_clients.resource_registry.create_association
    self.mock_delete_association = mock_clients.resource_registry.delete_association
    self.mock_find_resources = mock_clients.resource_registry.find_resources
    self.mock_find_associations = mock_clients.resource_registry.find_associations
    self.mock_find_objects = mock_clients.resource_registry.find_objects

    # StreamDefinition
    self.stream_definition_id = "stream_definition_id"
    self.stream_definition = Mock()
    self.stream_definition.name = "SampleStreamDefinition"
    self.stream_definition.description = "Sample StreamDefinition In PubSub"
    self.stream_definition.container = StreamDefinitionContainer()

    # Stream
    self.stream_id = "stream_id"
    self.stream = Mock()
    self.stream.name = "SampleStream"
    self.stream.description = "Sample Stream In PubSub"
    self.stream.encoding = ""
    self.stream.original = True
    self.stream.stream_definition_id = self.stream_definition_id
    self.stream.url = ""
    self.stream.producers = ['producer1', 'producer2', 'producer3']

    # Subscription
    self.subscription_id = "subscription_id"
    self.subscription_stream_query = Mock()
    self.subscription_stream_query.name = "SampleSubscriptionStreamQuery"
    self.subscription_stream_query.description = "Sample Subscription With StreamQuery"
    self.subscription_stream_query.query = StreamQuery([self.stream_id])
    self.subscription_stream_query.exchange_name = "ExchangeName"
    self.subscription_stream_query.subscription_type = SubscriptionTypeEnum.STREAM_QUERY
    self.subscription_stream_query.is_active = False

    self.subscription_exchange_query = Mock()
    self.subscription_exchange_query.name = "SampleSubscriptionExchangeQuery"
    self.subscription_exchange_query.description = "Sample Subscription With Exchange Query"
    self.subscription_exchange_query.query = ExchangeQuery()
    self.subscription_exchange_query.exchange_name = "ExchangeName"
    self.subscription_exchange_query.subscription_type = SubscriptionTypeEnum.EXCHANGE_QUERY
    self.subscription_exchange_query.is_active = False

    # Subscription Has Stream Association
    self.association_id = "association_id"
    self.subscription_to_stream_association = Mock()
    self.subscription_to_stream_association._id = self.association_id

    self.stream_route = Mock()
    self.stream_route.routing_key = self.stream_id + '.data'
def run_basic_transform(self):
    '''
    Runs a basic example of a transform. It chains two transforms together, each adding 1 to its input.

    Producer -> A -> B

    Producer generates a number every four seconds and publishes it on the 'ctd_output_stream';
    the producer is acting as a CTD or instrument in this example.
    A is a basic transform that increments its input and publishes it on the 'transform_output' stream.
    B is a basic transform that receives input.

    All transforms write logging data to 'FS.TEMP/transform_output' so you can visually see activity of the transforms.
    '''
    pubsub_cli = PubsubManagementServiceClient(node=self.container.node)
    tms_cli = TransformManagementServiceClient(node=self.container.node)
    procd_cli = ProcessDispatcherServiceClient(node=self.container.node)

    #-------------------------------
    # Process Definition
    #-------------------------------
    process_definition = IonObject(RT.ProcessDefinition, name='transform_process_definition')
    process_definition.executable = {
        'module': 'ion.processes.data.transforms.transform_example',
        'class': 'TransformExample'
    }
    process_definition_id = procd_cli.create_process_definition(process_definition)

    #-------------------------------
    # First Transform
    #-------------------------------

    # Create a dummy output stream from a 'ctd' instrument
    ctd_output_stream_id = pubsub_cli.create_stream(name='ctd_output_stream', original=True)

    # Create the subscription to the ctd_output_stream
    query = StreamQuery(stream_ids=[ctd_output_stream_id])
    ctd_subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='ctd_output')

    # Create an output stream for the transform
    transform_output_stream_id = pubsub_cli.create_stream(name='transform_output', original=True)

    configuration = {}

    # Launch the first transform process
    transform_id = tms_cli.create_transform(
        name='basic_transform',
        in_subscription_id=ctd_subscription_id,
        out_streams={'output': transform_output_stream_id},
        process_definition_id=process_definition_id,
        configuration=configuration)
    tms_cli.activate_transform(transform_id)

    #-------------------------------
    # Second Transform
    #-------------------------------

    # Create a SUBSCRIPTION to this output stream for the second transform
    query = StreamQuery(stream_ids=[transform_output_stream_id])
    second_subscription_id = pubsub_cli.create_subscription(query=query,
                                                            exchange_name='final_output')

    # Create a final output stream
    final_output_id = pubsub_cli.create_stream(name='final_output', original=True)

    configuration = {}

    second_transform_id = tms_cli.create_transform(
        name='second_transform',
        in_subscription_id=second_subscription_id,
        out_streams={'output': final_output_id},
        process_definition_id=process_definition_id,
        configuration=configuration)
    tms_cli.activate_transform(second_transform_id)

    #-------------------------------
    # Producer (Sample Input)
    #-------------------------------

    # Create a producing example process
    id_p = self.container.spawn_process(
        'myproducer',
        'ion.processes.data.transforms.transform_example',
        'TransformExampleProducer',
        {
            'process': {
                'type': 'stream_process',
                'publish_streams': {'out_stream': ctd_output_stream_id}
            },
            'stream_producer': {'interval': 4000}
        })
    self.container.proc_manager.procs[id_p].start()
def test_integrated_transform(self):
    '''
    This example script runs a chained three-way transform:

              -> B (even)
    input -> A
              -> C (odd)

    where A is the even_odd transform (generates a stream of even and odd numbers from its input)
    and B and C are the basic transforms that receive the even and odd streams, respectively.
    '''
    cc = self.container
    assertions = self.assertTrue

    pubsub_cli = PubsubManagementServiceClient(node=cc.node)
    rr_cli = ResourceRegistryServiceClient(node=cc.node)
    tms_cli = TransformManagementServiceClient(node=cc.node)

    #-------------------------------
    # Process Definitions
    #-------------------------------

    # Create the process definition for the basic transform
    process_definition = IonObject(RT.ProcessDefinition, name='basic_transform_definition')
    process_definition.executable = {
        'module': 'ion.processes.data.transforms.transform_example',
        'class': 'TransformExample'
    }
    basic_transform_definition_id, _ = rr_cli.create(process_definition)

    # Create the process definition for the TransformEvenOdd
    process_definition = IonObject(RT.ProcessDefinition, name='evenodd_transform_definition')
    process_definition.executable = {
        'module': 'ion.processes.data.transforms.transform_example',
        'class': 'TransformEvenOdd'
    }
    evenodd_transform_definition_id, _ = rr_cli.create(process_definition)

    #-------------------------------
    # Streams
    #-------------------------------
    streams = [pubsub_cli.create_stream() for i in xrange(5)]

    #-------------------------------
    # Subscriptions
    #-------------------------------

    query = StreamQuery(stream_ids=[streams[0]])
    input_subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='input_queue')

    query = StreamQuery(stream_ids=[streams[1]])  # even output
    even_subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='even_queue')

    query = StreamQuery(stream_ids=[streams[2]])  # odd output
    odd_subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='odd_queue')

    #-------------------------------
    # Launch the EvenOdd Transform
    #-------------------------------

    evenodd_id = tms_cli.create_transform(
        name='even_odd',
        in_subscription_id=input_subscription_id,
        out_streams={'even': streams[1], 'odd': streams[2]},
        process_definition_id=evenodd_transform_definition_id,
        configuration={})
    tms_cli.activate_transform(evenodd_id)

    #-------------------------------
    # Launch the Even Processing Transform
    #-------------------------------

    even_transform_id = tms_cli.create_transform(
        name='even_transform',
        in_subscription_id=even_subscription_id,
        out_streams={'even_plus1': streams[3]},
        process_definition_id=basic_transform_definition_id,
        configuration={})
    tms_cli.activate_transform(even_transform_id)

    #-------------------------------
    # Launch the Odd Processing Transform
    #-------------------------------

    odd_transform_id = tms_cli.create_transform(
        name='odd_transform',
        in_subscription_id=odd_subscription_id,
        out_streams={'odd_plus1': streams[4]},
        process_definition_id=basic_transform_definition_id,
        configuration={})
    tms_cli.activate_transform(odd_transform_id)

    #-------------------------------
    # Set up final subscribers
    #-------------------------------

    evenplus1_subscription_id = pubsub_cli.create_subscription(
        query=StreamQuery([streams[3]]),
        exchange_name='evenplus1_queue',
        name='EvenPlus1Subscription',
        description='EvenPlus1 SubscriptionDescription')

    oddplus1_subscription_id = pubsub_cli.create_subscription(
        query=StreamQuery([streams[4]]),
        exchange_name='oddplus1_queue',
        name='OddPlus1Subscription',
        description='OddPlus1 SubscriptionDescription')

    total_msg_count = 2

    msgs = gevent.queue.Queue()

    def even1_message_received(message, headers):
        input = int(message.get('num'))
        assertions(input % 2)  # Assert it is odd (the transform adds 1 to an even number)
        msgs.put(True)

    def odd1_message_received(message, headers):
        input = int(message.get('num'))
        assertions(not (input % 2))  # Assert it is even
        msgs.put(True)

    subscriber_registrar = StreamSubscriberRegistrar(process=cc, node=cc.node)
    even_subscriber = subscriber_registrar.create_subscriber(exchange_name='evenplus1_queue', callback=even1_message_received)
    odd_subscriber = subscriber_registrar.create_subscriber(exchange_name='oddplus1_queue', callback=odd1_message_received)

    # Start subscribers
    even_subscriber.start()
    odd_subscriber.start()

    # Activate subscriptions
    pubsub_cli.activate_subscription(evenplus1_subscription_id)
    pubsub_cli.activate_subscription(oddplus1_subscription_id)

    #-------------------------------
    # Set up fake stream producer
    #-------------------------------

    pid = cc.spawn_process(name='dummy_process_for_test', module='pyon.ion.process', cls='SimpleProcess', config={})
    dummy_process = cc.proc_manager.procs[pid]

    # Normally the user does not see or create the publisher; this is the container's business.
    # For the test we need to set it up explicitly.
    publisher_registrar = StreamPublisherRegistrar(process=dummy_process, node=cc.node)
    stream_publisher = publisher_registrar.create_publisher(stream_id=streams[0])

    #-------------------------------
    # Start test
    #-------------------------------

    # Publish on the input stream
    for i in xrange(total_msg_count):
        stream_publisher.publish({'num': str(i)})
        time.sleep(0.5)

    for i in xrange(total_msg_count * 2):
        try:
            msgs.get(timeout=5)  # a blocking get() would never raise Empty; the 5 s timeout is an assumption
        except Empty:
            assertions(False, "Failed to process all messages correctly.")
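# Illustrative sketch only: the even_odd transform (A above) conceptually routes each
# incoming number to one of two named output publishers. The 'even'/'odd' publisher
# objects and the {'num': ...} message shape are assumptions mirroring this test's
# producer, not the actual TransformEvenOdd implementation.
def split_even_odd(message, publishers):
    value = int(message.get('num'))
    key = 'even' if value % 2 == 0 else 'odd'    # pick the output stream by parity
    publishers[key].publish({'num': str(value)})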
def run_external_transform(self):
    '''
    This example script illustrates how a transform can interact with an outside process (very basic).
    It launches an external_transform example which uses the operating system command 'bc' to add 1 to the input.

    Producer -> A -> 'FS.TEMP/transform_output'

    A is an external transform that spawns an OS process to increment the input by 1.
    '''
    pubsub_cli = PubsubManagementServiceClient(node=self.container.node)
    tms_cli = TransformManagementServiceClient(node=self.container.node)
    procd_cli = ProcessDispatcherServiceClient(node=self.container.node)

    #-------------------------------
    # Process Definition
    #-------------------------------
    process_definition = ProcessDefinition(name='external_transform_definition')
    process_definition.executable['module'] = 'ion.processes.data.transforms.transform_example'
    process_definition.executable['class'] = 'ExternalTransform'
    process_definition_id = procd_cli.create_process_definition(process_definition=process_definition)

    #-------------------------------
    # Streams
    #-------------------------------

    input_stream_id = pubsub_cli.create_stream(name='input_stream', original=True)

    #-------------------------------
    # Subscription
    #-------------------------------

    query = StreamQuery(stream_ids=[input_stream_id])
    input_subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='input_queue')

    #-------------------------------
    # Launch Transform
    #-------------------------------

    transform_id = tms_cli.create_transform(
        name='external_transform',
        in_subscription_id=input_subscription_id,
        process_definition_id=process_definition_id,
        configuration={})
    tms_cli.activate_transform(transform_id)

    #-------------------------------
    # Launch Producer
    #-------------------------------

    id_p = self.container.spawn_process(
        'myproducer',
        'ion.processes.data.transforms.transform_example',
        'TransformExampleProducer',
        {
            'process': {
                'type': 'stream_process',
                'publish_streams': {'out_stream': input_stream_id}
            },
            'stream_producer': {'interval': 4000}
        })
    self.container.proc_manager.procs[id_p].start()
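# Illustrative sketch only: shelling out to 'bc' to add 1 to a value, which is the kind
# of OS-level call the ExternalTransform wraps. This standalone helper is an assumption
# for illustration; it is not the ExternalTransform source.
import subprocess

def add_one_with_bc(value):
    # Feed "<value>+1" to bc on stdin and read the result back from stdout
    proc = subprocess.Popen(['bc'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    out, _ = proc.communicate('%s+1\n' % value)
    return int(out.strip())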
def test_raw_stream_integration(self):
    cc = self.container
    assertions = self.assertTrue

    #-----------------------------
    # Copy below here to run as a script (don't forget the imports of course!)
    #-----------------------------

    # Create some service clients...
    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

    # Declare some handy variables
    datastore_name = 'test_dm_integration'

    ###
    ### In the beginning there was one stream definition...
    ###

    # Create a stream definition for the data from the ctd simulator
    raw_ctd_stream_def = SBE37_RAW_stream_definition()
    raw_ctd_stream_def_id = pubsub_management_service.create_stream_definition(
        container=raw_ctd_stream_def, name='Simulated RAW CTD data')

    ###
    ### And one process definition...
    ###

    # ... for the ctd simulator
    producer_definition = ProcessDefinition()
    producer_definition.executable = {
        'module': 'ion.processes.data.raw_stream_publisher',
        'class': 'RawStreamPublisher'
    }
    raw_ctd_sim_procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)

    #---------------------------
    # Set up ingestion - this is an operator concern - not done by SA in a deployed system
    #---------------------------

    # Configure ingestion with one worker, ingesting to the test_dm_integration datastore with the SCIDATA profile
    log.debug('Calling create_ingestion_configuration')
    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id='science_data',
        couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'),
        number_of_workers=1)

    ingestion_management_service.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id)

    #---------------------------
    # Set up the producer (CTD Simulator)
    #---------------------------

    # Create the stream
    raw_ctd_stream_id = pubsub_management_service.create_stream(stream_definition_id=raw_ctd_stream_def_id)

    # Set up the dataset
    raw_ctd_dataset_id = dataset_management_service.create_dataset(
        stream_id=raw_ctd_stream_id,
        datastore_name=datastore_name,
        view_name='datasets/stream_join_granule')

    # Configure ingestion of this dataset
    raw_ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration(
        dataset_id=raw_ctd_dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id  # you need to know the ingestion configuration id!
        )
    # Hold onto raw_ctd_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

    # Start the ctd simulator to produce some data
    configuration = {
        'process': {
            'stream_id': raw_ctd_stream_id,
        }
    }
    raw_sim_pid = process_dispatcher.schedule_process(
        process_definition_id=raw_ctd_sim_procdef_id, configuration=configuration)

    ###
    ### Make a subscriber in the test to listen for raw data
    ###
    raw_subscription_id = pubsub_management_service.create_subscription(
        query=StreamQuery([raw_ctd_stream_id, ]),
        exchange_name='raw_test',
        name="test raw subscription",
        )

    # This is okay - even in CEI mode!
    pid = cc.spawn_process(name='dummy_process_for_test', module='pyon.ion.process', cls='SimpleProcess', config={})
    dummy_process = cc.proc_manager.procs[pid]

    subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, node=cc.node)

    result = gevent.event.AsyncResult()
    results = []

    def message_received(message, headers):
        log.warn('Raw data received!')
        results.append(message)
        if len(results) > 3:
            result.set(True)

    subscriber = subscriber_registrar.create_subscriber(exchange_name='raw_test', callback=message_received)
    subscriber.start()

    # After the queue has been created it is safe to activate the subscription
    pubsub_management_service.activate_subscription(subscription_id=raw_subscription_id)

    # Assert that we have received data
    assertions(result.get(timeout=10))

    # Stop the flow and parse the messages...
    process_dispatcher.cancel_process(raw_sim_pid)  # kill the ctd simulator process - that is enough data
    gevent.sleep(1)

    for message in results:
        sha1 = message.identifiables['stream_encoding'].sha1
        data = message.identifiables['data_stream'].values
        filename = FileSystem.get_hierarchical_url(FS.CACHE, sha1, ".raw")
        with open(filename, 'r') as f:
            assertions(data == f.read())
def instrument_test_driver(container):

    org_client = OrgManagementServiceClient(node=container.node)
    id_client = IdentityManagementServiceClient(node=container.node)

    system_actor = id_client.find_actor_identity_by_name(name=CFG.system.system_actor)
    log.info('system actor:' + system_actor._id)

    sa_header_roles = get_role_message_headers(org_client.find_all_roles_by_user(system_actor._id))

    # Names of agent data streams to be configured.
    parsed_stream_name = 'ctd_parsed'
    raw_stream_name = 'ctd_raw'

    # Driver configuration.
    # Simulator
    driver_config = {
        'svr_addr': 'localhost',
        'cmd_port': 5556,
        'evt_port': 5557,
        'dvr_mod': 'ion.agents.instrument.drivers.sbe37.sbe37_driver',
        'dvr_cls': 'SBE37Driver',
        'comms_config': {
            SBE37Channel.CTD: {
                'method': 'ethernet',
                'device_addr': CFG.device.sbe37.host,
                'device_port': CFG.device.sbe37.port,
                'server_addr': 'localhost',
                'server_port': 8888
            }
        }
    }
    # Hardware

    _container_client = ContainerAgentClient(node=container.node, name=container.name)

    # Create a pubsub client to create streams.
    _pubsub_client = PubsubManagementServiceClient(node=container.node)

    # A callback for processing subscribed-to data.
    def consume(message, headers):
        log.info('Subscriber received message: %s', str(message))

    # Create a stream subscriber registrar to create subscribers.
    subscriber_registrar = StreamSubscriberRegistrar(process=container, node=container.node)

    subs = []

    # Create streams for each stream named in the driver.
    stream_config = {}
    for (stream_name, val) in PACKET_CONFIG.iteritems():
        stream_def = ctd_stream_definition(stream_id=None)
        stream_def_id = _pubsub_client.create_stream_definition(container=stream_def)
        stream_id = _pubsub_client.create_stream(
            name=stream_name,
            stream_definition_id=stream_def_id,
            original=True,
            encoding='ION R2',
            headers={'ion-actor-id': system_actor._id, 'ion-actor-roles': sa_header_roles})
        stream_config[stream_name] = stream_id

        # Create subscriptions for each stream.
        exchange_name = '%s_queue' % stream_name
        sub = subscriber_registrar.create_subscriber(exchange_name=exchange_name, callback=consume)
        sub.start()
        query = StreamQuery(stream_ids=[stream_id])
        sub_id = _pubsub_client.create_subscription(query=query, exchange_name=exchange_name)
        _pubsub_client.activate_subscription(sub_id)
        subs.append(sub)

    # Create agent config.
    agent_resource_id = '123xyz'
    agent_config = {
        'driver_config': driver_config,
        'stream_config': stream_config,
        'agent': {'resource_id': agent_resource_id}
    }

    # Launch an instrument agent process.
    _ia_name = 'agent007'
    _ia_mod = 'ion.agents.instrument.instrument_agent'
    _ia_class = 'InstrumentAgent'
    _ia_pid = _container_client.spawn_process(
        name=_ia_name, module=_ia_mod, cls=_ia_class, config=agent_config)

    log.info('got pid=%s for resource_id=%s' % (str(_ia_pid), str(agent_resource_id)))
def test_activateInstrumentStream(self):

    # Create InstrumentModel
    instModel_obj = IonObject(RT.InstrumentModel,
                              name='SBE37IMModel',
                              description="SBE37IMModel",
                              model_label="SBE37IMModel")
    try:
        instModel_id = self.imsclient.create_instrument_model(instModel_obj)
    except BadRequest as ex:
        self.fail("failed to create new InstrumentModel: %s" % ex)
    print 'new InstrumentModel id = ', instModel_id

    # Create InstrumentAgent
    instAgent_obj = IonObject(RT.InstrumentAgent,
                              name='agent007',
                              description="SBE37IMAgent",
                              driver_module="ion.agents.instrument.instrument_agent",
                              driver_class="InstrumentAgent")
    try:
        instAgent_id = self.imsclient.create_instrument_agent(instAgent_obj)
    except BadRequest as ex:
        self.fail("failed to create new InstrumentAgent: %s" % ex)
    print 'new InstrumentAgent id = ', instAgent_id

    self.imsclient.assign_instrument_model_to_instrument_agent(instModel_id, instAgent_id)

    # Create InstrumentDevice
    log.debug('test_activateInstrumentStream: Create instrument resource to represent the SBE37 (SA Req: L4-CI-SA-RQ-241) ')
    instDevice_obj = IonObject(RT.InstrumentDevice,
                               name='SBE37IMDevice',
                               description="SBE37IMDevice",
                               serial_number="12345")
    try:
        instDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj)
        self.imsclient.assign_instrument_model_to_instrument_device(instModel_id, instDevice_id)
    except BadRequest as ex:
        self.fail("failed to create new InstrumentDevice: %s" % ex)
    log.debug("test_activateInstrumentStream: new InstrumentDevice id = %s (SA Req: L4-CI-SA-RQ-241) ", instDevice_id)

    driver_config = {
        'dvr_mod': 'ion.agents.instrument.drivers.sbe37.sbe37_driver',
        'dvr_cls': 'SBE37Driver',
        'workdir': '/tmp/',
    }

    instAgentInstance_obj = IonObject(RT.InstrumentAgentInstance,
                                      name='SBE37IMAgentInstance',
                                      description="SBE37IMAgentInstance",
                                      driver_config=driver_config,
                                      comms_device_address='sbe37-simulator.oceanobservatories.org',
                                      comms_device_port=4001,
                                      port_agent_work_dir='/tmp/',
                                      port_agent_delimeter=['<<', '>>'])
    instAgentInstance_id = self.imsclient.create_instrument_agent_instance(instAgentInstance_obj, instAgent_id, instDevice_id)

    # Create a stream definition for the data from the ctd simulator
    ctd_stream_def = SBE37_CDM_stream_definition()
    ctd_stream_def_id = self.pubsubcli.create_stream_definition(container=ctd_stream_def)
    log.debug('test_activateInstrumentStream new Stream Definition id = %s', ctd_stream_def_id)

    log.debug('test_activateInstrumentStream Creating new CDM data product with a stream definition')
    dp_obj = IonObject(RT.DataProduct, name='the parsed data', description='ctd stream test')
    try:
        data_product_id1 = self.dpclient.create_data_product(dp_obj, ctd_stream_def_id)
    except BadRequest as ex:
        self.fail("failed to create new data product: %s" % ex)
    log.debug('test_activateInstrumentStream new dp_id = %s', str(data_product_id1))

    self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=data_product_id1)

    self.dpclient.activate_data_product_persistence(data_product_id=data_product_id1, persist_data=True, persist_metadata=True)

    # Retrieve the id of the OUTPUT stream from the out Data Product
    stream_ids, _ = self.rrclient.find_objects(data_product_id1, PRED.hasStream, None, True)
    log.debug('test_activateInstrumentStream Data product streams1 = %s', str(stream_ids))

    simdata_subscription_id = self.pubsubcli.create_subscription(
        query=StreamQuery([stream_ids[0]]),
        exchange_name='Sim_data_queue',
        name='SimDataSubscription',
        description='SimData SubscriptionDescription')

    def simdata_message_received(message, headers):
        input = str(message)
        log.debug("test_activateInstrumentStream: granule received: %s", input)

    subscriber_registrar = StreamSubscriberRegistrar(process=self.container, node=self.container.node)
    simdata_subscriber = subscriber_registrar.create_subscriber(exchange_name='Sim_data_queue', callback=simdata_message_received)

    # Start subscribers
    simdata_subscriber.start()

    # Activate subscriptions
    self.pubsubcli.activate_subscription(simdata_subscription_id)

    log.debug('test_activateInstrumentStream Creating new RAW data product with a stream definition')
    raw_stream_def = SBE37_RAW_stream_definition()
    raw_stream_def_id = self.pubsubcli.create_stream_definition(container=raw_stream_def)

    dp_obj = IonObject(RT.DataProduct, name='the raw data', description='raw stream test')
    try:
        data_product_id2 = self.dpclient.create_data_product(dp_obj, raw_stream_def_id)
    except BadRequest as ex:
        self.fail("failed to create new data product: %s" % ex)
    log.debug('test_activateInstrumentStream new dp_id = %s', str(data_product_id2))

    self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=data_product_id2)

    self.dpclient.activate_data_product_persistence(data_product_id=data_product_id2, persist_data=True, persist_metadata=True)

    # Retrieve the id of the OUTPUT stream from the out Data Product
    stream_ids, _ = self.rrclient.find_objects(data_product_id2, PRED.hasStream, None, True)
    log.debug('test_activateInstrumentStream Data product streams2 = %s', str(stream_ids))

    self.imsclient.start_instrument_agent_instance(instrument_agent_instance_id=instAgentInstance_id)

    inst_agent_instance_obj = self.imsclient.read_instrument_agent_instance(instAgentInstance_id)
    log.debug('test_activateInstrumentStream Instrument agent instance obj: = %s', str(inst_agent_instance_obj))

    # Start a resource agent client to talk with the instrument agent.
    #self._ia_client = ResourceAgentClient('123xyz', name=inst_agent_instance_obj.agent_process_id, process=FakeProcess())
    self._ia_client = ResourceAgentClient(instDevice_id, process=FakeProcess())
    log.debug('test_activateInstrumentStream: got ia client %s', str(self._ia_client))

    cmd = AgentCommand(command='initialize')
    retval = self._ia_client.execute_agent(cmd)
    log.debug("test_activateInstrumentStream: initialize %s", str(retval))

    time.sleep(2)

    log.debug("test_activateInstrumentStream: Sending go_active command (L4-CI-SA-RQ-334)")
    cmd = AgentCommand(command='go_active')
    reply = self._ia_client.execute_agent(cmd)
    log.debug("test_activateInstrumentStream: return value from go_active %s", str(reply))
    time.sleep(2)

    cmd = AgentCommand(command='get_current_state')
    retval = self._ia_client.execute_agent(cmd)
    state = retval.result
    log.debug("test_activateInstrumentStream: current state after sending go_active command %s (L4-CI-SA-RQ-334)", str(state))

    cmd = AgentCommand(command='run')
    reply = self._ia_client.execute_agent(cmd)
    log.debug("test_activateInstrumentStream: run %s", str(reply))
    time.sleep(2)

    log.debug("test_activateInstrumentStream: calling go_streaming ")
    cmd = AgentCommand(command='go_streaming')
    reply = self._ia_client.execute(cmd)
    log.debug("test_activateInstrumentStream: return from go_streaming %s", str(reply))

    time.sleep(15)

    log.debug("test_activateInstrumentStream: calling go_observatory")
    cmd = AgentCommand(command='go_observatory')
    reply = self._ia_client.execute(cmd)
    log.debug("test_activateInstrumentStream: return from go_observatory %s", str(reply))
    time.sleep(2)

    log.debug("test_activateInstrumentStream: calling reset ")
    cmd = AgentCommand(command='reset')
    reply = self._ia_client.execute_agent(cmd)
    log.debug("test_activateInstrumentStream: return from reset %s", str(reply))
    time.sleep(2)

    #-------------------------------
    # Deactivate InstrumentAgentInstance
    #-------------------------------
    self.imsclient.stop_instrument_agent_instance(instrument_agent_instance_id=instAgentInstance_id)