def setUp(self):
    # Start container
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2deploy.yml')

    # Now create client to DataProductManagementService
    self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
    self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
    self.pubsubclient = PubsubManagementServiceClient(node=self.container.node)
    self.ingestclient = IngestionManagementServiceClient(node=self.container.node)
    self.imsclient = InstrumentManagementServiceClient(node=self.container.node)
    self.dataproductclient = DataProductManagementServiceClient(node=self.container.node)
    self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
    self.datasetclient = DatasetManagementServiceClient(node=self.container.node)
    self.workflowclient = WorkflowManagementServiceClient(node=self.container.node)
    self.process_dispatcher = ProcessDispatcherServiceClient(node=self.container.node)

    self.ctd_stream_def = SBE37_CDM_stream_definition()
def setUp(self):
    # Start container
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2deploy.yml')
    log.debug('started services')

    # Now create client to DataProductManagementService
    self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
    self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
    self.pubsubclient = PubsubManagementServiceClient(node=self.container.node)
    self.ingestclient = IngestionManagementServiceClient(node=self.container.node)
    self.imsclient = InstrumentManagementServiceClient(node=self.container.node)
    self.dataproductclient = DataProductManagementServiceClient(node=self.container.node)
    self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
    self.datasetclient = DatasetManagementServiceClient(node=self.container.node)
    self.processdispatchclient = ProcessDispatcherServiceClient(node=self.container.node)
    self.dataset_management = self.datasetclient
def setUp(self):
    # Start container
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2deploy.yml')

    # Now create client to DataProductManagementService
    self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
    self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
    self.pubsubclient = PubsubManagementServiceClient(node=self.container.node)
    self.ingestclient = IngestionManagementServiceClient(node=self.container.node)
    self.dpmsclient = DataProductManagementServiceClient(node=self.container.node)
    self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
    self.imsclient = InstrumentManagementServiceClient(node=self.container.node)
    self.omsclient = ObservatoryManagementServiceClient(node=self.container.node)
    self.process_dispatcher = ProcessDispatcherServiceClient()
    self.dataset_management = DatasetManagementServiceClient()

    # deactivate all data processes when tests are complete
    def killAllDataProcesses():
        for proc_id in self.rrclient.find_resources(RT.DataProcess, None, None, True)[0]:
            self.dataprocessclient.deactivate_data_process(proc_id)
            self.dataprocessclient.delete_data_process(proc_id)

    self.addCleanup(killAllDataProcesses)
def setUp(self):
    # Start container
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2deploy.yml')

    # Now create client to DataProductManagementService
    self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
    self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
    self.pubsubclient = PubsubManagementServiceClient(node=self.container.node)
    self.ingestclient = IngestionManagementServiceClient(node=self.container.node)
    self.dpmsclient = DataProductManagementServiceClient(node=self.container.node)
    self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
    self.imsclient = InstrumentManagementServiceClient(node=self.container.node)
    self.omsclient = ObservatoryManagementServiceClient(node=self.container.node)
    self.process_dispatcher = ProcessDispatcherServiceClient()
    self.dataset_management = DatasetManagementServiceClient()

    # create missing data process definition
    dpd_obj = IonObject(RT.DataProcessDefinition,
                        name=LOGICAL_TRANSFORM_DEFINITION_NAME,
                        description="normally in preload",
                        module='ion.processes.data.transforms.logical_transform',
                        class_name='logical_transform')
    self.dataprocessclient.create_data_process_definition(dpd_obj)

    # deactivate all data processes when tests are complete
    def killAllDataProcesses():
        for proc_id in self.rrclient.find_resources(RT.DataProcess, None, None, True)[0]:
            self.dataprocessclient.deactivate_data_process(proc_id)
            self.dataprocessclient.delete_data_process(proc_id)

    self.addCleanup(killAllDataProcesses)
def setUp(self):
    # Start container
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2deploy.yml')
    log.debug('started services')

    # Now create client to DataProductManagementService
    self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
    self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
    self.pubsubcli = PubsubManagementServiceClient(node=self.container.node)
    self.ingestclient = IngestionManagementServiceClient(node=self.container.node)
    self.imsclient = InstrumentManagementServiceClient(node=self.container.node)
    self.dpclient = DataProductManagementServiceClient(node=self.container.node)
    self.datasetclient = DatasetManagementServiceClient(node=self.container.node)

    # Set up listener vars
    self._data_greenlets = []
    self._no_samples = None
    self._samples_received = []
def setUp(self):
    # Start container
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2deploy.yml')

    self.dpsc_cli = DataProductManagementServiceClient()
    self.rrclient = ResourceRegistryServiceClient()
    self.damsclient = DataAcquisitionManagementServiceClient()
    self.pubsubcli = PubsubManagementServiceClient()
    self.ingestclient = IngestionManagementServiceClient()
    self.process_dispatcher = ProcessDispatcherServiceClient()
    self.dataset_management = DatasetManagementServiceClient()
    self.unsc = UserNotificationServiceClient()
    self.data_retriever = DataRetrieverServiceClient()

    #------------------------------------------
    # Create the environment
    #------------------------------------------
    datastore_name = CACHE_DATASTORE_NAME
    self.db = self.container.datastore_manager.get_datastore(datastore_name)
    self.stream_def_id = self.pubsubcli.create_stream_definition(name='SBE37_CDM')

    self.process_definitions = {}
    ingestion_worker_definition = ProcessDefinition(name='ingestion worker')
    ingestion_worker_definition.executable = {
        'module': 'ion.processes.data.ingestion.science_granule_ingestion_worker',
        'class': 'ScienceGranuleIngestionWorker'
    }
    process_definition_id = self.process_dispatcher.create_process_definition(
        process_definition=ingestion_worker_definition)
    self.process_definitions['ingestion_worker'] = process_definition_id

    self.pids = []
    self.exchange_points = []
    self.exchange_names = []

    #------------------------------------------------------------------------------------------------
    # First launch the ingestors
    #------------------------------------------------------------------------------------------------
    self.exchange_space = 'science_granule_ingestion'
    self.exchange_point = 'science_data'
    config = DotDict()
    config.process.datastore_name = 'datasets'
    config.process.queue_name = self.exchange_space

    self.exchange_names.append(self.exchange_space)
    self.exchange_points.append(self.exchange_point)

    pid = self.process_dispatcher.schedule_process(
        self.process_definitions['ingestion_worker'], configuration=config)
    log.debug("the ingestion worker process id: %s", pid)
    self.pids.append(pid)
def setUp(self):
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2deploy.yml')

    self.ingestion_management = IngestionManagementServiceClient()
    self.resource_registry = ResourceRegistryServiceClient()
    self.pubsub_management = PubsubManagementServiceClient()

    self.ingest_name = 'basic'
    self.exchange = 'testdata'
def __init__(self):
    self.ingestion_management = IngestionManagementServiceClient()
    self.resource_registry = ResourceRegistryServiceClient()
    self.data_product_management = DataProductManagementServiceClient()
    self.dataset_management = DatasetManagementServiceClient()
    self._paused_streams = []
    self._w_covs = {}
    self._ro_covs = {}
    self._context_managed = False
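# A minimal sketch of the context-manager protocol that the _context_managed
# flag above implies. These __enter__/__exit__ methods are NOT in the original
# source; they are an assumed shape for how such a helper could defer cleanup
# of its paused streams and open coverages until the with-block exits.
def __enter__(self):
    # Mark the instance as context managed so callers inside the with-block
    # can skip eager cleanup.
    self._context_managed = True
    return self

def __exit__(self, exc_type, exc_val, exc_tb):
    self._context_managed = False
    # Cleanup of self._paused_streams, self._w_covs, and self._ro_covs would
    # happen here; the exact resume/close calls are not shown in the source.
    return False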
def setUp(self):
    import couchdb
    super(DatasetManagementIntTest, self).setUp()
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2dm.yml')

    self.db = self.container.datastore_manager.get_datastore(
        'scidata', DataStore.DS_PROFILE.SCIDATA)
    self.db_raw = self.db.server

    self.dataset_management_client = DatasetManagementServiceClient(node=self.container.node)
    self.ingestion_client = IngestionManagementServiceClient(node=self.container.node)
def clean_subscriptions():
    ingestion_management = IngestionManagementServiceClient()
    pubsub = PubsubManagementServiceClient()
    rr = ResourceRegistryServiceClient()
    ingestion_config_ids = ingestion_management.list_ingestion_configurations(id_only=True)
    for ic in ingestion_config_ids:
        subscription_ids, assocs = rr.find_objects(
            subject=ic, predicate=PRED.hasSubscription, id_only=True)
        for subscription_id, assoc in zip(subscription_ids, assocs):
            rr.delete_association(assoc)
            try:
                pubsub.deactivate_subscription(subscription_id)
            except Exception:
                log.exception("Unable to deactivate subscription: %s", subscription_id)
            pubsub.delete_subscription(subscription_id)
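# Example of wiring clean_subscriptions into a test, mirroring the addCleanup
# pattern used by the setUp methods in this file. The test class name is
# illustrative; only clean_subscriptions and the container calls come from the
# surrounding source.
class IngestionCleanupExampleTest(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        # Ensure no ingestion subscriptions leak between tests
        self.addCleanup(clean_subscriptions)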
def setUp(self):
    # Start container
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2deploy.yml')

    # Now create client to DataProductManagementService
    self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
    self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
    self.pubsubclient = PubsubManagementServiceClient(node=self.container.node)
    self.ingestclient = IngestionManagementServiceClient(node=self.container.node)
    self.imsclient = InstrumentManagementServiceClient(node=self.container.node)
    self.dataproductclient = DataProductManagementServiceClient(node=self.container.node)
    self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
    self.datasetclient = DatasetManagementServiceClient(node=self.container.node)
def setUp(self):  # Love the non pep-8 convention
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2deploy.yml')

    self.process_dispatcher = ProcessDispatcherServiceClient()
    self.pubsub_management = PubsubManagementServiceClient()
    self.resource_registry = ResourceRegistryServiceClient()
    self.dataset_management = DatasetManagementServiceClient()
    self.ingestion_management = IngestionManagementServiceClient()
    self.data_retriever = DataRetrieverServiceClient()

    self.event = Event()
    self.exchange_space_name = 'test_granules'
    self.exchange_point_name = 'science_data'
    self.i = 0
    self.cci = 0
def setUp(self):
    # Start container
    self._start_container()
    log.debug("Start rel from url")
    self.container.start_rel_from_url('res/deploy/r2deploy.yml')

    self.DPMS = DataProductManagementServiceClient()
    self.RR = ResourceRegistryServiceClient()
    self.RR2 = EnhancedResourceRegistryClient(self.RR)
    self.DAMS = DataAcquisitionManagementServiceClient()
    self.PSMS = PubsubManagementServiceClient()
    self.ingestclient = IngestionManagementServiceClient()
    self.PD = ProcessDispatcherServiceClient()
    self.DSMS = DatasetManagementServiceClient()
    self.unsc = UserNotificationServiceClient()
    self.data_retriever = DataRetrieverServiceClient()

    #------------------------------------------
    # Create the environment
    #------------------------------------------
    log.debug("get datastore")
    datastore_name = CACHE_DATASTORE_NAME
    self.db = self.container.datastore_manager.get_datastore(datastore_name)

    self.stream_def_id = self.PSMS.create_stream_definition(name='SBE37_CDM')

    self.process_definitions = {}
    ingestion_worker_definition = ProcessDefinition(name='ingestion worker')
    ingestion_worker_definition.executable = {
        'module': 'ion.processes.data.ingestion.science_granule_ingestion_worker',
        'class': 'ScienceGranuleIngestionWorker'
    }
    process_definition_id = self.PD.create_process_definition(
        process_definition=ingestion_worker_definition)
    self.process_definitions['ingestion_worker'] = process_definition_id

    self.pids = []
    self.exchange_points = []
    self.exchange_names = []

    self.addCleanup(self.cleaning_up)
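# A sketch of the cleaning_up helper registered above. Its body is not in the
# original source; this is an assumed implementation that cancels the ingestion
# workers tracked in self.pids, using the process dispatcher's cancel_process
# call seen elsewhere in these tests.
def cleaning_up(self):
    for pid in self.pids:
        try:
            self.PD.cancel_process(pid)
        except Exception:
            log.exception("Unable to cancel process: %s", pid)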
def setUp(self):
    # Start container
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2deploy.yml')
    log.debug("TestExternalDatasetAgentMgmt: started services")

    # Now create client to DataProductManagementService
    self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
    self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
    self.pubsubcli = PubsubManagementServiceClient(node=self.container.node)
    self.ingestclient = IngestionManagementServiceClient(node=self.container.node)
    self.dpclient = DataProductManagementServiceClient(node=self.container.node)
    self.datasetclient = DatasetManagementServiceClient(node=self.container.node)
def on_start(self):
    super(IngestionLauncher, self).on_start()

    exchange_point = self.CFG.get_safe('ingestion.exchange_point', 'science_data')
    couch_opts = self.CFG.get_safe('ingestion.couch_storage', {})
    couch_storage = CouchStorage(**couch_opts)
    hdf_opts = self.CFG.get_safe('ingestion.hdf_storage', {})
    hdf_storage = HdfStorage(**hdf_opts)
    number_of_workers = self.CFG.get_safe('ingestion.number_of_workers', 2)

    ingestion_management_service = IngestionManagementServiceClient(node=self.container.node)
    ingestion_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id=exchange_point,
        couch_storage=couch_storage,
        hdf_storage=hdf_storage,
        number_of_workers=number_of_workers)
    ingestion_management_service.activate_ingestion_configuration(ingestion_id)
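# Example of the configuration shape IngestionLauncher.on_start consumes. The
# keys match the CFG.get_safe lookups above; the specific values are
# illustrative assumptions (the couch_storage kwargs mirror the CouchStorage
# calls used elsewhere in this file).
ingestion_launcher_config = {
    'ingestion': {
        'exchange_point': 'science_data',
        'couch_storage': {'datastore_name': 'test_dm_integration',
                          'datastore_profile': 'SCIDATA'},
        'hdf_storage': {},
        'number_of_workers': 2,
    }
}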
def setUp(self):
    # Start container
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2deploy.yml')

    self.ingestion_management = IngestionManagementServiceClient()
    self.rr = self.container.resource_registry
def test_usgs_integration(self):
    '''
    test_usgs_integration
    Test full DM Services Integration using usgs
    '''
    cc = self.container
    assertions = self.assertTrue

    #-----------------------------
    # Copy below here
    #-----------------------------
    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    data_retriever_service = DataRetrieverServiceClient(node=cc.node)
    transform_management_service = TransformManagementServiceClient(node=cc.node)
    process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

    process_list = []
    datasets = []

    datastore_name = 'test_usgs_integration'

    #---------------------------
    # Set up ingestion
    #---------------------------
    # Configure ingestion using eight workers, ingesting to the test_usgs_integration datastore with the SCIDATA profile
    log.debug('Calling create_ingestion_configuration')
    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id='science_data',
        couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'),
        number_of_workers=8)
    ingestion_management_service.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id)

    usgs_stream_def = USGS_stream_definition()
    stream_def_id = pubsub_management_service.create_stream_definition(
        container=usgs_stream_def, name='Junk definition')

    #---------------------------
    # Set up the producers (CTD Simulators)
    #---------------------------
    # Launch two simulated CTD producers
    for iteration in xrange(2):
        # Make a stream to output on
        stream_id = pubsub_management_service.create_stream(stream_definition_id=stream_def_id)

        #---------------------------
        # Set up the datasets
        #---------------------------
        dataset_id = dataset_management_service.create_dataset(
            stream_id=stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule')
        # Keep track of the datasets
        datasets.append(dataset_id)

        stream_policy_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=ingestion_configuration_id)

        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': 'eoi.agent.handler.usgs_stream_publisher',
            'class': 'UsgsPublisher'
        }
        configuration = {
            'process': {
                'stream_id': stream_id,
            }
        }
        procdef_id = process_dispatcher.create_process_definition(
            process_definition=producer_definition)
        log.debug('procdef_id: %s', procdef_id)
        pid = process_dispatcher.schedule_process(
            process_definition_id=procdef_id, configuration=configuration)

        # Keep track, we'll kill 'em later.
        process_list.append(pid)

    # Get about 4 seconds of data
    time.sleep(4)

    #---------------------------
    # Stop producing data
    #---------------------------
    for process in process_list:
        process_dispatcher.cancel_process(process)

    #----------------------------------------------
    # The replay and the transform, a love story.
    #----------------------------------------------
    # Happy Valentines to the clever coder who catches the above!
    transform_definition = ProcessDefinition()
    transform_definition.executable = {
        'module': 'ion.processes.data.transforms.transform_example',
        'class': 'TransformCapture'
    }
    transform_definition_id = process_dispatcher.create_process_definition(
        process_definition=transform_definition)

    dataset_id = datasets.pop()  # Just need one for now
    replay_id, stream_id = data_retriever_service.define_replay(dataset_id=dataset_id)

    #--------------------------------------------
    # I'm selling magazine subscriptions here!
    #--------------------------------------------
    subscription = pubsub_management_service.create_subscription(
        query=StreamQuery(stream_ids=[stream_id]),
        exchange_name='transform_capture_point')

    #--------------------------------------------
    # Start the transform (capture)
    #--------------------------------------------
    transform_id = transform_management_service.create_transform(
        name='capture_transform',
        in_subscription_id=subscription,
        process_definition_id=transform_definition_id)
    transform_management_service.activate_transform(transform_id=transform_id)

    #--------------------------------------------
    # BEGIN REPLAY!
    #--------------------------------------------
    data_retriever_service.start_replay(replay_id=replay_id)

    #--------------------------------------------
    # Let's get some boundaries
    #--------------------------------------------
    bounds = dataset_management_service.get_dataset_bounds(dataset_id=dataset_id)
def test_replay_integration(self):
    '''
    test_replay_integration
    '''
    import numpy as np  # Keep this import: it's used in the vector comparison below, even though PyCharm flags it as unused.
    cc = self.container
    XP = self.XP
    assertions = self.assertTrue

    ### Everything below here can be run as a script:
    log.debug('Got it')

    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    data_retriever_service = DataRetrieverServiceClient(node=cc.node)

    datastore_name = 'dm_test_replay_integration'

    producer = Publisher(name=(XP, 'stream producer'))

    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id=XP,
        couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'),
        hdf_storage=HdfStorage(),
        number_of_workers=1)
    ingestion_management_service.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id)

    definition = SBE37_CDM_stream_definition()
    data_stream_id = definition.data_stream_id
    encoding_id = definition.identifiables[data_stream_id].encoding_id
    element_count_id = definition.identifiables[data_stream_id].element_count_id

    stream_def_id = pubsub_management_service.create_stream_definition(container=definition)
    stream_id = pubsub_management_service.create_stream(stream_definition_id=stream_def_id)

    dataset_id = dataset_management_service.create_dataset(
        stream_id=stream_id,
        datastore_name=datastore_name,
        view_name='datasets/dataset_by_id')
    ingestion_management_service.create_dataset_configuration(
        dataset_id=dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id)
    definition.stream_resource_id = stream_id

    packet = _create_packet(definition)
    input_file = FileSystem.mktemp()
    input_file.write(packet.identifiables[data_stream_id].values)
    input_file_path = input_file.name
    input_file.close()

    fields = [
        'conductivity', 'height', 'latitude', 'longitude', 'pressure',
        'temperature', 'time'
    ]

    input_vectors = acquire_data([input_file_path], fields, 2).next()

    producer.publish(msg=packet, to_name=(XP, '%s.data' % stream_id))

    replay_id, replay_stream_id = data_retriever_service.define_replay(dataset_id)

    ar = gevent.event.AsyncResult()

    def sub_listen(msg, headers):
        assertions(isinstance(msg, StreamGranuleContainer),
                   'replayed message is not a granule.')
        hdf_string = msg.identifiables[data_stream_id].values
        sha1 = hashlib.sha1(hdf_string).hexdigest().upper()
        assertions(sha1 == msg.identifiables[encoding_id].sha1, 'Checksum failed.')
        assertions(msg.identifiables[element_count_id].value == 1,
                   'record replay count is incorrect %d.'
                   % msg.identifiables[element_count_id].value)
        output_file = FileSystem.mktemp()
        output_file.write(msg.identifiables[data_stream_id].values)
        output_file_path = output_file.name
        output_file.close()
        output_vectors = acquire_data([output_file_path], fields, 2).next()
        for field in fields:
            comparison = (input_vectors[field]['values'] ==
                          output_vectors[field]['values'])
            assertions(comparison.all(),
                       'vector mismatch: %s vs %s' %
                       (input_vectors[field]['values'],
                        output_vectors[field]['values']))
        FileSystem.unlink(output_file_path)
        ar.set(True)

    subscriber = Subscriber(name=(XP, 'replay listener'), callback=sub_listen)
    g = gevent.Greenlet(subscriber.listen, binding='%s.data' % replay_stream_id)
    g.start()

    data_retriever_service.start_replay(replay_id)
    ar.get(timeout=10)

    FileSystem.unlink(input_file_path)
def test_raw_stream_integration(self):
    cc = self.container
    assertions = self.assertTrue

    #-----------------------------
    # Copy below here to run as a script (don't forget the imports of course!)
    #-----------------------------

    # Create some service clients...
    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

    # declare some handy variables
    datastore_name = 'test_dm_integration'

    ###
    ### In the beginning there was one stream definition...
    ###

    # create a stream definition for the data from the ctd simulator
    raw_ctd_stream_def = SBE37_RAW_stream_definition()
    raw_ctd_stream_def_id = pubsub_management_service.create_stream_definition(
        container=raw_ctd_stream_def, name='Simulated RAW CTD data')

    ###
    ### And one process definition...
    ###

    # ...for the ctd simulator
    producer_definition = ProcessDefinition()
    producer_definition.executable = {
        'module': 'ion.processes.data.raw_stream_publisher',
        'class': 'RawStreamPublisher'
    }
    raw_ctd_sim_procdef_id = process_dispatcher.create_process_definition(
        process_definition=producer_definition)

    #---------------------------
    # Set up ingestion - this is an operator concern - not done by SA in a deployed system
    #---------------------------
    # Configure ingestion using one worker, ingesting to the test_dm_integration datastore with the SCIDATA profile
    log.debug('Calling create_ingestion_configuration')
    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id='science_data',
        couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'),
        number_of_workers=1)
    ingestion_management_service.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id)

    #---------------------------
    # Set up the producer (CTD Simulator)
    #---------------------------

    # Create the stream
    raw_ctd_stream_id = pubsub_management_service.create_stream(
        stream_definition_id=raw_ctd_stream_def_id)

    # Set up the datasets
    raw_ctd_dataset_id = dataset_management_service.create_dataset(
        stream_id=raw_ctd_stream_id,
        datastore_name=datastore_name,
        view_name='datasets/stream_join_granule')

    # Configure ingestion of this dataset - you need to know the ingestion configuration id!
    raw_ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration(
        dataset_id=raw_ctd_dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id)
    # Hold onto raw_ctd_dataset_config_id if you want to stop/start ingestion of this dataset by the ingestion service

    # Start the ctd simulator to produce some data
    configuration = {
        'process': {
            'stream_id': raw_ctd_stream_id,
        }
    }
    raw_sim_pid = process_dispatcher.schedule_process(
        process_definition_id=raw_ctd_sim_procdef_id, configuration=configuration)

    ###
    ### Make a subscriber in the test to listen for raw data
    ###
    raw_subscription_id = pubsub_management_service.create_subscription(
        query=StreamQuery([raw_ctd_stream_id]),
        exchange_name='raw_test',
        name="test raw subscription")

    # this is okay - even in cei mode!
    pid = cc.spawn_process(name='dummy_process_for_test',
                           module='pyon.ion.process',
                           cls='SimpleProcess',
                           config={})
    dummy_process = cc.proc_manager.procs[pid]

    subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, node=cc.node)

    result = gevent.event.AsyncResult()
    results = []

    def message_received(message, headers):
        log.warn('Raw data received!')
        results.append(message)
        if len(results) > 3:
            result.set(True)

    subscriber = subscriber_registrar.create_subscriber(
        exchange_name='raw_test', callback=message_received)
    subscriber.start()

    # after the queue has been created it is safe to activate the subscription
    pubsub_management_service.activate_subscription(subscription_id=raw_subscription_id)

    # Assert that we have received data
    assertions(result.get(timeout=10))

    # stop the flow and parse the messages...
    process_dispatcher.cancel_process(raw_sim_pid)  # kill the ctd simulator process - that is enough data

    gevent.sleep(1)

    for message in results:
        sha1 = message.identifiables['stream_encoding'].sha1
        data = message.identifiables['data_stream'].values
        filename = FileSystem.get_hierarchical_url(FS.CACHE, sha1, ".raw")
        with open(filename, 'r') as f:
            assertions(data == f.read())
def on_start(self): log.debug("VizStreamProducer start") self.data_source_name = self.CFG.get('name') self.dataset = self.CFG.get('dataset') # create a pubsub client and a resource registry client self.rrclient = ResourceRegistryServiceClient(node=self.container.node) self.pubsubclient = PubsubManagementServiceClient( node=self.container.node) # Dummy instrument related clients self.imsclient = InstrumentManagementServiceClient( node=self.container.node) self.damsclient = DataAcquisitionManagementServiceClient( node=self.container.node) self.dpclient = DataProductManagementServiceClient( node=self.container.node) self.IngestClient = IngestionManagementServiceClient( node=self.container.node) # create the pubsub client self.pubsubclient = PubsubManagementServiceClient( node=self.container.node) # Additional code for creating a dummy instrument """ # Set up the preconditions. Look for an existing ingestion config while True: log.info("VisStreamLauncher:on_start: Waiting for an ingestion configuration to be available.") ingestion_cfgs, _ = self.rrclient.find_resources(RT.IngestionConfiguration, None, None, True) if len(ingestion_cfgs) > 0: break else: gevent.sleep(1) """ # Check to see if the data_product already exists in the system (for e.g re launching the code after a crash) dp_ids, _ = self.rrclient.find_resources(RT.DataProduct, None, self.data_source_name, True) if len(dp_ids) > 0: data_product_id = dp_ids[0] print '>>>>>>>>>>>>> Found dp_id = ', data_product_id else: # Create InstrumentModel instModel_obj = IonObject(RT.InstrumentModel, name=self.data_source_name, description=self.data_source_name, model_label=self.data_source_name) instModel_id = self.imsclient.create_instrument_model( instModel_obj) # Create InstrumentDevice instDevice_obj = IonObject(RT.InstrumentDevice, name=self.data_source_name, description=self.data_source_name, serial_number="12345") instDevice_id = self.imsclient.create_instrument_device( instrument_device=instDevice_obj) self.imsclient.assign_instrument_model_to_instrument_device( instModel_id, instDevice_id) # create a stream definition for the data from the ctd simulator ctd_stream_def = SBE37_CDM_stream_definition() ctd_stream_def_id = self.pubsubclient.create_stream_definition( container=ctd_stream_def) print 'Creating new CDM data product with a stream definition' dp_obj = IonObject(RT.DataProduct, name=self.data_source_name, description='ctd stream test') data_product_id = self.dpclient.create_data_product( dp_obj, ctd_stream_def_id) self.damsclient.assign_data_product( input_resource_id=instDevice_id, data_product_id=data_product_id) self.dpclient.activate_data_product_persistence( data_product_id=data_product_id, persist_data=True, persist_metadata=True) print '>>>>>>>>>>>> New dp_id = ', data_product_id # Retrieve the id of the OUTPUT stream from the out Data Product stream_ids, _ = self.rrclient.find_objects(data_product_id, PRED.hasStream, None, True) if self.dataset == 'sinusoidal': pid = self.container.spawn_process( name='ctd_test.' + self.data_source_name, module='ion.processes.data.sinusoidal_stream_publisher', cls='SinusoidalCtdPublisher', config={'process': { 'stream_id': stream_ids[0] }}) else: pid = self.container.spawn_process( name='ctd_test.' + self.data_source_name, module='ion.processes.data.ctd_stream_publisher', cls='SimpleCtdPublisher', config={'process': { 'stream_id': stream_ids[0] }})
def test_dm_integration(self):
    '''
    test_dm_integration
    Test full DM Services Integration: a simulated CTD stream feeding a salinity transform
    '''
    cc = self.container
    assertions = self.assertTrue

    #-----------------------------
    # Copy below here to run as a script (don't forget the imports of course!)
    #-----------------------------

    # Create some service clients...
    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    data_retriever_service = DataRetrieverServiceClient(node=cc.node)
    transform_management_service = TransformManagementServiceClient(node=cc.node)
    process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

    # declare some handy variables
    datastore_name = 'test_dm_integration'

    ###
    ### In the beginning there were two stream definitions...
    ###

    # create a stream definition for the data from the ctd simulator
    ctd_stream_def = SBE37_CDM_stream_definition()
    ctd_stream_def_id = pubsub_management_service.create_stream_definition(
        container=ctd_stream_def, name='Simulated CTD data')

    # create a stream definition for the data from the salinity transform
    sal_stream_def_id = pubsub_management_service.create_stream_definition(
        container=SalinityTransform.outgoing_stream_def,
        name='Scalar Salinity data stream')

    ###
    ### And two process definitions...
    ###

    # one for the ctd simulator...
    producer_definition = ProcessDefinition()
    producer_definition.executable = {
        'module': 'ion.processes.data.ctd_stream_publisher',
        'class': 'SimpleCtdPublisher'
    }
    ctd_sim_procdef_id = process_dispatcher.create_process_definition(
        process_definition=producer_definition)

    # one for the salinity transform
    producer_definition = ProcessDefinition()
    producer_definition.executable = {
        'module': 'ion.processes.data.transforms.ctd.ctd_L2_salinity',
        'class': 'SalinityTransform'
    }
    salinity_transform_procdef_id = process_dispatcher.create_process_definition(
        process_definition=producer_definition)

    #---------------------------
    # Set up ingestion - this is an operator concern - not done by SA in a deployed system
    #---------------------------
    # Configure ingestion using one worker, ingesting to the test_dm_integration datastore with the SCIDATA profile
    log.debug('Calling create_ingestion_configuration')
    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id='science_data',
        couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'),
        number_of_workers=1)
    ingestion_management_service.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id)

    #---------------------------
    # Set up the producer (CTD Simulator)
    #---------------------------

    # Create the stream
    ctd_stream_id = pubsub_management_service.create_stream(
        stream_definition_id=ctd_stream_def_id)

    # Set up the datasets
    ctd_dataset_id = dataset_management_service.create_dataset(
        stream_id=ctd_stream_id,
        datastore_name=datastore_name,
        view_name='datasets/stream_join_granule')

    # Configure ingestion of this dataset - you need to know the ingestion configuration id!
    ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration(
        dataset_id=ctd_dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id)
    # Hold onto ctd_dataset_config_id if you want to stop/start ingestion of this dataset by the ingestion service

    #---------------------------
    # Set up the salinity transform
    #---------------------------

    # Create the stream
    sal_stream_id = pubsub_management_service.create_stream(
        stream_definition_id=sal_stream_def_id)

    # Set up the datasets
    sal_dataset_id = dataset_management_service.create_dataset(
        stream_id=sal_stream_id,
        datastore_name=datastore_name,
        view_name='datasets/stream_join_granule')

    # Configure ingestion of the salinity as a dataset - you need to know the ingestion configuration id!
    sal_dataset_config_id = ingestion_management_service.create_dataset_configuration(
        dataset_id=sal_dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id)
    # Hold onto sal_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

    # Create a subscription as input to the transform
    sal_transform_input_subscription_id = pubsub_management_service.create_subscription(
        query=StreamQuery(stream_ids=[ctd_stream_id]),
        exchange_name='salinity_transform_input')  # how do we make these names??? i.e. Should they be anonymous?

    # create the salinity transform
    sal_transform_id = transform_management_service.create_transform(
        name='example salinity transform',
        in_subscription_id=sal_transform_input_subscription_id,
        out_streams={'output': sal_stream_id},
        process_definition_id=salinity_transform_procdef_id)
        # no configuration needed at this time...

    # start the transform - for a test case it makes sense to do it before starting the producer, but it is not required
    transform_management_service.activate_transform(transform_id=sal_transform_id)

    # Start the ctd simulator to produce some data
    configuration = {
        'process': {
            'stream_id': ctd_stream_id,
        }
    }
    ctd_sim_pid = process_dispatcher.schedule_process(
        process_definition_id=ctd_sim_procdef_id, configuration=configuration)

    ###
    ### Make a subscriber in the test to listen for salinity data
    ###
    salinity_subscription_id = pubsub_management_service.create_subscription(
        query=StreamQuery([sal_stream_id]),
        exchange_name='salinity_test',
        name="test salinity subscription")

    pid = cc.spawn_process(name='dummy_process_for_test',
                           module='pyon.ion.process',
                           cls='SimpleProcess',
                           config={})
    dummy_process = cc.proc_manager.procs[pid]

    subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, node=cc.node)

    result = gevent.event.AsyncResult()
    results = []

    def message_received(message, headers):
        log.warn('Salinity data received!')
        results.append(message)
        if len(results) > 3:
            result.set(True)

    subscriber = subscriber_registrar.create_subscriber(
        exchange_name='salinity_test', callback=message_received)
    subscriber.start()

    # after the queue has been created it is safe to activate the subscription
    pubsub_management_service.activate_subscription(
        subscription_id=salinity_subscription_id)

    # Assert that we have received data
    assertions(result.get(timeout=10))

    # stop the flow and parse the messages...
    process_dispatcher.cancel_process(ctd_sim_pid)  # kill the ctd simulator process - that is enough data

    for message in results:
        psd = PointSupplementStreamParser(
            stream_definition=SalinityTransform.outgoing_stream_def,
            stream_granule=message)

        # Test the handy info method for the names of fields in the stream def
        assertions('salinity' in psd.list_field_names())

        # you have to know the name of the coverage in the stream def
        salinity = psd.get_values('salinity')

        import numpy
        assertions(isinstance(salinity, numpy.ndarray))

        # salinity should always be greater than 0
        assertions(numpy.nanmin(salinity) > 0.0)
def on_start(self): log.debug("VizStreamProducer start") self.data_source_name = self.CFG.get_safe('name', 'sine_wave_generator') self.dataset = self.CFG.get_safe('dataset', 'sinusoidal') # create a pubsub client and a resource registry client self.rrclient = ResourceRegistryServiceClient(node=self.container.node) self.pubsubclient = PubsubManagementServiceClient( node=self.container.node) # Dummy instrument related clients self.imsclient = InstrumentManagementServiceClient( node=self.container.node) self.damsclient = DataAcquisitionManagementServiceClient( node=self.container.node) self.dpclient = DataProductManagementServiceClient( node=self.container.node) self.ingestclient = IngestionManagementServiceClient( node=self.container.node) self.dataset_management = DatasetManagementServiceClient() # create the pubsub client self.pubsubclient = PubsubManagementServiceClient( node=self.container.node) # Additional code for creating a dummy instrument """ # Set up the preconditions. Look for an existing ingestion config while True: log.info("VisStreamLauncher:on_start: Waiting for an ingestion configuration to be available.") ingestion_cfgs, _ = self.rrclient.find_resources(RT.IngestionConfiguration, None, None, True) if len(ingestion_cfgs) > 0: break else: gevent.sleep(1) """ # Check to see if the data_product already exists in the system (for e.g re launching the code after a crash) dp_ids, _ = self.rrclient.find_resources(RT.DataProduct, None, self.data_source_name, True) if len(dp_ids) > 0: data_product_id = dp_ids[0] else: # Create InstrumentModel instModel_obj = IonObject(RT.InstrumentModel, name=self.data_source_name, description=self.data_source_name) instModel_id = self.imsclient.create_instrument_model( instModel_obj) # Create InstrumentDevice instDevice_obj = IonObject(RT.InstrumentDevice, name=self.data_source_name, description=self.data_source_name, serial_number="12345") instDevice_id = self.imsclient.create_instrument_device( instrument_device=instDevice_obj) self.imsclient.assign_instrument_model_to_instrument_device( instModel_id, instDevice_id) # create a stream definition for the data from the ctd simulator pdict_id = self.dataset_management.read_parameter_dictionary_by_name( 'ctd_parsed_param_dict', id_only=True) ctd_stream_def_id = self.pubsubclient.create_stream_definition( name="SBE37_CDM", description="SBE37_CDM", parameter_dictionary_id=pdict_id) tdom, sdom = time_series_domain() sdom = sdom.dump() tdom = tdom.dump() dp_obj = IonObject(RT.DataProduct, name=self.data_source_name, description='Example ctd stream', temporal_domain=tdom, spatial_domain=sdom) data_product_id = self.dpclient.create_data_product( dp_obj, stream_definition_id=ctd_stream_def_id) self.damsclient.assign_data_product( input_resource_id=instDevice_id, data_product_id=data_product_id) self.dpclient.activate_data_product_persistence( data_product_id=data_product_id) print ">>>>>>>>>>>>> Dataproduct for sine wave generator : ", data_product_id # Retrieve the id of the OUTPUT stream from the out Data Product stream_ids, _ = self.rrclient.find_objects(data_product_id, PRED.hasStream, None, True) if self.dataset == 'sinusoidal': self.container.spawn_process( name='ctd_test.' + self.data_source_name, module='ion.processes.data.sinusoidal_stream_publisher', cls='SinusoidalCtdPublisher', config={'process': { 'stream_id': stream_ids[0] }}) else: self.container.spawn_process( name='ctd_test.' 
+ self.data_source_name, module='ion.processes.data.ctd_stream_publisher', cls='SimpleCtdPublisher', config={'process': { 'stream_id': stream_ids[0] }}) """