def test_code_download(self):
    # create a process definition that has no URL; only module and class.
    process_definition_no_url = ProcessDefinition(name='test_process_nodownload')
    process_definition_no_url.executable = {'module': 'ion.my.test.process',
                                            'class': 'TestProcess'}
    process_definition_id_no_url = self.pd_cli.create_process_definition(
        process_definition_no_url)

    # create another that has a URL of the python file (this very file)
    # verifies L4-CI-CEI-RQ114
    url = "file://%s" % os.path.join(os.path.dirname(__file__),
                                     'test_process_dispatcher.py')
    process_definition = ProcessDefinition(name='test_process_download')
    process_definition.executable = {'module': 'ion.my.test.process',
                                     'class': 'TestProcess', 'url': url}
    process_definition_id = self.pd_cli.create_process_definition(
        process_definition)

    process_target = ProcessTarget()
    process_schedule = ProcessSchedule()
    process_schedule.queueing_mode = ProcessQueueingMode.ALWAYS
    process_schedule.target = process_target

    self.waiter.start()

    # Test a module with no download fails
    pid_no_url = self.pd_cli.create_process(process_definition_id_no_url)
    self.pd_cli.schedule_process(process_definition_id_no_url,
                                 process_schedule, process_id=pid_no_url)
    self.waiter.await_state_event(pid_no_url, ProcessStateEnum.FAILED)

    # Test a module with a URL runs
    pid = self.pd_cli.create_process(process_definition_id)
    self.pd_cli.schedule_process(process_definition_id,
                                 process_schedule, process_id=pid)
    self.waiter.await_state_event(pid, ProcessStateEnum.RUNNING)
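# For reference (added summary, not part of any one source file): the snippets
# in this collection all follow the same Process Dispatcher lifecycle, using
# the names seen above:
#
#   1. create_process_definition(ProcessDefinition)  -> process definition id
#   2. create_process(definition_id)                 -> pre-allocated process id (optional)
#   3. schedule_process(definition_id, schedule, configuration, process_id)
#   4. wait for a ProcessStateEnum state, e.g. via self.waiter.await_state_event,
#      a ProcessStateGate, or an EventSubscriber on ProcessLifecycleEvent.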
def create_event_process_definition(self, version='', module='', class_name='',
                                    uri='', arguments=None, event_types=None,
                                    sub_types=None, origin_types=None):
    """
    Create a resource which defines the processing of events.

    @param version str
    @param module str
    @param class_name str
    @param uri str
    @param arguments list
    @return procdef_id str
    """
    # Create the event process detail object
    event_process_definition_detail = EventProcessDefinitionDetail()
    event_process_definition_detail.event_types = event_types
    event_process_definition_detail.sub_types = sub_types
    event_process_definition_detail.origin_types = origin_types

    # Create the process definition
    process_definition = ProcessDefinition(
        name=create_unique_identifier('event_process'))
    process_definition.executable = {
        'module': module,
        'class': class_name,
        'url': uri
    }
    process_definition.version = version
    process_definition.arguments = arguments
    process_definition.definition = event_process_definition_detail
    procdef_id = self.clients.process_dispatcher.create_process_definition(
        process_definition=process_definition)

    return procdef_id
def create_data_process_definition(self, data_process_definition=None):
    result, _ = self.clients.resource_registry.find_resources(
        RT.DataProcessDefinition, None, data_process_definition.name, True)
    if result:
        raise BadRequest("A data process definition named '%s' already exists"
                         % data_process_definition.name)

    #todo: determine validation checks for a data process def
    data_process_definition_id, version = self.clients.resource_registry.create(
        data_process_definition)

    #-------------------------------
    # Process Definition
    #-------------------------------
    # Create the underlying process definition
    process_definition = ProcessDefinition()
    process_definition.name = data_process_definition.name
    process_definition.description = data_process_definition.description
    process_definition.executable = {'module': data_process_definition.module,
                                     'class': data_process_definition.class_name}
    process_definition_id = self.clients.process_dispatcher.create_process_definition(
        process_definition=process_definition)

    self.clients.resource_registry.create_association(
        data_process_definition_id, PRED.hasProcessDefinition,
        process_definition_id)

    return data_process_definition_id
def create_data_process_definition(self, data_process_definition=None):
    data_process_definition_id = self.RR2.create(data_process_definition,
                                                 RT.DataProcessDefinition)

    # -------------------------------
    # Process Definition
    # -------------------------------
    # Create the underlying process definition
    process_definition = ProcessDefinition()
    process_definition.name = data_process_definition.name
    process_definition.description = data_process_definition.description
    process_definition.executable = {
        "module": data_process_definition.module,
        "class": data_process_definition.class_name,
    }
    process_definition_id = self.clients.process_dispatcher.create_process_definition(
        process_definition=process_definition
    )

    self.RR2.assign_process_definition_to_data_process_definition_with_has_process_definition(
        process_definition_id, data_process_definition_id
    )

    return data_process_definition_id
def launch_instrument(self, agt_id, agent_config, timeout_spawn=None):
    """
    Launches an instrument agent.

    @param agt_id         Some ID mainly used for logging
    @param agent_config   Agent configuration
    @param timeout_spawn  Timeout in secs for the RUNNING event (by default,
                          the value given in constructor). If None or zero,
                          no wait is performed.
    @return process ID
    """
    timeout_spawn = timeout_spawn or self._timeout_spawn
    log.debug("launch_instrument: agt_id=%r, timeout_spawn=%s",
              agt_id, timeout_spawn)

    name = 'InstrumentAgent_%s' % agt_id
    pdef = ProcessDefinition(name=name)
    pdef.executable = {
        'module': 'ion.agents.instrument.instrument_agent',
        'class': 'InstrumentAgent'
    }
    pdef_id = self._pd_client.create_process_definition(process_definition=pdef)

    pid = self._agent_launcher.launch(agent_config, pdef_id)

    if timeout_spawn:
        log.debug("launch_instrument: agt_id=%r: waiting for RUNNING", agt_id)
        self._agent_launcher.await_launch(timeout_spawn)
        log.debug("launch_instrument: agt_id=%r: RUNNING", agt_id)

    return pid
def dispatch_process(self, upid, spec, subscribers, constraints=None,
                     immediate=False):
    name = spec.get('name')
    self.event_pub.publish_event(event_type="ProcessLifecycleEvent",
                                 origin=name,
                                 origin_type="DispatchedHAProcess",
                                 state=ProcessStateEnum.SPAWN)

    process_def = ProcessDefinition(name=name)
    process_def.executable = {'module': spec.get('module'),
                              'class': spec.get('class')}
    process_def_id = self.real_client.create_process_definition(process_def)

    pid = self.real_client.create_process(process_def_id)

    process_schedule = ProcessSchedule()

    sched_pid = self.real_client.schedule_process(process_def_id,
                                                  process_schedule,
                                                  configuration={},
                                                  process_id=pid)

    proc = self.real_client.read_process(sched_pid)
    dict_proc = {'upid': proc.process_id,
                 'state': self.state_map.get(proc.process_state, self.unknown_state),
                 }
    return dict_proc
def start_input_stream_process(self, ctd_stream_id,
                               module='ion.processes.data.ctd_stream_publisher',
                               class_name='SimpleCtdPublisher'):
    ###
    ### Start the process for producing the CTD data
    ###
    # process definition for the ctd simulator...
    producer_definition = ProcessDefinition()
    producer_definition.executable = {
        'module': module,
        'class': class_name
    }

    ctd_sim_procdef_id = self.process_dispatcher.create_process_definition(
        process_definition=producer_definition)

    # Start the ctd simulator to produce some data
    configuration = {
        'process': {
            'stream_id': ctd_stream_id,
        }
    }

    ctd_sim_pid = self.process_dispatcher.schedule_process(
        process_definition_id=ctd_sim_procdef_id, configuration=configuration)

    return ctd_sim_pid
def launch_platform(self, agt_id, agent_config, timeout_spawn=30):
    """
    Launches a platform agent.

    @param agt_id         Some ID mainly used for logging
    @param agent_config   Agent configuration
    @param timeout_spawn  Timeout in secs for the SPAWN event (by default 30).
                          If None or zero, no wait is performed.
    @return process ID
    """
    log.debug("launch platform: agt_id=%r, timeout_spawn=%s",
              agt_id, timeout_spawn)

    name = 'PlatformAgent_%s' % agt_id
    pdef = ProcessDefinition(name=name)
    pdef.executable = {
        'module': 'ion.agents.platform.platform_agent',
        'class': 'PlatformAgent'
    }
    pdef_id = self._pd_client.create_process_definition(process_definition=pdef)

    pid = self._agent_launcher.launch(agent_config, pdef_id)

    if timeout_spawn:
        self._agent_launcher.await_launch(timeout_spawn)

    return pid
def setUp(self):
    # Start container
    #print 'instantiating container'
    self._start_container()

    self.container.start_rel_from_url('res/deploy/r2deploy.yml')

    self.dpsc_cli = DataProductManagementServiceClient()
    self.rrclient = ResourceRegistryServiceClient()
    self.damsclient = DataAcquisitionManagementServiceClient()
    self.pubsubcli = PubsubManagementServiceClient()
    self.ingestclient = IngestionManagementServiceClient()
    self.process_dispatcher = ProcessDispatcherServiceClient()
    self.dataset_management = DatasetManagementServiceClient()
    self.unsc = UserNotificationServiceClient()
    self.data_retriever = DataRetrieverServiceClient()

    #------------------------------------------
    # Create the environment
    #------------------------------------------
    datastore_name = CACHE_DATASTORE_NAME
    self.db = self.container.datastore_manager.get_datastore(datastore_name)
    self.stream_def_id = self.pubsubcli.create_stream_definition(name='SBE37_CDM')

    self.process_definitions = {}
    ingestion_worker_definition = ProcessDefinition(name='ingestion worker')
    ingestion_worker_definition.executable = {
        'module': 'ion.processes.data.ingestion.science_granule_ingestion_worker',
        'class': 'ScienceGranuleIngestionWorker'
    }
    process_definition_id = self.process_dispatcher.create_process_definition(
        process_definition=ingestion_worker_definition)
    self.process_definitions['ingestion_worker'] = process_definition_id

    self.pids = []
    self.exchange_points = []
    self.exchange_names = []

    #------------------------------------------------------------------------------------------------
    # First launch the ingestors
    #------------------------------------------------------------------------------------------------
    self.exchange_space = 'science_granule_ingestion'
    self.exchange_point = 'science_data'
    config = DotDict()
    config.process.datastore_name = 'datasets'
    config.process.queue_name = self.exchange_space

    self.exchange_names.append(self.exchange_space)
    self.exchange_points.append(self.exchange_point)

    pid = self.process_dispatcher.schedule_process(
        self.process_definitions['ingestion_worker'], configuration=config)
    log.debug("the ingestion worker process id: %s", pid)
    self.pids.append(pid)
def setUp(self):
    # Start container
    #print 'instantiating container'
    self._start_container()

    self.container.start_rel_from_url('res/deploy/r2deploy.yml')

    self.dpsc_cli = DataProductManagementServiceClient(node=self.container.node)
    self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
    self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
    self.pubsubcli = PubsubManagementServiceClient(node=self.container.node)
    self.ingestclient = IngestionManagementServiceClient(node=self.container.node)
    self.process_dispatcher = ProcessDispatcherServiceClient()
    self.dataset_management = DatasetManagementServiceClient()
    self.unsc = UserNotificationServiceClient()
    self.data_retriever = DataRetrieverServiceClient()

    #------------------------------------------
    # Create the environment
    #------------------------------------------
    datastore_name = CACHE_DATASTORE_NAME
    self.db = self.container.datastore_manager.get_datastore(datastore_name)
    self.stream_def_id = self.pubsubcli.create_stream_definition(name='SBE37_CDM')

    self.process_definitions = {}
    ingestion_worker_definition = ProcessDefinition(name='ingestion worker')
    ingestion_worker_definition.executable = {
        'module': 'ion.processes.data.ingestion.science_granule_ingestion_worker',
        'class': 'ScienceGranuleIngestionWorker'
    }
    process_definition_id = self.process_dispatcher.create_process_definition(
        process_definition=ingestion_worker_definition)
    self.process_definitions['ingestion_worker'] = process_definition_id

    self.pids = []
    self.exchange_points = []
    self.exchange_names = []

    #------------------------------------------------------------------------------------------------
    # First launch the ingestors
    #------------------------------------------------------------------------------------------------
    self.exchange_space = 'science_granule_ingestion'
    self.exchange_point = 'science_data'
    config = DotDict()
    config.process.datastore_name = 'datasets'
    config.process.queue_name = self.exchange_space

    self.exchange_names.append(self.exchange_space)
    self.exchange_points.append(self.exchange_point)

    pid = self.process_dispatcher.schedule_process(
        self.process_definitions['ingestion_worker'], configuration=config)
    log.debug("the ingestion worker process id: %s", pid)
    self.pids.append(pid)

    self.addCleanup(self.cleaning_up)
def create_definition(self, definition_id, definition_type, executable,
                      name=None, description=None):
    if name is None:
        raise BadRequest("create_definition must have a name supplied")

    # note: we lose the description
    definition = ProcessDefinition(name=name)
    definition.executable = {'module': executable.get('module'),
                             'class': executable.get('class')}
    definition.definition_type = definition_type
    created_definition = self.real_client.create_process_definition(
        definition, definition_id)
def create_worker(self, number_of_workers=1):
    """
    Creates notification workers

    @param number_of_workers int
    @retval pids list
    """
    pids = []

    for n in xrange(number_of_workers):
        process_definition = ProcessDefinition(
            name='notification_worker_%s' % n)

        process_definition.executable = {
            'module': 'ion.processes.data.transforms.notification_worker',
            'class': 'NotificationWorker'
        }
        process_definition_id = self.process_dispatcher.create_process_definition(
            process_definition=process_definition)

        # ------------------------------------------------------------------------------------
        # Process Spawning
        # ------------------------------------------------------------------------------------
        pid2 = self.process_dispatcher.create_process(process_definition_id)

        #@todo put in a configuration
        configuration = {}
        configuration['process'] = dict({
            'name': 'notification_worker_%s' % n,
            'type': 'simple',
            'queue_name': 'notification_worker_queue'
        })

        pid = self.process_dispatcher.schedule_process(
            process_definition_id,
            configuration=configuration,
            process_id=pid2
        )

        pids.append(pid)

    return pids
def _do_launch_gate(self, platform_id, agent_config, timeout_spawn):
    """
    The method for when using the ProcessStateGate pattern, which is the
    one used by test_oms_launch2 to launch the root platform.
    """
    log.debug("_do_launch_gate: platform_id=%r, timeout_spawn=%s",
              platform_id, timeout_spawn)

    pa_name = 'PlatformAgent_%s' % platform_id

    pdef = ProcessDefinition(name=pa_name)
    pdef.executable = {
        'module': PA_MOD,
        'class': PA_CLS
    }
    pdef_id = self._pd_client.create_process_definition(process_definition=pdef)

    log.debug("using schedule_process directly %r", platform_id)

    pid = self._pd_client.schedule_process(process_definition_id=pdef_id,
                                           schedule=None,
                                           configuration=agent_config)

    if timeout_spawn:
        # ProcessStateGate used as indicated in its pydoc (9/21/12)
        gate = ProcessStateGate(self._pd_client.read_process, pid,
                                ProcessStateEnum.RUNNING)
        err_msg = None
        try:
            if not gate.await(timeout_spawn):
                err_msg = "The platform agent instance did not spawn in " \
                          "%s seconds. gate.wait returned false. " % \
                          timeout_spawn
                log.error(err_msg)
        except Exception as e:
            log.error("Exception while waiting for platform agent instance "
                      "(platform_id=%r) "
                      "to spawn in %s seconds: %s",
                      platform_id, timeout_spawn, str(e))  #, exc_info=True)
        if err_msg:
            raise PlatformException(err_msg)

    log.debug("_do_launch_gate: platform_id=%r: agent spawned, pid=%r "
              "(ProcessStateGate pattern used)", platform_id, pid)

    return pid
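# A minimal sketch of the gate pattern used in _do_launch_gate above, assuming
# a ProcessDispatcher client named pd_client and an already-scheduled pid;
# gate.await returns False if the target state is not reached within the timeout.
gate = ProcessStateGate(pd_client.read_process, pid, ProcessStateEnum.RUNNING)
if not gate.await(30):
    raise PlatformException("agent did not reach RUNNING within 30 seconds")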
def create_logger(self, name, stream_id=""):
    # logger process
    producer_definition = ProcessDefinition(name=name + "_logger")
    producer_definition.executable = {
        "module": "ion.processes.data.stream_granule_logger",
        "class": "StreamGranuleLogger",
    }

    logger_procdef_id = self.processdispatchclient.create_process_definition(
        process_definition=producer_definition)
    configuration = {"process": {"stream_id": stream_id}}
    pid = self.processdispatchclient.schedule_process(
        process_definition_id=logger_procdef_id, configuration=configuration
    )

    return pid
def setUp(self):
    # Start container
    #print 'instantiating container'
    self._start_container()

    log.debug("Start rel from url")
    self.container.start_rel_from_url('res/deploy/r2deploy.yml')

    self.DPMS = DataProductManagementServiceClient()
    self.RR = ResourceRegistryServiceClient()
    self.RR2 = EnhancedResourceRegistryClient(self.RR)
    self.DAMS = DataAcquisitionManagementServiceClient()
    self.PSMS = PubsubManagementServiceClient()
    self.ingestclient = IngestionManagementServiceClient()
    self.PD = ProcessDispatcherServiceClient()
    self.DSMS = DatasetManagementServiceClient()
    self.unsc = UserNotificationServiceClient()
    self.data_retriever = DataRetrieverServiceClient()

    #------------------------------------------
    # Create the environment
    #------------------------------------------
    log.debug("get datastore")
    datastore_name = CACHE_DATASTORE_NAME
    self.db = self.container.datastore_manager.get_datastore(datastore_name)
    self.stream_def_id = self.PSMS.create_stream_definition(name='SBE37_CDM')

    self.process_definitions = {}
    ingestion_worker_definition = ProcessDefinition(name='ingestion worker')
    ingestion_worker_definition.executable = {
        'module': 'ion.processes.data.ingestion.science_granule_ingestion_worker',
        'class': 'ScienceGranuleIngestionWorker'
    }
    process_definition_id = self.PD.create_process_definition(
        process_definition=ingestion_worker_definition)
    self.process_definitions['ingestion_worker'] = process_definition_id

    self.pids = []
    self.exchange_points = []
    self.exchange_names = []

    self.addCleanup(self.cleaning_up)
def create_process(name='', module='', class_name='', configuration=None):
    '''
    A helper method to create a process
    '''
    producer_definition = ProcessDefinition(name=name)
    producer_definition.executable = {
        'module': module,
        'class': class_name
    }

    process_dispatcher = ProcessDispatcherServiceClient()
    procdef_id = process_dispatcher.create_process_definition(
        process_definition=producer_definition)
    pid = process_dispatcher.schedule_process(
        process_definition_id=procdef_id, configuration=configuration)

    return pid
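# Illustrative usage of the create_process helper above; the module/class pair
# and the {'process': {...}} configuration layout are borrowed from the logger
# snippets in this collection, and 'some_stream_id' is a placeholder.
pid = create_process(name='granule_logger',
                     module='ion.processes.data.stream_granule_logger',
                     class_name='StreamGranuleLogger',
                     configuration={'process': {'stream_id': 'some_stream_id'}})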
def create_logger(self, name, stream_id=''):
    # logger process
    producer_definition = ProcessDefinition(name=name + '_logger')
    producer_definition.executable = {
        'module': 'ion.processes.data.stream_granule_logger',
        'class': 'StreamGranuleLogger'
    }

    logger_procdef_id = self.process_dispatch_client.create_process_definition(
        process_definition=producer_definition)
    configuration = {
        'process': {
            'stream_id': stream_id,
        }
    }
    pid = self.process_dispatch_client.schedule_process(
        process_definition_id=logger_procdef_id, configuration=configuration)

    return pid
def create_data_process_definition(self, data_process_definition=None):
    result, _ = self.clients.resource_registry.find_resources(
        RT.DataProcessDefinition, None, data_process_definition.name, True)
    validate_true(len(result) == 0,
                  "A data process definition named '%s' already exists"
                  % data_process_definition.name)

    #todo: determine validation checks for a data process def
    data_process_definition_id, version = self.clients.resource_registry.create(
        data_process_definition)

    #-------------------------------
    # Process Definition
    #-------------------------------
    # Create the underlying process definition
    process_definition = ProcessDefinition()
    process_definition.name = data_process_definition.name
    process_definition.description = data_process_definition.description
    process_definition.executable = {'module': data_process_definition.module,
                                     'class': data_process_definition.class_name}
    process_definition_id = self.clients.process_dispatcher.create_process_definition(
        process_definition=process_definition)

    self.clients.resource_registry.create_association(
        data_process_definition_id, PRED.hasProcessDefinition,
        process_definition_id)

    return data_process_definition_id
def launch_producer(self, stream_id=""):
    # --------------------------------------------------------------------------------
    # Create the process definition for the producer
    # --------------------------------------------------------------------------------
    producer_definition = ProcessDefinition(name="Example Data Producer")
    producer_definition.executable = {
        "module": "ion.processes.data.example_data_producer",
        "class": "BetterDataProducer",
    }

    process_definition_id = self.process_dispatcher.create_process_definition(
        process_definition=producer_definition
    )

    # --------------------------------------------------------------------------------
    # Launch the producer
    # --------------------------------------------------------------------------------
    config = DotDict()
    config.process.stream_id = stream_id
    pid = self.process_dispatcher.schedule_process(
        process_definition_id=process_definition_id, configuration=config
    )
    self.pids.append(pid)
def test_cei_launch_mode(self):
    pdc = ProcessDispatcherServiceClient(node=self.container.node)
    p_def = ProcessDefinition(name='Agent007')
    p_def.executable = {
        'module': 'ion.agents.instrument.instrument_agent',
        'class': 'InstrumentAgent'
    }
    p_def_id = pdc.create_process_definition(p_def)

    pid = pdc.create_process(p_def_id)

    def event_callback(event, *args, **kwargs):
        print '######### proc %s in state %s' % (
            event.origin, ProcessStateEnum._str_map[event.state])

    sub = EventSubscriber(event_type='ProcessLifecycleEvent',
                          callback=event_callback,
                          origin=pid,
                          origin_type='DispatchedProcess')
    sub.start()

    agent_config = deepcopy(self._agent_config)
    agent_config['bootmode'] = 'restart'
    pdc.schedule_process(p_def_id, process_id=pid,
                         configuration=agent_config)

    gevent.sleep(5)

    pdc.cancel_process(pid)

    gevent.sleep(15)

    sub.stop()
def test_ingest_to_replay(self):
    self.async_done = AsyncResult()
    sysname = get_sys_name()
    datastore = self.container.datastore_manager.get_datastore(
        self.datastore_name, 'SCIDATA')

    producer_definition = ProcessDefinition(name='Example Data Producer')
    producer_definition.executable = {
        'module': 'ion.processes.data.example_data_producer',
        'class': 'ExampleDataProducer'
    }

    process_definition_id = self.process_dispatcher.create_process_definition(
        process_definition=producer_definition)

    ingestion_configuration_id = self.ingestion_management.create_ingestion_configuration(
        exchange_point_id='science_data',
        couch_storage=CouchStorage(datastore_name=self.datastore_name,
                                   datastore_profile='SCIDATA'),
        number_of_workers=1
    )
    self.ingestion_management.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id)

    stream_id = self.pubsub_management.create_stream(name='data stream')

    dataset_id = self.dataset_management.create_dataset(
        stream_id=stream_id,
        datastore_name=self.datastore_name,
    )

    self.ingestion_management.create_dataset_configuration(
        dataset_id=dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id
    )

    configuration = {
        'process': {
            'stream_id': stream_id
        }
    }

    self.process_dispatcher.schedule_process(process_definition_id,
                                             configuration=configuration)

    replay_id, stream_id = self.data_retriever.define_replay(dataset_id=dataset_id)

    subscriber = Subscriber(name=('%s.science_data' % sysname, 'test_queue'),
                            callback=self.subscriber_action,
                            binding='%s.data' % stream_id)
    gevent.spawn(subscriber.listen)

    done = False
    while not done:
        results = datastore.query_view('manifest/by_dataset')
        if len(results) >= 2:
            done = True

    self.data_retriever.start_replay(replay_id)
    self.async_done.get(timeout=10)
def test_raw_stream_integration(self):
    cc = self.container
    assertions = self.assertTrue

    # -----------------------------
    # Copy below here to run as a script (don't forget the imports of course!)
    # -----------------------------

    # Create some service clients...
    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

    # declare some handy variables
    datastore_name = "test_dm_integration"
    datastore = cc.datastore_manager.get_datastore(
        datastore_name, profile=DataStore.DS_PROFILE.SCIDATA)

    ###
    ### And two process definitions...
    ###

    # one for the ctd simulator...
    producer_definition = ProcessDefinition(name="Example Data Producer")
    producer_definition.executable = {
        "module": "ion.processes.data.example_data_producer",
        "class": "ExampleDataProducer",
    }
    producer_procdef_id = process_dispatcher.create_process_definition(
        process_definition=producer_definition)

    # ---------------------------
    # Set up ingestion - this is an operator concern - not done by SA in a deployed system
    # ---------------------------
    # Configure ingestion using one worker, ingesting to the test_dm_integration
    # datastore with the SCIDATA profile
    log.debug("Calling create_ingestion_configuration")
    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id="science_data",
        couch_storage=CouchStorage(datastore_name=datastore_name,
                                   datastore_profile="SCIDATA"),
        number_of_workers=1,
    )
    #
    ingestion_management_service.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id)

    # ---------------------------
    # Set up the producer (CTD Simulator)
    # ---------------------------

    # Create the stream
    stream_id = pubsub_management_service.create_stream(name="A data stream")

    # Set up the datasets
    dataset_id = dataset_management_service.create_dataset(
        stream_id=stream_id,
        datastore_name=datastore_name,
        view_name="Undefined!"
    )

    # Configure ingestion of this dataset
    dataset_ingest_config_id = ingestion_management_service.create_dataset_configuration(
        dataset_id=dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id,  # you need to know the ingestion configuration id!
    )
    # Hold onto dataset_ingest_config_id if you want to stop/start ingestion
    # of that dataset by the ingestion service

    # Start the ctd simulator to produce some data
    configuration = {"process": {"stream_id": stream_id}}
    producer_pid = process_dispatcher.schedule_process(
        process_definition_id=producer_procdef_id, configuration=configuration
    )

    found = False
    processes = cc.proc_manager.procs.values()
    for proc in processes:
        if isinstance(proc, IngestionWorker):
            found = True
            break
    self.assertTrue(found, "%s" % cc.proc_manager.procs)

    done = False
    while not done:
        results = datastore.query_view("manifest/by_dataset")
        if len(results) >= 5:
            done = True
def test_usgs_integration(self):
    '''
    test_usgs_integration
    Test full DM Services Integration using usgs
    '''
    cc = self.container
    assertions = self.assertTrue

    #-----------------------------
    # Copy below here
    #-----------------------------
    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    data_retriever_service = DataRetrieverServiceClient(node=cc.node)
    transform_management_service = TransformManagementServiceClient(node=cc.node)
    process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

    process_list = []
    datasets = []

    datastore_name = 'test_usgs_integration'

    #---------------------------
    # Set up ingestion
    #---------------------------
    # Configure ingestion using eight workers, ingesting to the
    # test_usgs_integration datastore with the SCIDATA profile
    log.debug('Calling create_ingestion_configuration')
    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id='science_data',
        couch_storage=CouchStorage(datastore_name=datastore_name,
                                   datastore_profile='SCIDATA'),
        number_of_workers=8
    )
    #
    ingestion_management_service.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id)

    usgs_stream_def = USGS_stream_definition()

    stream_def_id = pubsub_management_service.create_stream_definition(
        container=usgs_stream_def, name='Junk definition')

    #---------------------------
    # Set up the producers (CTD Simulators)
    #---------------------------
    # Launch two simulated CTD producers
    for iteration in xrange(2):
        # Make a stream to output on
        stream_id = pubsub_management_service.create_stream(
            stream_definition_id=stream_def_id)

        #---------------------------
        # Set up the datasets
        #---------------------------
        dataset_id = dataset_management_service.create_dataset(
            stream_id=stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule'
        )

        # Keep track of the datasets
        datasets.append(dataset_id)

        stream_policy_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=ingestion_configuration_id
        )

        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': 'ion.agents.eoi.handler.usgs_stream_publisher',
            'class': 'UsgsPublisher'
        }
        configuration = {
            'process': {
                'stream_id': stream_id,
            }
        }
        procdef_id = process_dispatcher.create_process_definition(
            process_definition=producer_definition)
        log.debug('LUKE_DEBUG: procdef_id: %s', procdef_id)
        pid = process_dispatcher.schedule_process(
            process_definition_id=procdef_id, configuration=configuration)

        # Keep track, we'll kill 'em later.
        process_list.append(pid)

    # Get about 4 seconds of data
    time.sleep(4)

    #---------------------------
    # Stop producing data
    #---------------------------
    for process in process_list:
        process_dispatcher.cancel_process(process)

    #----------------------------------------------
    # The replay and the transform, a love story.
    #----------------------------------------------
    # Happy Valentines to the clever coder who catches the above!
    transform_definition = ProcessDefinition()
    transform_definition.executable = {
        'module': 'ion.processes.data.transforms.transform_example',
        'class': 'TransformCapture'
    }
    transform_definition_id = process_dispatcher.create_process_definition(
        process_definition=transform_definition)

    dataset_id = datasets.pop()  # Just need one for now
    replay_id, stream_id = data_retriever_service.define_replay(dataset_id=dataset_id)

    #--------------------------------------------
    # I'm Selling magazine subscriptions here!
    #--------------------------------------------
    subscription = pubsub_management_service.create_subscription(
        query=StreamQuery(stream_ids=[stream_id]),
        exchange_name='transform_capture_point')

    #--------------------------------------------
    # Start the transform (capture)
    #--------------------------------------------
    transform_id = transform_management_service.create_transform(
        name='capture_transform',
        in_subscription_id=subscription,
        process_definition_id=transform_definition_id
    )

    transform_management_service.activate_transform(transform_id=transform_id)

    #--------------------------------------------
    # BEGIN REPLAY!
    #--------------------------------------------
    data_retriever_service.start_replay(replay_id=replay_id)

    #--------------------------------------------
    # Lets get some boundaries
    #--------------------------------------------
    bounds = dataset_management_service.get_dataset_bounds(dataset_id=dataset_id)
def test_usgs_integration(self):
    '''
    test_usgs_integration
    Test full DM Services Integration using usgs
    '''
    cc = self.container
    assertions = self.assertTrue

    #-----------------------------
    # Copy below here
    #-----------------------------
    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    data_retriever_service = DataRetrieverServiceClient(node=cc.node)
    transform_management_service = TransformManagementServiceClient(node=cc.node)
    process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

    process_list = []
    datasets = []

    datastore_name = 'test_usgs_integration'

    #---------------------------
    # Set up ingestion
    #---------------------------
    # Configure ingestion using eight workers, ingesting to the
    # test_usgs_integration datastore with the SCIDATA profile
    log.debug('Calling create_ingestion_configuration')
    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id='science_data',
        couch_storage=CouchStorage(datastore_name=datastore_name,
                                   datastore_profile='SCIDATA'),
        number_of_workers=8
    )
    #
    ingestion_management_service.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id)

    usgs_stream_def = USGS_stream_definition()

    stream_def_id = pubsub_management_service.create_stream_definition(
        container=usgs_stream_def, name='Junk definition')

    #---------------------------
    # Set up the producers (CTD Simulators)
    #---------------------------
    # Launch two simulated CTD producers
    for iteration in xrange(2):
        # Make a stream to output on
        stream_id = pubsub_management_service.create_stream(
            stream_definition_id=stream_def_id)

        #---------------------------
        # Set up the datasets
        #---------------------------
        dataset_id = dataset_management_service.create_dataset(
            stream_id=stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule'
        )

        # Keep track of the datasets
        datasets.append(dataset_id)

        stream_policy_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=ingestion_configuration_id
        )

        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': 'eoi.agent.handler.usgs_stream_publisher',
            'class': 'UsgsPublisher'
        }
        configuration = {
            'process': {
                'stream_id': stream_id,
            }
        }
        procdef_id = process_dispatcher.create_process_definition(
            process_definition=producer_definition)
        log.debug('LUKE_DEBUG: procdef_id: %s', procdef_id)
        pid = process_dispatcher.schedule_process(
            process_definition_id=procdef_id, configuration=configuration)

        # Keep track, we'll kill 'em later.
        process_list.append(pid)

    # Get about 4 seconds of data
    time.sleep(4)

    #---------------------------
    # Stop producing data
    #---------------------------
    for process in process_list:
        process_dispatcher.cancel_process(process)

    #----------------------------------------------
    # The replay and the transform, a love story.
    #----------------------------------------------
    # Happy Valentines to the clever coder who catches the above!
    transform_definition = ProcessDefinition()
    transform_definition.executable = {
        'module': 'ion.processes.data.transforms.transform_example',
        'class': 'TransformCapture'
    }
    transform_definition_id = process_dispatcher.create_process_definition(
        process_definition=transform_definition)

    dataset_id = datasets.pop()  # Just need one for now
    replay_id, stream_id = data_retriever_service.define_replay(dataset_id=dataset_id)

    #--------------------------------------------
    # I'm Selling magazine subscriptions here!
    #--------------------------------------------
    subscription = pubsub_management_service.create_subscription(
        query=StreamQuery(stream_ids=[stream_id]),
        exchange_name='transform_capture_point')

    #--------------------------------------------
    # Start the transform (capture)
    #--------------------------------------------
    transform_id = transform_management_service.create_transform(
        name='capture_transform',
        in_subscription_id=subscription,
        process_definition_id=transform_definition_id
    )

    transform_management_service.activate_transform(transform_id=transform_id)

    #--------------------------------------------
    # BEGIN REPLAY!
    #--------------------------------------------
    data_retriever_service.start_replay(replay_id=replay_id)

    #--------------------------------------------
    # Lets get some boundaries
    #--------------------------------------------
    bounds = dataset_management_service.get_dataset_bounds(dataset_id=dataset_id)
def test_createDataProductVersionFromSim(self):
    # ctd simulator process
    producer_definition = ProcessDefinition(name='Example Data Producer')
    producer_definition.executable = {
        'module': 'ion.services.sa.test.simple_ctd_data_producer',
        'class': 'SimpleCtdDataProducer'
    }
    producer_procdef_id = self.processdispatchclient.create_process_definition(
        process_definition=producer_definition)

    #-------------------------------
    # Create InstrumentDevice
    #-------------------------------
    instDevice_obj = IonObject(RT.InstrumentDevice,
                               name='SBE37IMDevice',
                               description="SBE37IMDevice",
                               serial_number="12345")
    try:
        instDevice_id1 = self.imsclient.create_instrument_device(
            instrument_device=instDevice_obj)
        self.damsclient.register_instrument(instDevice_id1)
    except BadRequest as ex:
        self.fail("failed to create new InstrumentDevice: %s" % ex)

    #-------------------------------
    # Create CTD Parsed as the first data product
    #-------------------------------
    # create a stream definition for the data from the ctd simulator
    ctd_stream_def = SBE37_CDM_stream_definition()
    ctd_stream_def_id = self.pubsubcli.create_stream_definition(
        container=ctd_stream_def)
    print 'test_createTransformsThenActivateInstrument: new Stream Definition id = ', ctd_stream_def_id

    print 'Creating new CDM data product with a stream definition'

    craft = CoverageCraft
    sdom, tdom = craft.create_domains()
    sdom = sdom.dump()
    tdom = tdom.dump()
    parameter_dictionary = craft.create_parameters()
    parameter_dictionary = parameter_dictionary.dump()

    dp_obj = IonObject(RT.DataProduct,
                       name='ctd_parsed',
                       description='ctd stream test',
                       temporal_domain=tdom,
                       spatial_domain=sdom)

    ctd_parsed_data_product = self.dataproductclient.create_data_product(
        dp_obj, ctd_stream_def_id, parameter_dictionary)
    print 'new ctd_parsed_data_product_id = ', ctd_parsed_data_product

    self.damsclient.assign_data_product(
        input_resource_id=instDevice_id1,
        data_product_id=ctd_parsed_data_product)

    self.dataproductclient.activate_data_product_persistence(
        data_product_id=ctd_parsed_data_product)

    # Retrieve the id of the OUTPUT stream from the out Data Product
    stream_ids, _ = self.rrclient.find_objects(ctd_parsed_data_product,
                                               PRED.hasStream, None, True)
    print 'test_createTransformsThenActivateInstrument: Data product streams1 = ', stream_ids
    self.parsed_stream_id = stream_ids[0]

    #-------------------------------
    # Streaming
    #-------------------------------
    # Start the ctd simulator to produce some data
    configuration = {
        'process': {
            'stream_id': self.parsed_stream_id,
        }
    }
    producer_pid = self.processdispatchclient.schedule_process(
        process_definition_id=producer_procdef_id, configuration=configuration)

    time.sleep(2.0)

    # clean up the launched processes
    self.processdispatchclient.cancel_process(producer_pid)

    #-------------------------------
    # Create InstrumentDevice 2
    #-------------------------------
    instDevice_obj = IonObject(RT.InstrumentDevice,
                               name='SBE37IMDevice2',
                               description="SBE37IMDevice",
                               serial_number="6789")
    try:
        instDevice_id2 = self.imsclient.create_instrument_device(
            instrument_device=instDevice_obj)
        self.damsclient.register_instrument(instDevice_id2)
    except BadRequest as ex:
        self.fail("failed to create new InstrumentDevice2: %s" % ex)

    #-------------------------------
    # Create CTD Parsed as the new version of the original data product
    #-------------------------------
    # create a stream definition for the data from the ctd simulator
    dataproductversion_obj = IonObject(RT.DataProduct,
                                       name='CTDParsedV2',
                                       description="new version",
                                       temporal_domain=tdom,
                                       spatial_domain=sdom)
    ctd_parsed_data_product_new_version = self.dataproductclient.create_data_product_version(
        ctd_parsed_data_product, dataproductversion_obj)
    print 'new ctd_parsed_data_product_version_id = ', ctd_parsed_data_product_new_version

    self.damsclient.assign_data_product(
        input_resource_id=instDevice_id1,
        data_product_id=ctd_parsed_data_product,
        data_product_version_id=ctd_parsed_data_product_new_version)

    #-------------------------------
    # ACTIVATE PERSISTENCE FOR DATA PRODUCT VERSIONS NOT IMPL YET!!!!!!!!
    #-------------------------------
    #self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_parsed_data_product_new_version)

    # Retrieve the id of the OUTPUT stream from the out Data Product
    stream_ids, _ = self.rrclient.find_objects(ctd_parsed_data_product_new_version,
                                               PRED.hasStream, None, True)
    print 'test_createTransformsThenActivateInstrument: Data product streams2 = ', stream_ids
    self.parsed_stream_id2 = stream_ids[0]

    #-------------------------------
    # Streaming
    #-------------------------------
    # Start the ctd simulator to produce some data
    configuration = {
        'process': {
            'stream_id': self.parsed_stream_id2,
        }
    }
    producer_pid = self.processdispatchclient.schedule_process(
        process_definition_id=producer_procdef_id, configuration=configuration)

    time.sleep(2.0)

    # clean up the launched processes
    self.processdispatchclient.cancel_process(producer_pid)
def test_workflow_components(self):
    cc = self.container
    assertions = self.assertTrue

    #-------------------------------
    # Create CTD Parsed as the initial data product
    #-------------------------------
    # create a stream definition for the data from the ctd simulator
    ctd_stream_def = SBE37_CDM_stream_definition()
    ctd_stream_def_id = self.pubsubclient.create_stream_definition(
        container=ctd_stream_def, name='Simulated CTD data')

    print 'Creating new CDM data product with a stream definition'
    dp_obj = IonObject(RT.DataProduct,
                       name='ctd_parsed',
                       description='ctd stream test')
    try:
        ctd_parsed_data_product = self.dataproductclient.create_data_product(
            dp_obj, ctd_stream_def_id)
    except Exception as ex:
        self.fail("failed to create new data product: %s" % ex)
    print 'new ctd_parsed_data_product_id = ', ctd_parsed_data_product

    instDevice_obj = IonObject(RT.InstrumentDevice,
                               name='SBE37IMDevice',
                               description="SBE37IMDevice",
                               serial_number="12345")
    instDevice_id = self.imsclient.create_instrument_device(
        instrument_device=instDevice_obj)
    self.damsclient.assign_data_product(
        input_resource_id=instDevice_id,
        data_product_id=ctd_parsed_data_product)

    self.dataproductclient.activate_data_product_persistence(
        data_product_id=ctd_parsed_data_product,
        persist_data=True, persist_metadata=True)

    # Retrieve the id of the OUTPUT stream from the out Data Product
    stream_ids, _ = self.rrclient.find_objects(ctd_parsed_data_product,
                                               PRED.hasStream, None, True)
    assertions(len(stream_ids) > 0)
    ctd_stream_id = stream_ids[0]

    ###
    ### Setup the first transformation
    ###

    # Salinity: Data Process Definition
    log.debug("Create data process definition SalinityTransform")
    dpd_obj = IonObject(RT.DataProcessDefinition,
                        name='ctd_salinity',
                        description='create a salinity data product',
                        module='ion.processes.data.transforms.ctd.ctd_L2_salinity',
                        class_name='SalinityTransform',
                        process_source='SalinityTransform source code here...')
    try:
        ctd_L2_salinity_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj)
    except Exception as ex:
        self.fail("failed to create new SalinityTransform data process definition: %s" % ex)

    # create a stream definition for the data from the salinity Transform
    sal_stream_def_id = self.pubsubclient.create_stream_definition(
        container=SalinityTransform.outgoing_stream_def, name='L2_salinity')
    self.dataprocessclient.assign_stream_definition_to_data_process_definition(
        sal_stream_def_id, ctd_L2_salinity_dprocdef_id)

    # Create the output data product of the transform
    log.debug("create output data product L2 Salinity")
    ctd_l2_salinity_output_dp_obj = IonObject(RT.DataProduct,
                                              name='L2_Salinity',
                                              description='transform output L2 salinity')
    ctd_l2_salinity_output_dp_id = self.dataproductclient.create_data_product(
        ctd_l2_salinity_output_dp_obj, sal_stream_def_id)
    self.dataproductclient.activate_data_product_persistence(
        data_product_id=ctd_l2_salinity_output_dp_id,
        persist_data=True, persist_metadata=True)

    # Create the Salinity transform data process
    log.debug("create L2_salinity data_process and start it")
    try:
        l2_salinity_all_data_process_id = self.dataprocessclient.create_data_process(
            ctd_L2_salinity_dprocdef_id, ctd_parsed_data_product,
            {'output': ctd_l2_salinity_output_dp_id})
        self.dataprocessclient.activate_data_process(l2_salinity_all_data_process_id)
    except BadRequest as ex:
        self.fail("failed to create new data process: %s" % ex)

    log.debug("test_createTransformsThenActivateInstrument: create L2_salinity data_process return")

    ###
    ### Setup the second transformation
    ###

    # Salinity: Data Process Definition
log.debug("Create data process definition SalinityDoublerTransform") dpd_obj = IonObject(RT.DataProcessDefinition, name='salinity_doubler', description='create a salinity doubler data product', module='ion.processes.data.transforms.example_double_salinity', class_name='SalinityDoubler', process_source='SalinityDoubler source code here...') try: salinity_doubler_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj) except Exception as ex: self.fail("failed to create new SalinityDoubler data process definition: %s" %ex) # create a stream definition for the data from the salinity Transform salinity_double_stream_def_id = self.pubsubclient.create_stream_definition(container=SalinityDoubler.outgoing_stream_def, name='SalinityDoubler') self.dataprocessclient.assign_stream_definition_to_data_process_definition(salinity_double_stream_def_id, salinity_doubler_dprocdef_id ) # Create the output data product of the transform log.debug("create output data product SalinityDoubler") salinity_doubler_output_dp_obj = IonObject(RT.DataProduct, name='SalinityDoubler',description='transform output salinity doubler') salinity_doubler_output_dp_id = self.dataproductclient.create_data_product(salinity_doubler_output_dp_obj, salinity_double_stream_def_id) self.dataproductclient.activate_data_product_persistence(data_product_id=salinity_doubler_output_dp_id, persist_data=True, persist_metadata=True) # Create the Salinity transform data process log.debug("create L2_salinity data_process and start it") try: salinity_double_data_process_id = self.dataprocessclient.create_data_process(salinity_doubler_dprocdef_id, ctd_l2_salinity_output_dp_id, {'output':salinity_doubler_output_dp_id}) self.dataprocessclient.activate_data_process(salinity_double_data_process_id) except BadRequest as ex: self.fail("failed to create new data process: %s" %ex) log.debug("test_createTransformsThenActivateInstrument: create L2_salinity data_process return") ### ### Start the process for producing the CTD data ### # process definition for the ctd simulator... 
producer_definition = ProcessDefinition() producer_definition.executable = { 'module':'ion.processes.data.ctd_stream_publisher', 'class':'SimpleCtdPublisher' } ctd_sim_procdef_id = self.process_dispatcher.create_process_definition(process_definition=producer_definition) # Start the ctd simulator to produce some data configuration = { 'process':{ 'stream_id':ctd_stream_id, } } ctd_sim_pid = self.process_dispatcher.schedule_process(process_definition_id=ctd_sim_procdef_id, configuration=configuration) ## get the stream id for the transform outputs stream_ids, _ = self.rrclient.find_objects(ctd_l2_salinity_output_dp_id, PRED.hasStream, None, True) assertions(len(stream_ids) > 0 ) sal_stream_id = stream_ids[0] stream_ids, _ = self.rrclient.find_objects(salinity_doubler_output_dp_id, PRED.hasStream, None, True) assertions(len(stream_ids) > 0 ) sal_dbl_stream_id = stream_ids[0] ### ### Make a subscriber in the test to listen for transformed data ### salinity_subscription_id = self.pubsubclient.create_subscription( query=StreamQuery([ctd_stream_id, sal_stream_id,sal_dbl_stream_id]), exchange_name = 'salinity_test', name = "test salinity subscription", ) pid = cc.spawn_process(name='dummy_process_for_test', module='pyon.ion.process', cls='SimpleProcess', config={}) dummy_process = cc.proc_manager.procs[pid] subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, node=cc.node) result = gevent.event.AsyncResult() results = [] def message_received(message, headers): # Heads log.warn(' data received!') results.append(message) if len(results) >15: result.set(True) subscriber = subscriber_registrar.create_subscriber(exchange_name='salinity_test', callback=message_received) subscriber.start() # after the queue has been created it is safe to activate the subscription self.pubsubclient.activate_subscription(subscription_id=salinity_subscription_id) # Assert that we have received data assertions(result.get(timeout=20)) #Stop the transform process # stop the flow parse the messages... self.process_dispatcher.cancel_process(ctd_sim_pid) # kill the ctd simulator process - that is enough data first_salinity_values = None for message in results: try: psd = PointSupplementStreamParser(stream_definition=ctd_stream_def, stream_granule=message) temp = psd.get_values('temperature') print psd.list_field_names() except KeyError as ke: temp = None if temp is not None: assertions(isinstance(temp, numpy.ndarray)) print 'temperature=' + str(numpy.nanmin(temp)) first_salinity_values = None else: psd = PointSupplementStreamParser(stream_definition=SalinityTransform.outgoing_stream_def, stream_granule=message) print psd.list_field_names() # Test the handy info method for the names of fields in the stream def assertions('salinity' in psd.list_field_names()) # you have to know the name of the coverage in stream def salinity = psd.get_values('salinity') print 'salinity=' + str(numpy.nanmin(salinity)) assertions(isinstance(salinity, numpy.ndarray)) assertions(numpy.nanmin(salinity) > 0.0) # salinity should always be greater than 0 if first_salinity_values is None: first_salinity_values = salinity.tolist() else: second_salinity_values = salinity.tolist() assertions(len(first_salinity_values) == len(second_salinity_values)) for idx in range(0,len(first_salinity_values)): assertions(first_salinity_values[idx]*2.0 == second_salinity_values[idx])
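# A minimal sketch (not part of the services under test) that factors out the
# listen-for-N-granules pattern test_workflow_components builds inline above:
# spawn a dummy container process, attach a subscriber, and block until enough
# messages arrive.  It assumes the same imports and running container `cc` as
# the tests above; the helper name `gather_granules` and the idea of passing
# the registrar class in as an argument are hypothetical conveniences.
def gather_granules(cc, subscriber_registrar_cls, exchange_name, how_many, timeout=20):
    import gevent.event

    # a throwaway process to anchor the subscriber, exactly as the tests do
    pid = cc.spawn_process(name='dummy_process_for_test',
                           module='pyon.ion.process',
                           cls='SimpleProcess', config={})
    dummy_process = cc.proc_manager.procs[pid]
    registrar = subscriber_registrar_cls(process=dummy_process, node=cc.node)

    done = gevent.event.AsyncResult()
    granules = []

    def message_received(message, headers):
        # collect granules until we have seen how_many of them
        granules.append(message)
        if len(granules) >= how_many:
            done.set(True)

    subscriber = registrar.create_subscriber(exchange_name=exchange_name,
                                             callback=message_received)
    subscriber.start()
    done.get(timeout=timeout)  # raises gevent.Timeout if data never arrives
    return granules

# e.g.: granules = gather_granules(cc, StreamSubscriberRegistrar, 'salinity_test', 15)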
def test_createTransformsThenPublishGranules(self): # ctd simulator process producer_definition = ProcessDefinition(name='Example Data Producer') producer_definition.executable = { 'module':'ion.services.sa.test.simple_ctd_data_producer', 'class':'SimpleCtdDataProducer' } producer_procdef_id = self.processdispatchclient.create_process_definition(process_definition=producer_definition) #------------------------------- # Create CTD Parsed as the first data product #------------------------------- # create a stream definition for the data from the ctd simulator ctd_stream_def = SBE37_CDM_stream_definition() ctd_stream_def_id = self.pubsubclient.create_stream_definition(container=ctd_stream_def) print 'test_createTransformsThenActivateInstrument: new Stream Definition id = ', ctd_stream_def_id print 'Creating new CDM data product with a stream definition' craft = CoverageCraft sdom, tdom = craft.create_domains() sdom = sdom.dump() tdom = tdom.dump() parameter_dictionary = craft.create_parameters() parameter_dictionary = parameter_dictionary.dump() dp_obj = IonObject(RT.DataProduct, name='ctd_parsed', description='ctd stream test', temporal_domain = tdom, spatial_domain = sdom) ctd_parsed_data_product = self.dataproductclient.create_data_product(dp_obj, ctd_stream_def_id, parameter_dictionary) print 'new ctd_parsed_data_product_id = ', ctd_parsed_data_product #self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=ctd_parsed_data_product) #self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_parsed_data_product) # Retrieve the id of the OUTPUT stream from the out Data Product stream_ids, _ = self.rrclient.find_objects(ctd_parsed_data_product, PRED.hasStream, None, True) print 'test_createTransformsThenActivateInstrument: Data product streams1 = ', stream_ids self.parsed_stream_id = stream_ids[0] #------------------------------- # Create CTD Raw as the second data product #------------------------------- print 'test_createTransformsThenActivateInstrument: Creating new RAW data product with a stream definition' raw_stream_def = SBE37_RAW_stream_definition() raw_stream_def_id = self.pubsubclient.create_stream_definition(container=raw_stream_def) dp_obj = IonObject(RT.DataProduct, name='ctd_raw', description='raw stream test', temporal_domain = tdom, spatial_domain = sdom) ctd_raw_data_product = self.dataproductclient.create_data_product(dp_obj, raw_stream_def_id, parameter_dictionary) print 'new ctd_raw_data_product_id = ', ctd_raw_data_product #self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=ctd_raw_data_product) self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_raw_data_product) # Retrieve the id of the OUTPUT stream from the out Data Product stream_ids, _ = self.rrclient.find_objects(ctd_raw_data_product, PRED.hasStream, None, True) print 'Data product streams2 = ', stream_ids #------------------------------- # L0 Conductivity - Temperature - Pressure: Data Process Definition #------------------------------- log.debug("TestIntDataProcessMgmtServiceMultiOut: create data process definition ctd_L0_all") dpd_obj = IonObject(RT.DataProcessDefinition, name='ctd_L0_all', description='transform ctd package into three separate L0 streams', module='ion.processes.data.transforms.ctd.ctd_L0_all', class_name='ctd_L0_all', process_source='some_source_reference') try: ctd_L0_all_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj) except BadRequest as ex: 
self.fail("failed to create new ctd_L0_all data process definition: %s" %ex) #------------------------------- # L1 Conductivity: Data Process Definition #------------------------------- log.debug("TestIntDataProcessMgmtServiceMultiOut: create data process definition CTDL1ConductivityTransform") dpd_obj = IonObject(RT.DataProcessDefinition, name='ctd_L1_conductivity', description='create the L1 conductivity data product', module='ion.processes.data.transforms.ctd.ctd_L1_conductivity', class_name='CTDL1ConductivityTransform', process_source='CTDL1ConductivityTransform source code here...') try: ctd_L1_conductivity_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj) except BadRequest as ex: self.fail("failed to create new CTDL1ConductivityTransform data process definition: %s" %ex) #------------------------------- # L1 Pressure: Data Process Definition #------------------------------- log.debug("TestIntDataProcessMgmtServiceMultiOut: create data process definition CTDL1PressureTransform") dpd_obj = IonObject(RT.DataProcessDefinition, name='ctd_L1_pressure', description='create the L1 pressure data product', module='ion.processes.data.transforms.ctd.ctd_L1_pressure', class_name='CTDL1PressureTransform', process_source='CTDL1PressureTransform source code here...') try: ctd_L1_pressure_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj) except BadRequest as ex: self.fail("failed to create new CTDL1PressureTransform data process definition: %s" %ex) #------------------------------- # L1 Temperature: Data Process Definition #------------------------------- log.debug("TestIntDataProcessMgmtServiceMultiOut: create data process definition CTDL1TemperatureTransform") dpd_obj = IonObject(RT.DataProcessDefinition, name='ctd_L1_temperature', description='create the L1 temperature data product', module='ion.processes.data.transforms.ctd.ctd_L1_temperature', class_name='CTDL1TemperatureTransform', process_source='CTDL1TemperatureTransform source code here...') try: ctd_L1_temperature_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj) except BadRequest as ex: self.fail("failed to create new CTDL1TemperatureTransform data process definition: %s" %ex) #------------------------------- # L2 Salinity: Data Process Definition #------------------------------- log.debug("TestIntDataProcessMgmtServiceMultiOut: create data process definition SalinityTransform") dpd_obj = IonObject(RT.DataProcessDefinition, name='ctd_L2_salinity', description='create the L1 temperature data product', module='ion.processes.data.transforms.ctd.ctd_L2_salinity', class_name='SalinityTransform', process_source='SalinityTransform source code here...') try: ctd_L2_salinity_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj) except BadRequest as ex: self.fail("failed to create new SalinityTransform data process definition: %s" %ex) #------------------------------- # L2 Density: Data Process Definition #------------------------------- log.debug("TestIntDataProcessMgmtServiceMultiOut: create data process definition DensityTransform") dpd_obj = IonObject(RT.DataProcessDefinition, name='ctd_L2_density', description='create the L1 temperature data product', module='ion.processes.data.transforms.ctd.ctd_L2_density', class_name='DensityTransform', process_source='DensityTransform source code here...') try: ctd_L2_density_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj) except BadRequest as ex: self.fail("failed to create 
new DensityTransform data process definition: %s" %ex) self.loggerpids = [] #------------------------------- # L0 Conductivity - Temperature - Pressure: Output Data Products #------------------------------- outgoing_stream_l0_conductivity = L0_conductivity_stream_definition() outgoing_stream_l0_conductivity_id = self.pubsubclient.create_stream_definition(container=outgoing_stream_l0_conductivity, name='L0_Conductivity') self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l0_conductivity_id, ctd_L0_all_dprocdef_id ) outgoing_stream_l0_pressure = L0_pressure_stream_definition() outgoing_stream_l0_pressure_id = self.pubsubclient.create_stream_definition(container=outgoing_stream_l0_pressure, name='L0_Pressure') self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l0_pressure_id, ctd_L0_all_dprocdef_id ) outgoing_stream_l0_temperature = L0_temperature_stream_definition() outgoing_stream_l0_temperature_id = self.pubsubclient.create_stream_definition(container=outgoing_stream_l0_temperature, name='L0_Temperature') self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l0_temperature_id, ctd_L0_all_dprocdef_id ) self.output_products={} log.debug("test_createTransformsThenActivateInstrument: create output data product L0 conductivity") ctd_l0_conductivity_output_dp_obj = IonObject(RT.DataProduct, name='L0_Conductivity', description='transform output conductivity', temporal_domain = tdom, spatial_domain = sdom) ctd_l0_conductivity_output_dp_id = self.dataproductclient.create_data_product(ctd_l0_conductivity_output_dp_obj, outgoing_stream_l0_conductivity_id, parameter_dictionary) self.output_products['conductivity'] = ctd_l0_conductivity_output_dp_id self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_l0_conductivity_output_dp_id) log.debug("test_createTransformsThenActivateInstrument: create output data product L0 pressure") ctd_l0_pressure_output_dp_obj = IonObject(RT.DataProduct, name='L0_Pressure', description='transform output pressure', temporal_domain = tdom, spatial_domain = sdom) ctd_l0_pressure_output_dp_id = self.dataproductclient.create_data_product(ctd_l0_pressure_output_dp_obj, outgoing_stream_l0_pressure_id, parameter_dictionary) self.output_products['pressure'] = ctd_l0_pressure_output_dp_id self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_l0_pressure_output_dp_id) log.debug("test_createTransformsThenActivateInstrument: create output data product L0 temperature") ctd_l0_temperature_output_dp_obj = IonObject(RT.DataProduct, name='L0_Temperature', description='transform output temperature', temporal_domain = tdom, spatial_domain = sdom) ctd_l0_temperature_output_dp_id = self.dataproductclient.create_data_product(ctd_l0_temperature_output_dp_obj, outgoing_stream_l0_temperature_id, parameter_dictionary) self.output_products['temperature'] = ctd_l0_temperature_output_dp_id self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_l0_temperature_output_dp_id) #------------------------------- # L1 Conductivity - Temperature - Pressure: Output Data Products #------------------------------- outgoing_stream_l1_conductivity = L1_conductivity_stream_definition() outgoing_stream_l1_conductivity_id = self.pubsubclient.create_stream_definition(container=outgoing_stream_l1_conductivity, name='L1_conductivity') 
self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l1_conductivity_id, ctd_L1_conductivity_dprocdef_id ) outgoing_stream_l1_pressure = L1_pressure_stream_definition() outgoing_stream_l1_pressure_id = self.pubsubclient.create_stream_definition(container=outgoing_stream_l1_pressure, name='L1_Pressure') self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l1_pressure_id, ctd_L1_pressure_dprocdef_id ) outgoing_stream_l1_temperature = L1_temperature_stream_definition() outgoing_stream_l1_temperature_id = self.pubsubclient.create_stream_definition(container=outgoing_stream_l1_temperature, name='L1_Temperature') self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l1_temperature_id, ctd_L1_temperature_dprocdef_id ) log.debug("test_createTransformsThenActivateInstrument: create output data product L1 conductivity") ctd_l1_conductivity_output_dp_obj = IonObject(RT.DataProduct, name='L1_Conductivity', description='transform output L1 conductivity', temporal_domain = tdom, spatial_domain = sdom) ctd_l1_conductivity_output_dp_id = self.dataproductclient.create_data_product(ctd_l1_conductivity_output_dp_obj, outgoing_stream_l1_conductivity_id, parameter_dictionary) self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_l1_conductivity_output_dp_id) stream_ids, _ = self.rrclient.find_objects(ctd_l1_conductivity_output_dp_id, PRED.hasStream, None, True) log.debug(" ctd_l1_conductivity stream id = %s", str(stream_ids) ) pid = self.create_logger(' ctd_l1_conductivity', stream_ids[0] ) self.loggerpids.append(pid) log.debug("test_createTransformsThenActivateInstrument: create output data product L1 pressure") ctd_l1_pressure_output_dp_obj = IonObject(RT.DataProduct, name='L1_Pressure', description='transform output L1 pressure', temporal_domain = tdom, spatial_domain = sdom) ctd_l1_pressure_output_dp_id = self.dataproductclient.create_data_product(ctd_l1_pressure_output_dp_obj, outgoing_stream_l1_pressure_id, parameter_dictionary) self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_l1_pressure_output_dp_id) stream_ids, _ = self.rrclient.find_objects(ctd_l1_pressure_output_dp_id, PRED.hasStream, None, True) log.debug(" ctd_l1_pressure stream id = %s", str(stream_ids) ) pid = self.create_logger(' ctd_l1_pressure', stream_ids[0] ) self.loggerpids.append(pid) log.debug("test_createTransformsThenActivateInstrument: create output data product L1 temperature") ctd_l1_temperature_output_dp_obj = IonObject(RT.DataProduct, name='L1_Temperature', description='transform output L1 temperature', temporal_domain = tdom, spatial_domain = sdom) ctd_l1_temperature_output_dp_id = self.dataproductclient.create_data_product(ctd_l1_temperature_output_dp_obj, outgoing_stream_l1_temperature_id, parameter_dictionary) self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_l1_temperature_output_dp_id) stream_ids, _ = self.rrclient.find_objects(ctd_l1_temperature_output_dp_id, PRED.hasStream, None, True) log.debug(" ctd_l1_temperature stream id = %s", str(stream_ids) ) pid = self.create_logger(' ctd_l1_temperature', stream_ids[0] ) self.loggerpids.append(pid) #------------------------------- # L2 Salinity - Density: Output Data Products #------------------------------- outgoing_stream_l2_salinity = L2_practical_salinity_stream_definition() outgoing_stream_l2_salinity_id = self.pubsubclient.create_stream_definition(container=outgoing_stream_l2_salinity, 
name='L2_salinity') self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l2_salinity_id, ctd_L2_salinity_dprocdef_id ) outgoing_stream_l2_density = L2_density_stream_definition() outgoing_stream_l2_density_id = self.pubsubclient.create_stream_definition(container=outgoing_stream_l2_density, name='L2_Density') self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l2_density_id, ctd_L2_density_dprocdef_id ) log.debug("test_createTransformsThenActivateInstrument: create output data product L2 Salinity") ctd_l2_salinity_output_dp_obj = IonObject(RT.DataProduct, name='L2_Salinity', description='transform output L2 salinity', temporal_domain = tdom, spatial_domain = sdom) ctd_l2_salinity_output_dp_id = self.dataproductclient.create_data_product(ctd_l2_salinity_output_dp_obj, outgoing_stream_l2_salinity_id, parameter_dictionary) self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_l2_salinity_output_dp_id) log.debug("test_createTransformsThenActivateInstrument: create output data product L2 Density") ctd_l2_density_output_dp_obj = IonObject(RT.DataProduct, name='L2_Density', description='transform output L2 density', temporal_domain = tdom, spatial_domain = sdom) ctd_l2_density_output_dp_id = self.dataproductclient.create_data_product(ctd_l2_density_output_dp_obj, outgoing_stream_l2_density_id, parameter_dictionary) self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_l2_density_output_dp_id) # Set up subscribers/loggers to these streams stream_ids, _ = self.rrclient.find_objects(ctd_l2_salinity_output_dp_id, PRED.hasStream, None, True) log.debug("L2 salinity stream id = %s", str(stream_ids) ) pid = self.create_logger('L2_salinity', stream_ids[0] ) self.loggerpids.append(pid) stream_ids, _ = self.rrclient.find_objects(ctd_l2_density_output_dp_id, PRED.hasStream, None, True) log.debug("L2 density stream id = %s", str(stream_ids) ) pid = self.create_logger('L2_density', stream_ids[0] ) self.loggerpids.append(pid) #------------------------------- # L0 Conductivity - Temperature - Pressure: Create the data process #------------------------------- log.debug("test_createTransformsThenActivateInstrument: create L0 all data_process start") try: ctd_l0_all_data_process_id = self.dataprocessclient.create_data_process(ctd_L0_all_dprocdef_id, [ctd_parsed_data_product], self.output_products) self.dataprocessclient.activate_data_process(ctd_l0_all_data_process_id) except BadRequest as ex: self.fail("failed to create new data process: %s" %ex) log.debug("test_createTransformsThenActivateInstrument: create L0 all data_process return") #------------------------------- # L1 Conductivity: Create the data process #------------------------------- log.debug("test_createTransformsThenActivateInstrument: create L1 Conductivity data_process start") try: l1_conductivity_data_process_id = self.dataprocessclient.create_data_process(ctd_L1_conductivity_dprocdef_id, [ctd_l0_conductivity_output_dp_id], {'output':ctd_l1_conductivity_output_dp_id}) self.dataprocessclient.activate_data_process(l1_conductivity_data_process_id) except BadRequest as ex: self.fail("failed to create new data process: %s" %ex) log.debug("test_createTransformsThenActivateInstrument: create L1 Conductivity data_process return") #------------------------------- # L1 Pressure: Create the data process #------------------------------- log.debug("test_createTransformsThenActivateInstrument: create L1_Pressure data_process start") try: 
l1_pressure_data_process_id = self.dataprocessclient.create_data_process(ctd_L1_pressure_dprocdef_id, [ctd_l0_pressure_output_dp_id], {'output':ctd_l1_pressure_output_dp_id}) self.dataprocessclient.activate_data_process(l1_pressure_data_process_id) except BadRequest as ex: self.fail("failed to create new data process: %s" %ex) log.debug("test_createTransformsThenActivateInstrument: create L1_Pressure data_process return") #------------------------------- # L1 Temperature: Create the data process #------------------------------- log.debug("test_createTransformsThenActivateInstrument: create L1_Temperature data_process start") try: l1_temperature_all_data_process_id = self.dataprocessclient.create_data_process(ctd_L1_temperature_dprocdef_id, [ctd_l0_temperature_output_dp_id], {'output':ctd_l1_temperature_output_dp_id}) self.dataprocessclient.activate_data_process(l1_temperature_all_data_process_id) except BadRequest as ex: self.fail("failed to create new data process: %s" %ex) log.debug("test_createTransformsThenActivateInstrument: create L1_Temperature data_process return") #------------------------------- # L2 Salinity: Create the data process #------------------------------- log.debug("test_createTransformsThenActivateInstrument: create L2_salinity data_process start") try: l2_salinity_all_data_process_id = self.dataprocessclient.create_data_process(ctd_L2_salinity_dprocdef_id, [ctd_parsed_data_product], {'output':ctd_l2_salinity_output_dp_id}) self.dataprocessclient.activate_data_process(l2_salinity_all_data_process_id) except BadRequest as ex: self.fail("failed to create new data process: %s" %ex) log.debug("test_createTransformsThenActivateInstrument: create L2_salinity data_process return") #------------------------------- # L2 Density: Create the data process #------------------------------- log.debug("test_createTransformsThenActivateInstrument: create L2_Density data_process start") try: l2_density_all_data_process_id = self.dataprocessclient.create_data_process(ctd_L2_density_dprocdef_id, [ctd_parsed_data_product], {'output':ctd_l2_density_output_dp_id}) self.dataprocessclient.activate_data_process(l2_density_all_data_process_id) except BadRequest as ex: self.fail("failed to create new data process: %s" %ex) log.debug("test_createTransformsThenActivateInstrument: create L2_Density data_process return") #------------------------------- # Streaming #------------------------------- # Start the ctd simulator to produce some data configuration = { 'process':{ 'stream_id':self.parsed_stream_id, } } producer_pid = self.processdispatchclient.schedule_process(process_definition_id= producer_procdef_id, configuration=configuration) time.sleep(2.0) # clean up the launched processes self.processdispatchclient.cancel_process(producer_pid) for pid in self.loggerpids: self.processdispatchclient.cancel_process(pid)
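# A minimal sketch of the create-definition / assign-stream-definition block
# that test_createTransformsThenPublishGranules repeats for each transform
# above.  It reuses only the client calls demonstrated in that test and
# assumes IonObject/RT are in scope as they are there; the helper name
# `register_transform_definition` is hypothetical.
def register_transform_definition(dataprocessclient, pubsubclient,
                                  name, module, class_name,
                                  outgoing_stream_def, stream_name):
    # the definition tells the dispatcher which transform class to run
    dpd_obj = IonObject(RT.DataProcessDefinition,
                        name=name,
                        description='create the %s data product' % name,
                        module=module,
                        class_name=class_name,
                        process_source='%s source code here...' % class_name)
    dprocdef_id = dataprocessclient.create_data_process_definition(dpd_obj)

    # the outgoing stream definition is registered and bound to the definition
    stream_def_id = pubsubclient.create_stream_definition(
        container=outgoing_stream_def, name=stream_name)
    dataprocessclient.assign_stream_definition_to_data_process_definition(
        stream_def_id, dprocdef_id)
    return dprocdef_id, stream_def_id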
def test_raw_stream_integration(self): cc = self.container assertions = self.assertTrue #----------------------------- # Copy below here to run as a script (don't forget the imports of course!) #----------------------------- # Create some service clients... pubsub_management_service = PubsubManagementServiceClient(node=cc.node) ingestion_management_service = IngestionManagementServiceClient( node=cc.node) dataset_management_service = DatasetManagementServiceClient( node=cc.node) process_dispatcher = ProcessDispatcherServiceClient(node=cc.node) # declare some handy variables datastore_name = 'test_dm_integration' ### ### In the beginning there was one stream definition... ### # create a stream definition for the data from the ctd simulator raw_ctd_stream_def = SBE37_RAW_stream_definition() raw_ctd_stream_def_id = pubsub_management_service.create_stream_definition( container=raw_ctd_stream_def, name='Simulated RAW CTD data') ### ### And one process definition... ### # one for the ctd simulator... producer_definition = ProcessDefinition() producer_definition.executable = { 'module': 'ion.processes.data.raw_stream_publisher', 'class': 'RawStreamPublisher' } raw_ctd_sim_procdef_id = process_dispatcher.create_process_definition( process_definition=producer_definition) #--------------------------- # Set up ingestion - this is an operator concern - not done by SA in a deployed system #--------------------------- # Configure ingestion using one worker, ingesting to test_dm_integration datastore with the SCIDATA profile log.debug('Calling create_ingestion_configuration') ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration( exchange_point_id='science_data', couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'), number_of_workers=1) # ingestion_management_service.activate_ingestion_configuration( ingestion_configuration_id=ingestion_configuration_id) #--------------------------- # Set up the producer (CTD Simulator) #--------------------------- # Create the stream raw_ctd_stream_id = pubsub_management_service.create_stream( stream_definition_id=raw_ctd_stream_def_id) # Set up the datasets raw_ctd_dataset_id = dataset_management_service.create_dataset( stream_id=raw_ctd_stream_id, datastore_name=datastore_name, view_name='datasets/stream_join_granule') # Configure ingestion of this dataset raw_ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration( dataset_id=raw_ctd_dataset_id, archive_data=True, archive_metadata=True, ingestion_configuration_id= ingestion_configuration_id, # you need to know the ingestion configuration id! ) # Hold onto ctd_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service # Start the ctd simulator to produce some data configuration = { 'process': { 'stream_id': raw_ctd_stream_id, } } raw_sim_pid = process_dispatcher.schedule_process( process_definition_id=raw_ctd_sim_procdef_id, configuration=configuration) ### ### Make a subscriber in the test to listen for raw data ### raw_subscription_id = pubsub_management_service.create_subscription( query=StreamQuery([ raw_ctd_stream_id, ]), exchange_name='raw_test', name="test raw subscription", ) # this is okay - even in cei mode! 
pid = cc.spawn_process(name='dummy_process_for_test', module='pyon.ion.process', cls='SimpleProcess', config={}) dummy_process = cc.proc_manager.procs[pid] subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, node=cc.node) result = gevent.event.AsyncResult() results = [] def message_received(message, headers): # Heads log.warn('Raw data received!') results.append(message) if len(results) > 3: result.set(True) subscriber = subscriber_registrar.create_subscriber( exchange_name='raw_test', callback=message_received) subscriber.start() # after the queue has been created it is safe to activate the subscription pubsub_management_service.activate_subscription( subscription_id=raw_subscription_id) # Assert that we have received data assertions(result.get(timeout=10)) # stop the flow parse the messages... process_dispatcher.cancel_process( raw_sim_pid ) # kill the ctd simulator process - that is enough data gevent.sleep(1) for message in results: sha1 = message.identifiables['stream_encoding'].sha1 data = message.identifiables['data_stream'].values filename = FileSystem.get_hierarchical_url(FS.CACHE, sha1, ".raw") with open(filename, 'r') as f: assertions(data == f.read())
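# A minimal sketch of the dataset/ingestion wiring that test_raw_stream_integration
# and test_dm_integration each repeat per stream, using only the service calls
# shown above.  The helper name `wire_stream_ingestion` is hypothetical.
def wire_stream_ingestion(dataset_management_service,
                          ingestion_management_service,
                          stream_id, datastore_name,
                          ingestion_configuration_id):
    # every ingested stream gets its own dataset backed by the datastore
    dataset_id = dataset_management_service.create_dataset(
        stream_id=stream_id,
        datastore_name=datastore_name,
        view_name='datasets/stream_join_granule')

    # ...and a dataset configuration tied to the ingestion configuration
    dataset_config_id = ingestion_management_service.create_dataset_configuration(
        dataset_id=dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id)

    # hold onto dataset_config_id to stop/start ingestion of this dataset later
    return dataset_id, dataset_config_id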
def test_dm_integration(self): ''' test_salinity_transform Test full DM Services Integration ''' cc = self.container assertions = self.assertTrue #----------------------------- # Copy below here to run as a script (don't forget the imports of course!) #----------------------------- # Create some service clients... pubsub_management_service = PubsubManagementServiceClient(node=cc.node) ingestion_management_service = IngestionManagementServiceClient(node=cc.node) dataset_management_service = DatasetManagementServiceClient(node=cc.node) data_retriever_service = DataRetrieverServiceClient(node=cc.node) transform_management_service = TransformManagementServiceClient(node=cc.node) process_dispatcher = ProcessDispatcherServiceClient(node=cc.node) # declare some handy variables datastore_name = 'test_dm_integration' ### ### In the beginning there were two stream definitions... ### # create a stream definition for the data from the ctd simulator ctd_stream_def = SBE37_CDM_stream_definition() ctd_stream_def_id = pubsub_management_service.create_stream_definition(container=ctd_stream_def, name='Simulated CTD data') # create a stream definition for the data from the salinity Transform sal_stream_def_id = pubsub_management_service.create_stream_definition(container=SalinityTransform.outgoing_stream_def, name='Scalar Salinity data stream') ### ### And two process definitions... ### # one for the ctd simulator... producer_definition = ProcessDefinition() producer_definition.executable = { 'module':'ion.processes.data.ctd_stream_publisher', 'class':'SimpleCtdPublisher' } ctd_sim_procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition) # one for the salinity transform producer_definition = ProcessDefinition() producer_definition.executable = { 'module':'ion.processes.data.transforms.ctd.ctd_L2_salinity', 'class':'SalinityTransform' } salinity_transform_procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition) #--------------------------- # Set up ingestion - this is an operator concern - not done by SA in a deployed system #--------------------------- # Configure ingestion using one worker, ingesting to test_dm_integration datastore with the SCIDATA profile log.debug('Calling create_ingestion_configuration') ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration( exchange_point_id='science_data', couch_storage=CouchStorage(datastore_name=datastore_name,datastore_profile='SCIDATA'), number_of_workers=1 ) # ingestion_management_service.activate_ingestion_configuration( ingestion_configuration_id=ingestion_configuration_id) #--------------------------- # Set up the producer (CTD Simulator) #--------------------------- # Create the stream ctd_stream_id = pubsub_management_service.create_stream(stream_definition_id=ctd_stream_def_id) # Set up the datasets ctd_dataset_id = dataset_management_service.create_dataset( stream_id=ctd_stream_id, datastore_name=datastore_name, view_name='datasets/stream_join_granule' ) # Configure ingestion of this dataset ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration( dataset_id = ctd_dataset_id, archive_data = True, archive_metadata = True, ingestion_configuration_id = ingestion_configuration_id, # you need to know the ingestion configuration id! 
) # Hold onto ctd_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service #--------------------------- # Set up the salinity transform #--------------------------- # Create the stream sal_stream_id = pubsub_management_service.create_stream(stream_definition_id=sal_stream_def_id) # Set up the datasets sal_dataset_id = dataset_management_service.create_dataset( stream_id=sal_stream_id, datastore_name=datastore_name, view_name='datasets/stream_join_granule' ) # Configure ingestion of the salinity as a dataset sal_dataset_config_id = ingestion_management_service.create_dataset_configuration( dataset_id = sal_dataset_id, archive_data = True, archive_metadata = True, ingestion_configuration_id = ingestion_configuration_id, # you need to know the ingestion configuration id! ) # Hold onto sal_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service # Create a subscription as input to the transform sal_transform_input_subscription_id = pubsub_management_service.create_subscription( query = StreamQuery(stream_ids=[ctd_stream_id,]), exchange_name='salinity_transform_input') # how do we make these names??? i.e. Should they be anonymous? # create the salinity transform sal_transform_id = transform_management_service.create_transform( name='example salinity transform', in_subscription_id=sal_transform_input_subscription_id, out_streams={'output':sal_stream_id,}, process_definition_id = salinity_transform_procdef_id, # no configuration needed at this time... ) # start the transform - for a test case it makes sense to do it before starting the producer but it is not required transform_management_service.activate_transform(transform_id=sal_transform_id) # Start the ctd simulator to produce some data configuration = { 'process':{ 'stream_id':ctd_stream_id, } } ctd_sim_pid = process_dispatcher.schedule_process(process_definition_id=ctd_sim_procdef_id, configuration=configuration) ### ### Make a subscriber in the test to listen for salinity data ### salinity_subscription_id = pubsub_management_service.create_subscription( query=StreamQuery([sal_stream_id,]), exchange_name = 'salinity_test', name = "test salinity subscription", ) pid = cc.spawn_process(name='dummy_process_for_test', module='pyon.ion.process', cls='SimpleProcess', config={}) dummy_process = cc.proc_manager.procs[pid] subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, node=cc.node) result = gevent.event.AsyncResult() results = [] def message_received(message, headers): # Heads log.warn('Salinity data received!') results.append(message) if len(results) >3: result.set(True) subscriber = subscriber_registrar.create_subscriber(exchange_name='salinity_test', callback=message_received) subscriber.start() # after the queue has been created it is safe to activate the subscription pubsub_management_service.activate_subscription(subscription_id=salinity_subscription_id) # Assert that we have received data assertions(result.get(timeout=10)) # stop the flow parse the messages... 
process_dispatcher.cancel_process(ctd_sim_pid) # kill the ctd simulator process - that is enough data for message in results: psd = PointSupplementStreamParser(stream_definition=SalinityTransform.outgoing_stream_def, stream_granule=message) # Test the handy info method for the names of fields in the stream def assertions('salinity' in psd.list_field_names()) # you have to know the name of the coverage in stream def salinity = psd.get_values('salinity') import numpy assertions(isinstance(salinity, numpy.ndarray)) assertions(numpy.nanmin(salinity) > 0.0) # salinity should always be greater than 0
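# A minimal sketch of the granule-parsing step in the loop above: given one
# message from the subscriber, pull the 'salinity' field out with
# PointSupplementStreamParser.  `granule` is a placeholder argument; the
# parser and transform classes are the same ones used by the test above.
import numpy

def extract_salinity(granule):
    psd = PointSupplementStreamParser(
        stream_definition=SalinityTransform.outgoing_stream_def,
        stream_granule=granule)
    # list_field_names() reports the coverages defined in the stream definition,
    # so we can check for the field before asking for its values
    assert 'salinity' in psd.list_field_names()
    salinity = psd.get_values('salinity')  # numpy.ndarray of point values
    assert isinstance(salinity, numpy.ndarray)
    return salinity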
def create_data_process(self, data_process_definition_id='', in_data_product_id='', out_data_product_id=''): """ @param data_process_definition_id: Object with definition of the transform to apply to the input data product @param in_data_product_id: ID of the input data product @param out_data_product_id: ID of the output data product @retval data_process_id: ID of the newly created data process object """ # # # #todo: break this method up into: 1. create data process, 2. assign in/out products, 3. activate data process # # # inform = "Input Data Product: "+str(in_data_product_id)+\ " Transformed by: "+str(data_process_definition_id)+\ " To create output Product: "+str(out_data_product_id) log.debug("DataProcessManagementService:create_data_process()\n" + inform) # Create and store a new DataProcess with the resource registry log.debug("DataProcessManagementService:create_data_process - Create and store a new DataProcess with the resource registry") data_process_def_obj = self.read_data_process_definition(data_process_definition_id) data_process_name = "process_" + data_process_def_obj.name \ + " - calculates " + \ str(out_data_product_id) + " " + time.ctime() self.data_process = IonObject(RT.DataProcess, name=data_process_name) data_process_id, version = self.clients.resource_registry.create(self.data_process) log.debug("DataProcessManagementService:create_data_process - Create and store a new DataProcess with the resource registry data_process_id: " + str(data_process_id)) # Register the data process instance as a data producer with DataAcquisitionMgmtSvc log.debug("DataProcessManagementService:create_data_process - Register the data process instance as a data producer with DataAcquisitionMgmtSvc, then retrieve the id of the OUTPUT stream") #TODO: should this be outside this method? Called by orchestration? data_producer_id = self.clients.data_acquisition_management.register_process(data_process_id) #Assign the output Data Product to this producer resource #todo: check that the product is not already associated with a producer #TODO: should this be outside this method? Called by orchestration? 
self.clients.data_acquisition_management.assign_data_product(data_process_id, out_data_product_id, True) # Associate with dataProcess self.clients.resource_registry.create_association(data_process_definition_id, PRED.hasInstance, data_process_id) self.clients.resource_registry.create_association(data_process_id, PRED.hasInputProduct, in_data_product_id) self.clients.resource_registry.create_association(data_process_id, PRED.hasOutputProduct, out_data_product_id) # Retrieve the id of the OUTPUT stream from the out Data Product stream_ids, _ = self.clients.resource_registry.find_objects(out_data_product_id, PRED.hasStream, None, True) if not stream_ids: raise NotFound("No Stream created for output Data Product " + str(out_data_product_id)) if len(stream_ids) != 1: raise BadRequest("Data Product should only have ONE stream at this time" + str(out_data_product_id)) out_stream_id = stream_ids[0] log.debug("DataProcessManagementService:create_data_process -Register the data process instance as a data producer with DataAcquisitionMgmtSvc, then retrieve the id of the OUTPUT stream out_stream_id: " + str(out_stream_id)) #------------------------------- # Create subscription from in_data_product, which should already be associated with a stream via the Data Producer #------------------------------- # # first - get the data producer associated with this IN data product # log.debug("DataProcessManagementService:create_data_process - get the data producer associated with this IN data product") # producer_ids, _ = self.clients.resource_registry.find_objects(in_data_product_id, PRED.hasDataProducer, RT.DataProducer, True) # if not producer_ids: # raise NotFound("No Data Producer created for this Data Product " + str(in_data_product_id)) # if len(producer_ids) != 1: # raise BadRequest("Data Product should only have ONE Data Producers at this time" + str(in_data_product_id)) # in_product_producer = producer_ids[0] # log.debug("DataProcessManagementService:create_data_process - get the data producer associated with this IN data product in_product_producer: " + str(in_product_producer)) # second - get the stream associated with this IN data product log.debug("DataProcessManagementService:create_data_process - get the stream associated with this IN data product") stream_ids, _ = self.clients.resource_registry.find_objects(in_data_product_id, PRED.hasStream, RT.Stream, True) if not stream_ids: raise NotFound("No Stream created for this IN Data Product " + str(in_data_product_id)) if len(stream_ids) != 1: raise BadRequest("IN Data Product should only have ONE stream at this time" + str(in_data_product_id)) in_stream_id = stream_ids[0] log.debug("DataProcessManagementService:create_data_process - get the stream associated with this IN data product in_stream_id" + str(in_stream_id)) # Finally - create a subscription to the input stream log.debug("DataProcessManagementService:create_data_process - Finally - create a subscription to the input stream") in_data_product_obj = self.clients.data_product_management.read_data_product(in_data_product_id) query = StreamQuery(stream_ids=[in_stream_id]) self.input_subscription_id = self.clients.pubsub_management.create_subscription(query=query, exchange_name=in_data_product_obj.name) log.debug("DataProcessManagementService:create_data_process - Finally - create a subscription to the input stream input_subscription_id" + str(self.input_subscription_id)) # add the subscription id to the resource for clean up later data_process_obj = 
self.clients.resource_registry.read(data_process_id) data_process_obj.input_subscription_id = self.input_subscription_id self.clients.resource_registry.update(data_process_obj) #------------------------------- # Process Definition #------------------------------- # Create the process definition for the basic transform transform_definition = ProcessDefinition() transform_definition.executable = { 'module':data_process_def_obj.module, 'class':data_process_def_obj.class_name } transform_definition_id = self.clients.process_dispatcher.create_process_definition(process_definition=transform_definition) # Launch the first transform process log.debug("DataProcessManagementService:create_data_process - Launch the first transform process: ") log.debug("DataProcessManagementService:create_data_process - input_subscription_id: " + str(self.input_subscription_id) ) log.debug("DataProcessManagementService:create_data_process - out_stream_id: " + str(out_stream_id) ) log.debug("DataProcessManagementService:create_data_process - transform_definition_id: " + str(transform_definition_id) ) log.debug("DataProcessManagementService:create_data_process - data_process_id: " + str(data_process_id) ) transform_id = self.clients.transform_management.create_transform( name=data_process_id, description=data_process_id, in_subscription_id=self.input_subscription_id, out_streams={'output':out_stream_id}, process_definition_id=transform_definition_id, configuration={}) log.debug("DataProcessManagementService:create_data_process - transform_id: " + str(transform_id) ) self.clients.resource_registry.create_association(data_process_id, PRED.hasTransform, transform_id) log.debug("DataProcessManagementService:create_data_process - Launch the first transform process transform_id: " + str(transform_id)) # TODO: Flesh details of transform mgmt svc schedule and bind methods # self.clients.transform_management_service.schedule_transform(transform_id) # self.clients.transform_management_service.bind_transform(transform_id) # TODO: Where should activate take place? log.debug("DataProcessManagementService:create_data_process - transform_management.activate_transform") self.clients.transform_management.activate_transform(transform_id) return data_process_id
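# A hedged usage sketch of create_data_process() as a client would call it,
# mirroring the signature of the service method above.  The ids are
# placeholders standing in for resources created earlier (a data process
# definition, an input data product, and an output data product), and
# `dataprocessclient` is the DataProcessManagementService client used in the
# integration tests above.
data_process_id = dataprocessclient.create_data_process(
    data_process_definition_id=ctd_L2_salinity_dprocdef_id,
    in_data_product_id=ctd_parsed_data_product,
    out_data_product_id=ctd_l2_salinity_output_dp_id)

# activation starts the underlying transform on its input subscription
dataprocessclient.activate_data_process(data_process_id)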