def _launch_process(self, queue_name='', out_streams=None, process_definition_id='', configuration=None): """ Launches the process """ # ------------------------------------------------------------------------------------ # Spawn Configuration and Parameters # ------------------------------------------------------------------------------------ configuration['process'] = { 'queue_name':queue_name, 'publish_streams' : out_streams } # ------------------------------------------------------------------------------------ # Process Spawning # ------------------------------------------------------------------------------------ # Spawn the process pid = self.clients.process_dispatcher.schedule_process( process_definition_id=process_definition_id, configuration=configuration ) validate_is_not_none( pid, "Process could not be spawned") return pid
def _launch_process(self, queue_name='', out_streams=None, process_definition_id='', configuration=None): """ Launches the process """ # ------------------------------------------------------------------------------------ # Spawn Configuration and Parameters # ------------------------------------------------------------------------------------ if 'process' not in configuration: configuration['process'] = {} configuration['process']['queue_name'] = queue_name configuration['process']['publish_streams'] = out_streams # Setting the restart mode schedule = ProcessSchedule() schedule.restart_mode = ProcessRestartMode.ABNORMAL schedule.queueing_mode = ProcessQueueingMode.ALWAYS # ------------------------------------------------------------------------------------ # Process Spawning # ------------------------------------------------------------------------------------ # Spawn the process pid = self.clients.process_dispatcher.schedule_process( process_definition_id=process_definition_id, schedule=schedule, configuration=configuration) validate_is_not_none(pid, "Process could not be spawned") return pid
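# Usage sketch for _launch_process, as it might appear inside another method of
# this service. The ids and binding names are hypothetical, not real resources:
# the caller supplies the subscription queue, a binding->stream map, and a
# process definition; everything lands under configuration['process'] in the
# spawned process's CFG.
pid = self._launch_process(
    queue_name='data_process_queue_1',          # queue the process consumes from
    out_streams={'output': 'stream_id_abc'},    # binding name -> stream id
    process_definition_id='proc_def_id_123',
    configuration={})
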
def assign_stream_definition_to_data_process_definition(self, stream_definition_id='', data_process_definition_id='', binding=''):
    """Connect the output stream with a data process definition"""
    # Verify that both ids are valid; RR will throw if not found
    stream_definition_obj = self.clients.resource_registry.read(stream_definition_id)
    data_process_definition_obj = self.clients.resource_registry.read(data_process_definition_id)

    validate_is_not_none(stream_definition_obj,
                         "No stream definition object found for stream definition id: %s" % stream_definition_id)
    validate_is_not_none(data_process_definition_obj,
                         "No data process definition object found for data process"
                         " definition id: %s" % data_process_definition_id)

    self.clients.resource_registry.create_association(
        data_process_definition_id, PRED.hasStreamDefinition, stream_definition_id)
    if binding:
        data_process_definition_obj.output_bindings[binding] = stream_definition_id
    self.clients.resource_registry.update(data_process_definition_obj)

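# Usage sketch (hypothetical ids): register a stream definition as an output of
# a data process definition under the binding name 'salinity'; the binding is
# recorded in the definition's output_bindings map.
self.assign_stream_definition_to_data_process_definition(
    stream_definition_id='stream_def_id_123',
    data_process_definition_id='dpd_id_456',
    binding='salinity')
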
def suspend_data_product_persistence(self, data_product_id=''):
    """Suspend data product data persistence into a data set
    @param data_product_id    str
    @throws NotFound    object with specified id does not exist
    """
    #--------------------------------------------------------------------------------
    # retrieve the data product object
    #--------------------------------------------------------------------------------
    data_product_obj = self.clients.resource_registry.read(data_product_id)

    validate_is_not_none(data_product_obj, 'Should not have been empty')
    validate_is_instance(data_product_obj, DataProduct)

    if data_product_obj.dataset_configuration_id is None:
        raise NotFound("Data Product %s dataset configuration does not exist" % data_product_id)

    #--------------------------------------------------------------------------------
    # get the Stream associated with this data product; there must be exactly one
    #--------------------------------------------------------------------------------
    stream_id = self._get_stream_id(data_product_id)
    validate_is_not_none(stream_id, 'Data Product %s must have one stream associated' % str(data_product_id))

    ret = self.clients.ingestion_management.unpersist_data_stream(
        stream_id=stream_id,
        ingestion_configuration_id=data_product_obj.dataset_configuration_id)

def add_data_product_version_to_collection(self, data_product_id='', data_product_collection_id='', version_name='', version_description=''):
    dp_collection_obj = self.clients.resource_registry.read(data_product_collection_id)

    # Retrieve the stream definitions for both the new data product to add to this
    # collection and the base data product of the collection
    new_data_product_obj = self.clients.resource_registry.read(data_product_id)
    new_data_product_streams, _ = self.clients.resource_registry.find_objects(
        subject=data_product_id, predicate=PRED.hasStream, object_type=RT.Stream, id_only=True)
    validate_is_not_none(new_data_product_streams,
                         'The data product to add to the collection must have an associated stream')
    new_data_product_streamdefs, _ = self.clients.resource_registry.find_objects(
        subject=new_data_product_streams[0], predicate=PRED.hasStreamDefinition,
        object_type=RT.StreamDefinition, id_only=True)

    base_data_product_id = dp_collection_obj.version_list[0].data_product_id
    base_data_product_obj = self.clients.resource_registry.read(base_data_product_id)
    base_data_product_streams, _ = self.clients.resource_registry.find_objects(
        subject=base_data_product_id, predicate=PRED.hasStream, object_type=RT.Stream, id_only=True)
    validate_is_not_none(base_data_product_streams,
                         'The base data product in the collection must have an associated stream')
    base_data_product_streamdefs, _ = self.clients.resource_registry.find_objects(
        subject=base_data_product_streams[0], predicate=PRED.hasStreamDefinition,
        object_type=RT.StreamDefinition, id_only=True)

    if not self.clients.pubsub_management.compare_stream_definition(
            stream_definition1_id=new_data_product_streamdefs[0],
            stream_definition2_id=base_data_product_streamdefs[0]):
        raise BadRequest("All Data Products in a collection must have equivalent stream definitions.")

    #todo: validate that the spatial/temporal domain match the base data product

    dpv = DataProductVersion()
    dpv.name = version_name
    dpv.description = version_description
    dpv.data_product_id = data_product_id

    dp_collection_obj.version_list.append(dpv)
    self.clients.resource_registry.update(dp_collection_obj)

    self.clients.resource_registry.create_association(
        subject=data_product_collection_id, predicate=PRED.hasVersion, object=data_product_id)

    return

def _get_input_stream_ids(self, in_data_product_ids=None):
    input_stream_ids = []

    #------------------------------------------------------------------------------------------------------------------------------------------
    # get the streams associated with the IN data products
    #------------------------------------------------------------------------------------------------------------------------------------------
    for in_data_product_id in in_data_product_ids:
        # Get the stream associated with this input data product
        stream_ids, _ = self.clients.resource_registry.find_objects(
            in_data_product_id, PRED.hasStream, RT.Stream, True)

        validate_is_not_none(stream_ids,
                             "No Stream created for this input Data Product " + str(in_data_product_id))
        # The original check passed unconditionally; the intended constraint is exactly one stream
        validate_true(len(stream_ids) == 1,
                      "Input Data Product should only have ONE stream: " + str(in_data_product_id))

        # For now we take the one stream_id associated with each input data product
        input_stream_ids.append(stream_ids[0])

    return input_stream_ids

def create_dataset(self, name='', datastore_name='', view_name='', stream_id='', parameter_dict=None,
                   spatial_domain=None, temporal_domain=None, parameter_dictionary_id='', description=''):
    validate_true(parameter_dict or parameter_dictionary_id,
                  'A parameter dictionary must be supplied to register a new dataset.')
    validate_is_not_none(spatial_domain, 'A spatial domain must be supplied to register a new dataset.')
    validate_is_not_none(temporal_domain, 'A temporal domain must be supplied to register a new dataset.')

    if parameter_dictionary_id:
        pd = self.read_parameter_dictionary(parameter_dictionary_id)
        pcs = self.read_parameter_contexts(parameter_dictionary_id, id_only=False)
        parameter_dict = self._merge_contexts(
            [ParameterContext.load(i.parameter_context) for i in pcs], pd.temporal_context)
        parameter_dict = parameter_dict.dump()

    dataset = Dataset()
    dataset.description = description
    dataset.name = name
    dataset.primary_view_key = stream_id or None
    dataset.datastore_name = datastore_name or self.DEFAULT_DATASTORE
    dataset.view_name = view_name or self.DEFAULT_VIEW
    dataset.parameter_dictionary = parameter_dict
    dataset.temporal_domain = temporal_domain
    dataset.spatial_domain = spatial_domain
    dataset.registered = False

    dataset_id, _ = self.clients.resource_registry.create(dataset)
    if stream_id:
        self.add_stream(dataset_id, stream_id)

    log.debug('creating dataset: %s', dataset_id)

    cov = self._create_coverage(dataset_id, description or dataset_id, parameter_dict,
                                spatial_domain, temporal_domain)
    self._save_coverage(cov)
    cov.close()

    return dataset_id

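# Usage sketch (hypothetical ids), as it might appear in a calling service:
# register a dataset against a stored parameter dictionary resource. The
# temporal/spatial domains are assumed to be pre-built coverage-model domain
# objects constructed elsewhere.
dataset_id = self.create_dataset(
    name='ctd_dataset',
    stream_id='stream_id_abc',
    parameter_dictionary_id='pdict_id_123',
    temporal_domain=tdom,   # assumed prebuilt temporal domain
    spatial_domain=sdom)    # assumed prebuilt spatial domain
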
def read_data_product_collection(self, data_product_collection_id=''):
    """Retrieve data product collection information
    @param data_product_collection_id    str
    @retval data_product_collection    DataProductCollection
    """
    result = self.clients.resource_registry.read(data_product_collection_id)

    validate_is_not_none(result, "Should not have returned an empty result")

    return result

def activate_data_product_persistence(self, data_product_id=''):
    """Persist data product data into a data set
    @param data_product_id    str
    @throws NotFound    object with specified id does not exist
    """
    #--------------------------------------------------------------------------------
    # retrieve the data product object
    #--------------------------------------------------------------------------------
    data_product_obj = self.data_product.read_one(data_product_id)
    validate_is_not_none(data_product_obj,
                         "The data product id should correspond to a valid registered data product.")

    #--------------------------------------------------------------------------------
    # get the Stream associated with this data product; if no stream then create one, if multiple streams then Throw
    #--------------------------------------------------------------------------------
    streams = self.data_product.find_stemming_stream(data_product_id)
    if not streams:
        raise BadRequest('Data Product %s must have one stream associated' % str(data_product_id))

    stream_id = streams[0]._id
    log.debug("Activating data product persistence for stream_id: %s" % str(stream_id))

    #-----------------------------------------------------------------------------------------
    # grab the ingestion configuration id from the data_product in order to use it to persist
    #-----------------------------------------------------------------------------------------
    if data_product_obj.dataset_configuration_id:
        ingestion_configuration_id = data_product_obj.dataset_configuration_id
    else:
        ingestion_configuration_id = self.clients.ingestion_management.list_ingestion_configurations(id_only=True)[0]

    #--------------------------------------------------------------------------------
    # persist the data stream using the ingestion config id and stream id
    #--------------------------------------------------------------------------------
    # find the dataset for the data product
    dataset_id = self._get_dataset_id(data_product_id)
    log.debug("Activating data product persistence for dataset_id: %s" % str(dataset_id))
    dataset_id = self.clients.ingestion_management.persist_data_stream(
        stream_id=stream_id,
        ingestion_configuration_id=ingestion_configuration_id,
        dataset_id=dataset_id)

    # register the dataset for externalization
    self.clients.dataset_management.register_dataset(dataset_id)

    #--------------------------------------------------------------------------------
    # todo: dataset_configuration_obj contains the ingest config for now...
    # Update the data product object
    #--------------------------------------------------------------------------------
    data_product_obj.dataset_configuration_id = ingestion_configuration_id
    self.update_data_product(data_product_obj)

def update_event_process_definition(self, event_process_definition_id='', version='', module='',
                                    class_name='', uri='', arguments=None, event_types=None,
                                    sub_types=None, origin_types=None):
    """
    Update the process definition for the event process.

    @param event_process_definition_id str
    @param version str
    @param module str
    @param class_name str
    @param uri str
    @param arguments list
    """
    validate_is_not_none(event_process_definition_id)

    # The event_process_def is really only a process_def. Read up the process definition
    process_def = self.clients.resource_registry.read(event_process_definition_id)
    definition = process_def.definition

    # Fetch or make a new EventProcessDefinitionDetail object
    if definition:
        event_process_def_detail = EventProcessDefinitionDetail()
        event_process_def_detail.event_types = event_types or definition.event_types
        event_process_def_detail.sub_types = sub_types or definition.sub_types
        event_process_def_detail.origin_types = origin_types or definition.origin_types
    else:
        event_process_def_detail = EventProcessDefinitionDetail(
            event_types=event_types, sub_types=sub_types, origin_types=origin_types)

    # Update the fields of the process definition
    process_def.executable['module'] = module
    process_def.executable['class'] = class_name
    process_def.executable['uri'] = uri
    process_def.version = version
    process_def.arguments = arguments
    process_def.definition = event_process_def_detail

    # Finally update the resource registry
    self.clients.resource_registry.update(process_def)

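# Usage sketch (hypothetical module path and ids): repoint an event process
# definition at a new module/class and narrow the event types it reacts to.
self.update_event_process_definition(
    event_process_definition_id='epd_id_789',
    version='1.1',
    module='ion.processes.event.example_processor',  # assumed module path
    class_name='ExampleProcessor',
    event_types=['ResourceLifecycleEvent'])
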
def assign_input_stream_definition_to_data_process_definition(self, stream_definition_id='', data_process_definition_id=''):
    """Connect the input stream with a data process definition"""
    # Verify that both ids are valid; RR will throw if not found
    stream_definition_obj = self.clients.resource_registry.read(stream_definition_id)
    data_process_definition_obj = self.clients.resource_registry.read(data_process_definition_id)

    validate_is_not_none(stream_definition_obj,
                         "No stream definition object found for stream definition id: %s" % stream_definition_id)
    validate_is_not_none(data_process_definition_obj,
                         "No data process definition object found for data process"
                         " definition id: %s" % data_process_definition_id)

    self.clients.resource_registry.create_association(
        data_process_definition_id, PRED.hasInputStreamDefinition, stream_definition_id)

def read_data_product_version(self, data_product_version_id=''):
    """Retrieve data product version information
    @param data_product_version_id    str
    @retval data_product    DataProductVersion
    """
    log.debug("DataProductManagementService:read_data_product_version: %s" % str(data_product_version_id))

    result = self.clients.resource_registry.read(data_product_version_id)

    validate_is_not_none(result, "Should not have returned an empty result")

    return result

def get_data_product_provenance(self, data_product_id=''):
    # Retrieve information that characterizes how this data was produced
    # Return in a dictionary
    self.provenance_results = {}

    data_product = self.data_product.read_one(data_product_id)
    validate_is_not_none(data_product, "Should have got a non empty data product")

    # todo: get the start time of this data product
    self.data_product._find_producers(data_product_id, self.provenance_results)

    return self.provenance_results

def update_data_product_collection(self, data_product_collection=None):
    """@todo document this interface!!!

    @param data_product_collection    DataProductCollection
    @throws NotFound    object with specified id does not exist
    """
    validate_is_not_none(data_product_collection, "Should not pass in a None object")

    self.clients.resource_registry.update(data_product_collection)

    #TODO: any changes to producer? Call DataAcquisitionMgmtSvc?

    return

def on_start(self):
    SimpleProcess.on_start(self)
    self.data_retriever = DataRetrieverServiceProcessClient(process=self)
    self.interval_key = self.CFG.get_safe('process.interval_key', None)
    self.qc_params = self.CFG.get_safe('process.qc_params', [])
    validate_is_not_none(self.interval_key, 'An interval key is necessary to launch this process')
    self.event_subscriber = EventSubscriber(event_type=OT.TimerEvent,
                                            origin=self.interval_key,
                                            callback=self._event_callback,
                                            auto_delete=True)
    self.add_endpoint(self.event_subscriber)
    self.resource_registry = self.container.resource_registry
    self.run_interval = self.CFG.get_safe('service.qc_processing.run_interval', 24)

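# Configuration sketch: the CFG keys this on_start() reads, laid out as they
# might appear in a spawn configuration. Values are illustrative only.
config = {
    'process': {
        'interval_key': 'qc_timer_origin',  # origin of the TimerEvent to subscribe to
        'qc_params': ['global_range_qc'],   # QC parameters to evaluate
    },
    'service': {
        'qc_processing': {'run_interval': 24},  # defaults to 24 if absent
    },
}
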
def create_data_product_collection(self, data_product_id='', collection_name='', collection_description=''):
    """Define a set of existing data products that represent an improvement in the quality or
    understanding of the information.
    """
    validate_is_not_none(data_product_id,
                         'A data product identifier must be passed to create a data product version')

    dpv = DataProductVersion()
    dpv.name = 'base'
    dpv.description = 'the base version on which subsequent versions are built'
    dpv.data_product_id = data_product_id

    dp_collection_obj = IonObject(RT.DataProductCollection,
                                  name=collection_name,
                                  description=collection_description,
                                  version_list=[dpv])

    data_product_collection_id, rev = self.clients.resource_registry.create(dp_collection_obj)
    self.clients.resource_registry.create_association(
        subject=data_product_collection_id, predicate=PRED.hasVersion, object=data_product_id)

    return data_product_collection_id

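# Usage sketch (hypothetical id): start a collection whose implicit 'base'
# version wraps an existing data product.
collection_id = self.create_data_product_collection(
    data_product_id='dp_id_123',
    collection_name='CTD parsed collection',
    collection_description='versions of the parsed CTD product')
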
def test_validations(self):
    import pyon.util.arg_check as arg_check

    with self.assertRaises(BadRequest):
        arg_check.validate_true(False, 'test')

    with self.assertRaises(BadRequest):
        arg_check.validate_equal(3, 4, 'test')

    with self.assertRaises(BadRequest):
        arg_check.validate_not_equal(4, 4, 'test')

    with self.assertRaises(BadRequest):
        arg_check.validate_false(True, 'test')

    with self.assertRaises(BadRequest):
        one = list()
        two = list()
        arg_check.validate_is(one, two, 'test')

    with self.assertRaises(BadRequest):
        one = list()
        two = one
        arg_check.validate_is_not(one, two, 'test')

    with self.assertRaises(BadRequest):
        c = None
        arg_check.validate_is_not_none(c, 'test')

    with self.assertRaises(BadRequest):
        one = list([1, 3])
        two = 2
        arg_check.validate_in(two, one, 'test')

    with self.assertRaises(BadRequest):
        one = list([1, 2, 3])
        two = 2
        arg_check.validate_not_in(two, one, 'test')

    with self.assertRaises(BadRequest):
        one = list()
        arg_check.validate_is_instance(one, dict, 'test')

    with self.assertRaises(BadRequest):
        one = list()
        arg_check.validate_not_is_instance(one, list, 'test')

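# Complementary sketch: the same validators pass silently when their condition
# holds, which is why service code can use them as one-line guards.
import pyon.util.arg_check as arg_check
arg_check.validate_true(1 == 1, 'never raised')
arg_check.validate_is_not_none('value', 'never raised')
arg_check.validate_in(2, [1, 2, 3], 'never raised')
arg_check.validate_is_instance([], list, 'never raised')
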
def create_data_product_version(self, data_product_id='', data_product_version=None):
    """Define a new version of an existing set of information that represents an improvement in
    the quality or understanding of the information. Only creates the second and higher versions
    of a DataProduct. The first version is implicit in the create_data_product() operation.

    @param data_product_id    str
    @param data_product_version    DataProductVersion
    @retval data_product_version_id    str
    @throws BadRequest    if object does not have _id or _rev attribute
    @throws NotFound    object with specified id does not exist
    """
    validate_is_not_none(data_product_id,
                         'A data product identifier must be passed to create a data product version')
    validate_is_not_none(data_product_version,
                         'A data product version (ion object) must be passed to create a data product version')

    data_product_version_id, rev = self.clients.resource_registry.create(data_product_version)
    self.clients.resource_registry.create_association(
        subject=data_product_id, predicate=PRED.hasVersion, object=data_product_version_id)

    #-----------------------------------------------------------------------------------------------
    # Create the stream and a dataset for the new version
    #-----------------------------------------------------------------------------------------------
    stream_id = self.clients.pubsub_management.create_stream(
        name=data_product_version.name,
        description=data_product_version.description)

    # Associate the Stream with the new data product version
    self.clients.resource_registry.create_association(
        subject=data_product_version_id, predicate=PRED.hasStream, object=stream_id)

    # get the parameter_dictionary assoc with the original dataset
    dataset_ids, _ = self.clients.resource_registry.find_objects(
        subject=data_product_id, predicate=PRED.hasDataset, object_type=RT.DataSet, id_only=True)
    if not dataset_ids:
        raise BadRequest('No Dataset associated with the DataProduct %s' % str(data_product_id))

    log.debug("DataProductManagementService:create_data_product_version base_dataset_id: %s", str(dataset_ids[0]))
    base_dataset_obj = self.clients.dataset_management.read_dataset(str(dataset_ids[0]))
    log.debug("DataProductManagementService:create_data_product_version base_dataset_obj: %s" % str(base_dataset_obj))

    # create a dataset for this version; it must have the same parameter dictionary and
    # spatial/temporal domain as the original data product.
    data_set_id = self.clients.dataset_management.create_dataset(
        name='data_set_%s' % stream_id,
        stream_id=stream_id,
        parameter_dict=base_dataset_obj.parameter_dictionary,
        temporal_domain=base_dataset_obj.temporal_domain,
        spatial_domain=base_dataset_obj.spatial_domain)

    self.clients.resource_registry.create_association(
        subject=data_product_version_id, predicate=PRED.hasDataset, object=data_set_id)

    return data_product_version_id

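# Usage sketch (hypothetical values): second and later versions are created
# explicitly; the first version comes from create_data_product() itself.
dpv = DataProductVersion()
dpv.name = 'ctd_parsed_v2'
dpv.description = 'recomputed with corrected calibration'
dpv_id = self.create_data_product_version(data_product_id='dp_id_123',
                                          data_product_version=dpv)
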
def suspend_data_product_persistence(self, data_product_id=''):
    """Suspend data product data persistence into a data set
    @param data_product_id    str
    @throws NotFound    object with specified id does not exist
    """
    log.debug("suspend_data_product_persistence: data_product_id = %s" % str(data_product_id))

    #--------------------------------------------------------------------------------
    # retrieve the data product object
    #--------------------------------------------------------------------------------
    data_product_obj = self.clients.resource_registry.read(data_product_id)

    validate_is_not_none(data_product_obj, 'Should not have been empty')
    validate_is_instance(data_product_obj, DataProduct)

    if data_product_obj.dataset_configuration_id is None:
        raise NotFound("Data Product %s dataset configuration does not exist" % data_product_id)

    log.debug("Data product: %s" % data_product_obj)

    #--------------------------------------------------------------------------------
    # get the Streams associated with this data product; there must be at least one
    #--------------------------------------------------------------------------------
    stream_ids, _ = self.clients.resource_registry.find_objects(
        subject=data_product_id, predicate=PRED.hasStream, object_type=RT.Stream, id_only=True)
    if not stream_ids:
        raise BadRequest('Data Product %s must have one stream associated' % str(data_product_id))

    for stream_id in stream_ids:
        log.debug("suspend_data_product_persistence: stream = %s" % str(stream_id))
        log.debug("data_product_obj.dataset_configuration_id: %s" % data_product_obj.dataset_configuration_id)
        ret = self.clients.ingestion_management.unpersist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=data_product_obj.dataset_configuration_id)
        log.debug("suspend_data_product_persistence: deactivate = %s" % str(ret))

    #--------------------------------------------------------------------------------
    # detach the dataset from this data product
    #--------------------------------------------------------------------------------
    dataset_ids, _ = self.clients.resource_registry.find_objects(
        subject=data_product_id, predicate=PRED.hasDataset, id_only=True)
    for dataset_id in dataset_ids:
        self.data_product.unlink_data_set(data_product_id, dataset_id)

def create_dataset(self, name='', datastore_name='', view_name='', stream_id='', parameter_dict=None,
                   spatial_domain=None, temporal_domain=None, description=''):
    # validate_true(name and parameter_dict and temporal_domain and spatial_domain,
    #               'Datasets require name, parameter dictionary, temporal and spatial domains.')
    validate_is_not_none(parameter_dict, 'A parameter dictionary must be supplied to register a new dataset.')
    validate_is_not_none(spatial_domain, 'A spatial domain must be supplied to register a new dataset.')
    validate_is_not_none(temporal_domain, 'A temporal domain must be supplied to register a new dataset.')

    dataset = DataSet()
    dataset.description = description
    dataset.name = name
    dataset.primary_view_key = stream_id or None
    dataset.datastore_name = datastore_name or self.DEFAULT_DATASTORE
    dataset.view_name = view_name or self.DEFAULT_VIEW
    dataset.parameter_dictionary = parameter_dict
    dataset.temporal_domain = temporal_domain
    dataset.spatial_domain = spatial_domain

    dataset_id, _ = self.clients.resource_registry.create(dataset)
    if stream_id:
        self.add_stream(dataset_id, stream_id)

    coverage = self._create_coverage(description or dataset_id, parameter_dict, spatial_domain, temporal_domain)
    self._persist_coverage(dataset_id, coverage)

    return dataset_id

def get_data_product_provenance(self, data_product_id=''):
    # Retrieve information that characterizes how this data was produced
    # Return in a dictionary
    provenance_results = {}
    current_data_product = data_product_id
    log.debug("DataProductManagementService:get_data_product_provenance: %s" % str(current_data_product))

    data_product = self.data_product.read_one(data_product_id)
    validate_is_not_none(data_product, "Should have got a non empty data product")

    # todo: get the start time of this data product
    # todo: walk the full upstream provenance chain; for now only the immediate
    #       producers of the requested data product are recorded. (The previous
    #       version looped without advancing current_data_product and returned
    #       an empty dict.)
    producer_ids = self._find_producers(data_product_id)
    for producer_id in producer_ids:
        provenance_results[current_data_product] = {
            'producer': producer_id,
            'inputs': self._find_producer_in_products(producer_id)
        }

    return provenance_results

def unassign_stream_definition_from_data_process_definition(self, stream_definition_id='', data_process_definition_id=''):
    """
    Disconnect the Stream Definition from the Data Process Definition

    @param stream_definition_id    str
    @param data_process_definition_id    str
    @throws NotFound    object with specified id does not exist
    """
    # Remove the link between the Stream Definition resource and the Data Process Definition resource
    associations = self.clients.resource_registry.find_associations(
        data_process_definition_id, PRED.hasStreamDefinition, stream_definition_id, id_only=True)
    validate_is_not_none(associations,
                         "No Stream Definitions associated with data process definition ID " +
                         str(data_process_definition_id))

    for association in associations:
        self.clients.resource_registry.delete_association(association)

def create_data_product(self, data_product=None, stream_definition_id='', parameter_dictionary=None, exchange_point=''):
    """
    @param data_product    IonObject which defines the general data product resource
    @param stream_definition_id    id of the stream definition that defines the product's stream
    @retval data_product_id
    """
    res, _ = self.clients.resource_registry.find_resources(
        restype=RT.DataProduct, name=data_product.name, id_only=True)
    validate_false(len(res), 'A data product with the name %s already exists.' % data_product.name)
    log.info('Creating DataProduct: %s', data_product.name)
    log.debug('%s', data_product.__dict__)

    # Create will validate and register a new data product within the system
    # If the stream definition has a parameter dictionary, use that
    validate_is_not_none(stream_definition_id,
                         'A stream definition id must be passed to register a data product')
    stream_def_obj = self.clients.pubsub_management.read_stream_definition(stream_definition_id)  # Validates and checks for param_dict
    parameter_dictionary = stream_def_obj.parameter_dictionary or parameter_dictionary
    validate_is_not_none(parameter_dictionary,
                         'A parameter dictionary must be passed to register a data product')
    validate_is_not_none(data_product,
                         'A data product (ion object) must be passed to register a data product')
    exchange_point = exchange_point or 'science_data'

    #--------------------------------------------------------------------------------
    # Register - create and store a new DataProduct resource using provided metadata
    #--------------------------------------------------------------------------------
    data_product_id, rev = self.clients.resource_registry.create(data_product)

    #-----------------------------------------------------------------------------------------------
    # Create the stream and a dataset if a stream definition is provided
    #-----------------------------------------------------------------------------------------------
    #@todo: What about topics?

    stream_id, route = self.clients.pubsub_management.create_stream(
        name=data_product.name,
        exchange_point=exchange_point,
        description=data_product.description,
        stream_definition_id=stream_definition_id)

    # Associate the Stream with the main Data Product and with the default data product version
    self.data_product.link_stream(data_product_id, stream_id)

    # create a dataset...
    data_set_id = self.clients.dataset_management.create_dataset(
        name='data_set_%s' % stream_id,
        stream_id=stream_id,
        parameter_dict=parameter_dictionary,
        temporal_domain=data_product.temporal_domain,
        spatial_domain=data_product.spatial_domain)

    # link dataset with data product. This creates the association in the resource registry
    self.data_product.link_data_set(data_product_id=data_product_id, data_set_id=data_set_id)

    # Return the id of the new data product
    return data_product_id

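# Usage sketch (hypothetical objects): with this signature the stream
# definition is expected to carry the parameter dictionary, so only the
# resource object and the definition id are strictly required.
dp_obj = IonObject(RT.DataProduct,
                   name='ctd_parsed',
                   description='parsed CTD data',
                   temporal_domain=tdom,   # assumed prebuilt domain objects
                   spatial_domain=sdom)
dp_id = self.create_data_product(data_product=dp_obj,
                                 stream_definition_id='stream_def_id_123')
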
def activate_data_product_persistence(self, data_product_id=''):
    """Persist data product data into a data set
    @param data_product_id    str
    @throws NotFound    object with specified id does not exist
    """
    #--------------------------------------------------------------------------------
    # retrieve the data product object
    #--------------------------------------------------------------------------------
    data_product_obj = self.data_product.read_one(data_product_id)
    validate_is_not_none(data_product_obj,
                         "The data product id should correspond to a valid registered data product.")

    stream_ids, _ = self.clients.resource_registry.find_objects(
        subject=data_product_id, predicate=PRED.hasStream, id_only=True)
    if not stream_ids:
        raise BadRequest('Specified DataProduct has no streams associated with it')
    stream_id = stream_ids[0]

    stream_defs, _ = self.clients.resource_registry.find_objects(
        subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True)
    if not stream_defs:
        raise BadRequest("Data Product stream is without a stream definition")
    stream_def_id = stream_defs[0]

    stream_def = self.clients.pubsub_management.read_stream_definition(stream_def_id)  # additional read necessary to fill in the pdict

    dataset_id = self.clients.dataset_management.create_dataset(
        name='data_set_%s' % stream_id,
        stream_id=stream_id,
        parameter_dict=stream_def.parameter_dictionary,
        temporal_domain=data_product_obj.temporal_domain,
        spatial_domain=data_product_obj.spatial_domain)

    # link dataset with data product. This creates the association in the resource registry
    self.data_product.link_data_set(data_product_id=data_product_id, data_set_id=dataset_id)

    log.debug("Activating data product persistence for stream_id: %s" % str(stream_id))

    #-----------------------------------------------------------------------------------------
    # grab the ingestion configuration id from the data_product in order to use it to persist
    #-----------------------------------------------------------------------------------------
    if data_product_obj.dataset_configuration_id:
        ingestion_configuration_id = data_product_obj.dataset_configuration_id
    else:
        ingestion_configuration_id = self.clients.ingestion_management.list_ingestion_configurations(id_only=True)[0]

    #--------------------------------------------------------------------------------
    # persist the data stream using the ingestion config id and stream id
    #--------------------------------------------------------------------------------
    dataset_id = self.clients.ingestion_management.persist_data_stream(
        stream_id=stream_id,
        ingestion_configuration_id=ingestion_configuration_id,
        dataset_id=dataset_id)

    # register the dataset for externalization
    self.clients.dataset_management.register_dataset(
        dataset_id, external_data_product_name=data_product_obj.description or data_product_obj.name)

    #--------------------------------------------------------------------------------
    # todo: dataset_configuration_obj contains the ingest config for now...
    # Update the data product object
    #--------------------------------------------------------------------------------
    data_product_obj.dataset_configuration_id = ingestion_configuration_id
    self.update_data_product(data_product_obj)

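# Usage sketch: persistence is a separate, explicit step after product
# creation, and can later be reversed with the suspend call defined above.
self.activate_data_product_persistence(data_product_id=dp_id)
# ... later, to stop writing the stream to its dataset:
self.suspend_data_product_persistence(data_product_id=dp_id)
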
def create_event_process(self, process_definition_id='', event_types=None, sub_types=None,
                         origins=None, origin_types=None, out_data_products=None):
    """
    Create an event process using a process definition. Pass to the event process
    the info about the events that the event process will subscribe to.

    @param process_definition_id str
    @param event_types list
    @param sub_types list
    @param origins list
    @param origin_types list
    @return process_id
    """
    # A process definition is required to be passed in
    validate_is_not_none(process_definition_id)

    #-------------------------------------------------------------------------
    # The output streams for the event process, if any are provided
    #-------------------------------------------------------------------------
    output_streams = {}
    if out_data_products:
        for binding, output_data_product_id in out_data_products.iteritems():
            stream_ids, _ = self.clients.resource_registry.find_objects(
                output_data_product_id, PRED.hasStream, RT.Stream, True)
            if not stream_ids:
                raise NotFound("No Stream created for output Data Product " + str(output_data_product_id))
            if len(stream_ids) != 1:
                raise BadRequest("Data Product should only have ONE stream at this time: " + str(output_data_product_id))
            output_streams[binding] = stream_ids[0]

    #-------------------------------------------------------------------------
    # The process definition
    #-------------------------------------------------------------------------
    # read the process definition object
    process_definition = self.clients.resource_registry.read(process_definition_id)

    #-------------------------------------------------------------------------
    # Get the event process detail object from the process definition
    #-------------------------------------------------------------------------
    event_process_def_detail = process_definition.definition or EventProcessDefinitionDetail()

    event_process_detail = EventProcessDetail()

    # But if event_types etc have been specified when the method is called, put them in the new
    # event process detail object, thus overwriting the ones that were transferred from the
    # event process def detail object
    event_process_detail.event_types = event_types or event_process_def_detail.event_types
    event_process_detail.sub_types = sub_types or event_process_def_detail.sub_types
    event_process_detail.origins = origins
    event_process_detail.origin_types = origin_types or event_process_def_detail.origin_types
    event_process_detail.output_streams = output_streams

    #-------------------------------------------------------------------------
    # Launch the process
    #-------------------------------------------------------------------------
    # Create a config to pass the event_types, origins etc to the process, which is about to be created
    config = DotDict()
    config.process.event_types = event_types
    config.process.sub_types = sub_types
    config.process.origins = origins
    config.process.origin_types = origin_types
    config.process.publish_streams = output_streams

    # Schedule the process
    pid = self.clients.process_dispatcher.schedule_process(
        process_definition_id=process_definition_id, configuration=config)

    event_process = self.clients.resource_registry.read(pid)
    event_process.detail = event_process_detail
    self.clients.resource_registry.update(event_process)

    #-------------------------------------------------------------------------
    # Associate the process with the process definition
    #-------------------------------------------------------------------------
    self.clients.resource_registry.create_association(
        subject=pid, predicate=PRED.hasProcessDefinition, object=process_definition_id)

    #-------------------------------------------------------------------------
    # Register the process as a data producer
    #-------------------------------------------------------------------------
    self.clients.data_acquisition_management.register_event_process(process_id=pid)

    return pid

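# Usage sketch (hypothetical ids): launch an event process that reacts to
# TimerEvents from a specific origin; output data products are optional.
event_pid = self.create_event_process(
    process_definition_id='proc_def_id_123',
    event_types=['TimerEvent'],
    origins=['scheduler_origin_1'])
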
def create_data_product(self, data_product=None, stream_definition_id='', parameter_dictionary=None):
    """
    @param data_product    IonObject which defines the general data product resource
    @param stream_definition_id    id of the stream definition for the product's stream
    @param parameter_dictionary    dict describing the product's parameters
    @retval data_product_id
    """
    # Create will validate and register a new data product within the system
    validate_is_not_none(parameter_dictionary,
                         'A parameter dictionary must be passed to register a data product')
    validate_is_not_none(stream_definition_id,
                         'A stream definition id must be passed to register a data product')
    validate_is_not_none(data_product,
                         'A data product (ion object) must be passed to register a data product')

    #--------------------------------------------------------------------------------
    # Register - create and store a new DataProduct resource using provided metadata
    #--------------------------------------------------------------------------------
    data_product_id, rev = self.clients.resource_registry.create(data_product)
    log.debug("data product id: %s" % data_product_id)

    #--------------------------------------------------------------------------------
    # Register - create and store the default DataProductVersion resource using provided metadata
    #--------------------------------------------------------------------------------
    # create the initial/default data product version
    data_product_version = DataProductVersion()
    data_product_version.name = data_product.name
    data_product_version.description = data_product.description
    dpv_id, rev = self.clients.resource_registry.create(data_product_version)
    self.clients.resource_registry.create_association(
        subject=data_product_id, predicate=PRED.hasVersion, object=dpv_id)

    #-----------------------------------------------------------------------------------------------
    # Create the stream and a dataset if a stream definition is provided
    #-----------------------------------------------------------------------------------------------
    log.debug("DataProductManagementService:create_data_product: stream definition id = %s" % stream_definition_id)

    stream_id = self.clients.pubsub_management.create_stream(
        name=data_product.name,
        description=data_product.description,
        stream_definition_id=stream_definition_id)

    # Associate the Stream with the main Data Product and with the default data product version
    self.data_product.link_stream(data_product_id, stream_id)
    self.clients.resource_registry.create_association(
        subject=dpv_id, predicate=PRED.hasStream, object=stream_id)

    # create a dataset...
    data_set_id = self.clients.dataset_management.create_dataset(
        name='data_set_%s' % stream_id,
        stream_id=stream_id,
        parameter_dict=parameter_dictionary,
        temporal_domain=data_product.temporal_domain,
        spatial_domain=data_product.spatial_domain)

    log.debug("DataProductManagementService:create_data_product: data_set_id = %s" % str(data_set_id))
    data_set_obj = self.clients.dataset_management.read_dataset(data_set_id)
    log.debug("DataProductManagementService:create_data_product: data_set_obj = %s" % str(data_set_obj))

    # link dataset with data product. This creates the association in the resource registry
    self.data_product.link_data_set(data_product_id=data_product_id, data_set_id=data_set_id)
    self.clients.resource_registry.create_association(
        subject=dpv_id, predicate=PRED.hasDataset, object=data_set_id)

    # Return the id of the new data product
    return data_product_id

def create_data_process(self, data_process_definition_id='', in_data_product_ids=None,
                        out_data_products=None, configuration=None):
    """
    @param data_process_definition_id: Object with definition of the process to apply to the input data product
    @param in_data_product_ids: IDs of the input data products
    @param out_data_products: dict mapping output bindings to output data product IDs
    @retval data_process_id: ID of the newly created data process object
    """
    inform = "Input Data Product: " + str(in_data_product_ids) + \
             "\nTransformed by: " + str(data_process_definition_id) + \
             "\nTo create output Product: " + str(out_data_products) + "\n"
    log.debug("DataProcessManagementService:create_data_process() method called with parameters:\n" + inform)

    #---------------------------------------------------------------------------------------
    # Initialize
    #---------------------------------------------------------------------------------------
    configuration = configuration or DotDict()

    validate_is_not_none(out_data_products, "No output data products passed in")

    #---------------------------------------------------------------------------------------
    # Read the data process definition
    #---------------------------------------------------------------------------------------
    data_process_definition = self.read_data_process_definition(data_process_definition_id)

    #---------------------------------------------------------------------------------------
    # Read the output bindings from the definition
    #---------------------------------------------------------------------------------------
    output_bindings = data_process_definition.output_bindings

    #---------------------------------------------------------------------------------------
    # Find the process definition associated with this data process definition.
    # From the process definition, we can get the module and class to run....
    #---------------------------------------------------------------------------------------
    procdef_ids, _ = self.clients.resource_registry.find_objects(
        data_process_definition_id, PRED.hasProcessDefinition, RT.ProcessDefinition, id_only=True)
    if not procdef_ids:
        raise BadRequest("Cannot find associated ProcessDefinition for DataProcessDefinition id=%s"
                         % data_process_definition_id)
    process_definition_id = procdef_ids[0]

    #---------------------------------------------------------------------------------------
    # Create a data process object and register it
    #---------------------------------------------------------------------------------------
    # get the name of the data process and create an IonObject for it
    data_process_name = create_unique_identifier("process_" + data_process_definition.name)
    data_process_obj = IonObject(RT.DataProcess, name=data_process_name)

    # register the data process
    data_process_id, version = self.clients.resource_registry.create(data_process_obj)
    data_process_obj = self.clients.resource_registry.read(data_process_id)

    #---------------------------------------------------------------------------------------
    # Make the necessary associations, registering
    #---------------------------------------------------------------------------------------
    #todo check if this assoc is needed?
    # Associate the data process with the data process definition
    self.clients.resource_registry.create_association(
        data_process_id, PRED.hasProcessDefinition, data_process_definition_id)

    # Register the data process instance as a data producer with DataAcquisitionMgmtSvc
    data_producer_id = self.clients.data_acquisition_management.register_process(data_process_id)
    log.debug("DataProcessManagementService:create_data_process register process "
              "with DataAcquisitionMgmtSvc: data_producer_id: %s (L4-CI-SA-RQ-181)", str(data_producer_id))

    #---------------------------------------------------------------------------------------
    # Register each output data product with DAMS to create DataProducer links
    #---------------------------------------------------------------------------------------
    output_stream_dict = {}

    if out_data_products is None:
        raise BadRequest("Data Process must have output product(s) specified %s"
                         % str(data_process_definition_id))

    for binding, output_data_product_id in out_data_products.iteritems():

        # check that the product is not already associated with a producer
        producer_ids, _ = self.clients.resource_registry.find_objects(
            output_data_product_id, PRED.hasDataProducer, RT.DataProducer, True)
        if producer_ids:
            raise BadRequest("Data Product should not already be associated to a DataProducer %s hasDataProducer %s"
                             % (str(data_process_id), str(producer_ids[0])))

        # Assign each output Data Product to this producer resource
        output_data_product_obj = self.clients.resource_registry.read(output_data_product_id)
        if not output_data_product_obj:
            raise NotFound("Output Data Product %s does not exist" % output_data_product_id)

        # Associate with DataProcess: register as an output product for this process
        log.debug("Link data process %s and output out data product: %s (L4-CI-SA-RQ-260)",
                  str(data_process_id), str(output_data_product_id))
        self.clients.data_acquisition_management.assign_data_product(
            input_resource_id=data_process_id, data_product_id=output_data_product_id)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.clients.resource_registry.find_objects(
            output_data_product_id, PRED.hasStream, RT.Stream, True)
        if not stream_ids:
            raise NotFound("No Stream created for output Data Product " + str(output_data_product_id))
        if len(stream_ids) != 1:
            raise BadRequest("Data Product should only have ONE stream at this time: " + str(output_data_product_id))
        output_stream_dict[binding] = stream_ids[0]

    #------------------------------------------------------------------------------------------------------------------------------------------
    # Check for attached objects and put them into the configuration
    #------------------------------------------------------------------------------------------------------------------------------------------

    # check for attachments in data process definition
    configuration = self._find_lookup_tables(data_process_definition_id, configuration)
    input_stream_ids = []

    if in_data_product_ids:
        for in_data_product_id in in_data_product_ids:
            self.clients.resource_registry.create_association(
                data_process_id, PRED.hasInputProduct, in_data_product_id)
            log.debug("Associate data process workflows with source data products %s "
                      "hasInputProducts %s (L4-CI-SA-RQ-260)", str(data_process_id), str(in_data_product_ids))

            # if the input data product is attached to an instrument, check InstrumentDevice
            # and InstrumentModel for lookup table attachments
            instdevice_ids, _ = self.clients.resource_registry.find_subjects(
                RT.InstrumentDevice, PRED.hasOutputProduct, in_data_product_id, True)
            for instdevice_id in instdevice_ids:
                log.debug("Instrument device_id assoc to the input data product of this data process: %s (L4-CI-SA-RQ-231)",
                          str(instdevice_id))

                # check for attachments in instrument device
                configuration = self._find_lookup_tables(instdevice_id, configuration)
                instmodel_ids, _ = self.clients.resource_registry.find_objects(
                    instdevice_id, PRED.hasModel, RT.InstrumentModel, True)
                for instmodel_id in instmodel_ids:
                    # check for attachments in instrument model
                    configuration = self._find_lookup_tables(instmodel_id, configuration)

        #------------------------------------------------------------------------------------------------------------------------------------------
        # Get the input streams from the input data products, which should already be associated with streams via the Data Producer
        #------------------------------------------------------------------------------------------------------------------------------------------
        input_stream_ids = self._get_input_stream_ids(in_data_product_ids)

    #------------------------------------------------------------------------------------------------------------------------------------------
    # Create subscription to the input streams
    #------------------------------------------------------------------------------------------------------------------------------------------
    input_subscription_id = self.clients.pubsub_management.create_subscription(
        name=data_process_name, stream_ids=input_stream_ids)

    #------------------------------------------------------------------------------------------------------------------------------------------
    # Add the subscription id to the data process
    #------------------------------------------------------------------------------------------------------------------------------------------
    data_process_obj.input_subscription_id = input_subscription_id

    log.info("Launching the process")
    debug_str = "\n\tQueue Name: %s\n\tOutput Streams: %s\n\tProcess Definition ID: %s\n\tConfiguration: %s" % \
                (data_process_name, output_stream_dict, process_definition_id, configuration)
    log.debug(debug_str)

    pid = self._launch_process(
        queue_name=data_process_name,
        out_streams=output_stream_dict,
        process_definition_id=process_definition_id,
        configuration=configuration)

    data_process_obj.process_id = pid
    self.clients.resource_registry.update(data_process_obj)
    return data_process_id

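# Usage sketch (hypothetical ids): wire one input product through a data
# process definition to one output product. The binding key ('output') must
# match an output binding declared on the definition.
data_process_id = self.create_data_process(
    data_process_definition_id='dpd_id_456',
    in_data_product_ids=['in_dp_id_1'],
    out_data_products={'output': 'out_dp_id_2'},
    configuration=None)
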
def activate_data_product_persistence(self, data_product_id=''):
    """Persist data product data into a data set

    @param data_product_id    str
    @throws NotFound    object with specified id does not exist
    """
    #--------------------------------------------------------------------------------
    # Retrieve the data_product object
    #--------------------------------------------------------------------------------
    data_product_obj = self.data_product.read_one(data_product_id)
    validate_is_not_none(data_product_obj, "The data product id should correspond to a valid registered data product.")

    #--------------------------------------------------------------------------------
    # Get the Stream associated with this data product; the data product must have
    # exactly one stream before persistence can be activated
    #--------------------------------------------------------------------------------
    streams = self.data_product.find_stemming_stream(data_product_id)
    if not streams:
        raise BadRequest('Data Product %s must have one stream associated' % str(data_product_id))

    stream_id = streams[0]._id
    log.debug("activate_data_product_persistence: stream = %s", str(stream_id))

    #--------------------------------------------------------------------------------
    # Grab the ingestion configuration id from the data_product in order to persist it;
    # fall back to the first registered ingestion configuration if none is set
    #--------------------------------------------------------------------------------
    if data_product_obj.dataset_configuration_id:
        ingestion_configuration_id = data_product_obj.dataset_configuration_id
    else:
        ingestion_configuration_id = self.clients.ingestion_management.list_ingestion_configurations(id_only=True)[0]
    log.debug("ingestion_configuration_id for data product: %s", ingestion_configuration_id)

    #--------------------------------------------------------------------------------
    # Persist the data stream using the ingestion config id and stream id
    #--------------------------------------------------------------------------------

    # Find the datasets for the data product
    dataset_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataset, RT.DataSet, id_only=True)
    log.debug("Found the following datasets for the data product: %s", dataset_ids)

    for dataset_id in dataset_ids:
        try:
            dataset_id = self.clients.ingestion_management.persist_data_stream(
                stream_id=stream_id,
                ingestion_configuration_id=ingestion_configuration_id,
                dataset_id=dataset_id)
        except BadRequest:
            log.warning("Activate data product may have resulted in a duplicate attempt to associate a stream to a dataset")
            log.warning("Please note that creating a data product calls the create_dataset() method, which already makes an association")

        log.debug("activate_data_product_persistence: dataset_id = %s", str(dataset_id))

        # link data set to data product
        #self.data_product.link_data_set(data_product_id, dataset_id)

    #--------------------------------------------------------------------------------
    # todo: dataset_configuration_obj contains the ingest config for now...
    # Update the data product object
    #--------------------------------------------------------------------------------
    data_product_obj.dataset_configuration_id = ingestion_configuration_id
    self.update_data_product(data_product_obj)
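# Usage sketch (hypothetical, not executed): activating persistence for a data product
# that already has exactly one stream and at least one dataset. The client handle and id
# are placeholders:
#
#   data_product_management_client.activate_data_product_persistence(
#       data_product_id=data_product_id)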
def create_event_process(self, process_definition_id='', event_types=None, sub_types=None, origins=None, origin_types=None, out_data_products=None):
    """
    Create an event process using a process definition. Pass the event process the info
    about the events it will subscribe to.

    @param process_definition_id str
    @param event_types list
    @param sub_types list
    @param origins list
    @param origin_types list
    @return process_id
    """
    # A process definition is required to be passed in
    validate_is_not_none(process_definition_id)

    #-------------------------------------------------------------------------
    # The output streams for the event process, if any are provided
    #-------------------------------------------------------------------------
    output_streams = {}
    if out_data_products:
        for binding, output_data_product_id in out_data_products.iteritems():
            stream_ids, _ = self.clients.resource_registry.find_objects(output_data_product_id, PRED.hasStream, RT.Stream, True)
            if not stream_ids:
                raise NotFound("No Stream created for output Data Product %s" % str(output_data_product_id))
            if len(stream_ids) != 1:
                raise BadRequest("Data Product should only have ONE stream at this time: %s" % str(output_data_product_id))
            output_streams[binding] = stream_ids[0]

    #-------------------------------------------------------------------------
    # The process definition
    #-------------------------------------------------------------------------

    # Read the process definition object
    process_definition = self.clients.resource_registry.read(process_definition_id)

    #-------------------------------------------------------------------------
    # Get the event process detail object from the process definition
    #-------------------------------------------------------------------------
    event_process_def_detail = process_definition.definition or EventProcessDefinitionDetail()
    event_process_detail = EventProcessDetail()

    # If event_types etc. have been specified when the method is called, put them in the
    # new event process detail object, overriding the ones carried over from the event
    # process definition detail object
    event_process_detail.event_types = event_types or event_process_def_detail.event_types
    event_process_detail.sub_types = sub_types or event_process_def_detail.sub_types
    event_process_detail.origins = origins or event_process_def_detail.origins
    event_process_detail.origin_types = origin_types or event_process_def_detail.origin_types
    event_process_detail.output_streams = output_streams

    #-------------------------------------------------------------------------
    # Launch the process
    #-------------------------------------------------------------------------

    # Create a config to pass the event_types, origins etc. to the process about to be created
    config = DotDict()
    config.process.event_types = event_types
    config.process.sub_types = sub_types
    config.process.origins = origins
    config.process.origin_types = origin_types
    config.process.publish_streams = output_streams

    # Schedule the process
    pid = self.clients.process_dispatcher.schedule_process(
        process_definition_id=process_definition_id,
        configuration=config)

    event_process = self.clients.resource_registry.read(pid)
    event_process.detail = event_process_detail
    self.clients.resource_registry.update(event_process)

    #-------------------------------------------------------------------------
    # Associate the process with the process definition
    #-------------------------------------------------------------------------
    self.clients.resource_registry.create_association(
        subject=pid,
        predicate=PRED.hasProcessDefinition,
        object=process_definition_id)

    #-------------------------------------------------------------------------
    # Register the process as a data producer
    #-------------------------------------------------------------------------
    self.clients.data_acquisition_management.register_event_process(process_id=pid)

    return pid
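# Usage sketch (hypothetical, not executed): creating an event process that listens for a
# given event type from a particular origin. The client handle, ids and event type are
# placeholders; event_types, sub_types, origins and origin_types are lists used to build
# the process's subscription config:
#
#   pid = event_management_client.create_event_process(
#       process_definition_id=event_process_definition_id,
#       event_types=['ResourceLifecycleEvent'],
#       origins=[instrument_device_id],
#       out_data_products={'output': output_data_product_id})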