コード例 #1
0
    def assign_stream_definition_to_data_process_definition(
            self,
            stream_definition_id='',
            data_process_definition_id='',
            binding=''):
        """Connect the output  stream with a data process definition
        """
        # Verify that both ids are valid, RR will throw if not found
        stream_definition_obj = self.clients.resource_registry.read(
            stream_definition_id)
        data_process_definition_obj = self.clients.resource_registry.read(
            data_process_definition_id)

        validate_is_not_none(
            stream_definition_obj,
            "No stream definition object found for stream definition id: %s" %
            stream_definition_id)
        validate_is_not_none(data_process_definition_obj, "No data process definition object found for data process"\
                                                          " definition id: %s" % data_process_definition_id)

        self.clients.resource_registry.create_association(
            data_process_definition_id, PRED.hasStreamDefinition,
            stream_definition_id)
        if binding:
            data_process_definition_obj.output_bindings[
                binding] = stream_definition_id
        self.clients.resource_registry.update(data_process_definition_obj)
コード例 #2
0
    def _launch_process(self, queue_name='', out_streams=None, process_definition_id='', configuration=None):
        """
        Launches the process
        """

        # ------------------------------------------------------------------------------------
        # Spawn Configuration and Parameters
        # ------------------------------------------------------------------------------------

        configuration['process'] = {
            'queue_name':queue_name,
            'publish_streams' : out_streams
        }

        # ------------------------------------------------------------------------------------
        # Process Spawning
        # ------------------------------------------------------------------------------------
        # Spawn the process
        pid = self.clients.process_dispatcher.schedule_process(
            process_definition_id=process_definition_id,
            configuration=configuration
        )
        validate_is_not_none( pid, "Process could not be spawned")

        return pid
コード例 #3
0
    def _launch_process(self,
                        queue_name='',
                        out_streams=None,
                        process_definition_id='',
                        configuration=None):
        """
        Launches the process
        """

        # ------------------------------------------------------------------------------------
        # Spawn Configuration and Parameters
        # ------------------------------------------------------------------------------------

        if 'process' not in configuration:
            configuration['process'] = {}
        configuration['process']['queue_name'] = queue_name
        configuration['process']['publish_streams'] = out_streams

        # Setting the restart mode
        schedule = ProcessSchedule()
        schedule.restart_mode = ProcessRestartMode.ABNORMAL
        schedule.queueing_mode = ProcessQueueingMode.ALWAYS

        # ------------------------------------------------------------------------------------
        # Process Spawning
        # ------------------------------------------------------------------------------------
        # Spawn the process
        pid = self.clients.process_dispatcher.schedule_process(
            process_definition_id=process_definition_id,
            schedule=schedule,
            configuration=configuration)
        validate_is_not_none(pid, "Process could not be spawned")

        return pid
コード例 #4
0
    def assign_stream_definition_to_data_process_definition(
        self, stream_definition_id="", data_process_definition_id="", binding=""
    ):
        """Connect the output  stream with a data process definition
        """
        # Verify that both ids are valid, RR will throw if not found
        stream_definition_obj = self.clients.resource_registry.read(stream_definition_id)
        data_process_definition_obj = self.clients.resource_registry.read(data_process_definition_id)

        validate_is_not_none(
            stream_definition_obj,
            "No stream definition object found for stream definition id: %s" % stream_definition_id,
        )
        validate_is_not_none(
            data_process_definition_obj,
            "No data process definition object found for data process"
            " definition id: %s" % data_process_definition_id,
        )

        self.clients.resource_registry.create_association(
            data_process_definition_id, PRED.hasStreamDefinition, stream_definition_id
        )
        if binding:
            data_process_definition_obj.output_bindings[binding] = stream_definition_id
        self.clients.resource_registry.update(data_process_definition_obj)
コード例 #5
0
    def create_dataset(self, name='', datastore_name='', view_name='', stream_id='', parameter_dict=None, spatial_domain=None, temporal_domain=None, parameter_dictionary_id='', description=''):
        validate_true(parameter_dict or parameter_dictionary_id, 'A parameter dictionary must be supplied to register a new dataset.')
        validate_is_not_none(spatial_domain, 'A spatial domain must be supplied to register a new dataset.')
        validate_is_not_none(temporal_domain, 'A temporal domain must be supplied to register a new dataset.')
        
        if parameter_dictionary_id:
            pd = self.read_parameter_dictionary(parameter_dictionary_id)
            pcs = self.read_parameter_contexts(parameter_dictionary_id, id_only=False)
            parameter_dict = self._merge_contexts([ParameterContext.load(i.parameter_context) for i in pcs], pd.temporal_context)
            parameter_dict = parameter_dict.dump()

        dataset                      = Dataset()
        dataset.description          = description
        dataset.name                 = name
        dataset.primary_view_key     = stream_id or None
        dataset.datastore_name       = datastore_name or self.DEFAULT_DATASTORE
        dataset.view_name            = view_name or self.DEFAULT_VIEW
        dataset.parameter_dictionary = parameter_dict
        dataset.temporal_domain      = temporal_domain
        dataset.spatial_domain       = spatial_domain
        dataset.registered           = False


        dataset_id, _ = self.clients.resource_registry.create(dataset)
        if stream_id:
            self.add_stream(dataset_id,stream_id)

        log.debug('creating dataset: %s', dataset_id)

        cov = self._create_coverage(dataset_id, description or dataset_id, parameter_dict, spatial_domain, temporal_domain) 
        self._save_coverage(cov)
        cov.close()

        return dataset_id
コード例 #6
0
    def suspend_data_product_persistence(self, data_product_id=''):
        """Suspend data product data persistence into a data set, multiple options

        @param data_product_id    str
        @param type    str
        @throws NotFound    object with specified id does not exist
        """

        #--------------------------------------------------------------------------------
        # retrieve the data_process object
        #--------------------------------------------------------------------------------
        data_product_obj = self.clients.resource_registry.read(data_product_id)

        validate_is_not_none(data_product_obj, 'Should not have been empty')
        validate_is_instance(data_product_obj, DataProduct)

        if data_product_obj.dataset_configuration_id is None:
            raise NotFound("Data Product %s dataset configuration does not exist" % data_product_id)

        #--------------------------------------------------------------------------------
        # get the Stream associated with this data product; if no stream then create one, if multiple streams then Throw
        #streams = self.data_product.find_stemming_stream(data_product_id)
        #--------------------------------------------------------------------------------
        stream_id = self._get_stream_id(data_product_id)
        validate_is_not_none(stream_id, 'Data Product %s must have one stream associated' % str(data_product_id))

        ret = self.clients.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=data_product_obj.dataset_configuration_id)
コード例 #7
0
    def add_data_product_version_to_collection(self, data_product_id='', data_product_collection_id='', version_name='', version_description=''):


        dp_collection_obj =self.clients.resource_registry.read(data_product_collection_id)

        #retrieve the stream definition for both the new data product to add to this collection and the base data product for this collection
        new_data_product_obj = self.clients.resource_registry.read(data_product_id)
        new_data_product_streams, _ = self.clients.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, object_type=RT.Stream, id_only=True)
        validate_is_not_none(new_data_product_streams, 'The data product to add to the collection must have an associated stream')
        new_data_product_streamdefs, _ = self.clients.resource_registry.find_objects(subject=new_data_product_streams[0], predicate=PRED.hasStreamDefinition, object_type=RT.StreamDefinition, id_only=True)

        base_data_product_id = dp_collection_obj.version_list[0].data_product_id
        base_data_product_obj = self.clients.resource_registry.read(base_data_product_id)
        base_data_product_streams, _ = self.clients.resource_registry.find_objects(subject=base_data_product_id, predicate=PRED.hasStream, object_type=RT.Stream, id_only=True)
        validate_is_not_none(base_data_product_streams, 'The base data product in the collection must have an associated stream')
        base_data_product_streamdefs, _ = self.clients.resource_registry.find_objects(subject=base_data_product_streams[0], predicate=PRED.hasStreamDefinition, object_type=RT.StreamDefinition, id_only=True)
        if not self.clients.pubsub_management.compare_stream_definition(stream_definition1_id=new_data_product_streamdefs[0], stream_definition2_id=base_data_product_streamdefs[0]):
            raise BadRequest("All Data Products in a collection must have equivelent stream definitions.")

        #todo: validate that the spatial/temporal domain match the base data product


        dpv = DataProductVersion()
        dpv.name = version_name
        dpv.description = version_description
        dpv.data_product_id = data_product_id

        dp_collection_obj.version_list.append(dpv)
        self.clients.resource_registry.update(dp_collection_obj)

        self.clients.resource_registry.create_association( subject=data_product_collection_id, predicate=PRED.hasVersion, object=data_product_id)

        return
コード例 #8
0
    def _get_input_stream_ids(self, in_data_product_ids=None):

        input_stream_ids = []

        #------------------------------------------------------------------------------------------------------------------------------------------
        # get the streams associated with this IN data products
        #------------------------------------------------------------------------------------------------------------------------------------------
        for in_data_product_id in in_data_product_ids:

            # Get the stream associated with this input data product
            stream_ids, _ = self.clients.resource_registry.find_objects(
                in_data_product_id, PRED.hasStream, RT.Stream, True)

            validate_is_not_none(
                stream_ids, "No Stream created for this input Data Product " +
                str(in_data_product_id))
            validate_is_not_none(
                len(stream_ids) != 1,
                "Input Data Product should only have ONE stream" +
                str(in_data_product_id))

            # We take for now one stream_id associated with each input data product
            input_stream_ids.append(stream_ids[0])

        return input_stream_ids
コード例 #9
0
    def _launch_process(self, queue_name='', out_streams=None, process_definition_id='', configuration=None):
        """
        Launches the process
        """

        # ------------------------------------------------------------------------------------
        # Spawn Configuration and Parameters
        # ------------------------------------------------------------------------------------

        if 'process' not in configuration:
            configuration['process'] = {}
        configuration['process']['queue_name'] = queue_name
        configuration['process']['publish_streams'] = out_streams

        # Setting the restart mode
        schedule = ProcessSchedule()
        schedule.restart_mode = ProcessRestartMode.ABNORMAL

        # ------------------------------------------------------------------------------------
        # Process Spawning
        # ------------------------------------------------------------------------------------
        # Spawn the process
        pid = self.clients.process_dispatcher.schedule_process(
            process_definition_id=process_definition_id,
            schedule= schedule,
            configuration=configuration
        )
        validate_is_not_none( pid, "Process could not be spawned")

        return pid
コード例 #10
0
 def on_start(self):
     SimpleProcess.on_start(self)
     self.data_retriever = DataRetrieverServiceProcessClient(process=self)
     self.interval_key = self.CFG.get_safe('process.interval_key',None)
     self.qc_params    = self.CFG.get_safe('process.qc_params',[])
     validate_is_not_none(self.interval_key, 'An interval key is necessary to paunch this process')
     self.event_subscriber = EventSubscriber(event_type=OT.TimerEvent, origin=self.interval_key, callback=self._event_callback, auto_delete=True)
     self.add_endpoint(self.event_subscriber)
     self.resource_registry = self.container.resource_registry
コード例 #11
0
    def create_dataset(self,
                       name='',
                       datastore_name='',
                       view_name='',
                       stream_id='',
                       parameter_dict=None,
                       spatial_domain=None,
                       temporal_domain=None,
                       parameter_dictionary_id='',
                       description=''):
        validate_true(
            parameter_dict or parameter_dictionary_id,
            'A parameter dictionary must be supplied to register a new dataset.'
        )
        validate_is_not_none(
            spatial_domain,
            'A spatial domain must be supplied to register a new dataset.')
        validate_is_not_none(
            temporal_domain,
            'A temporal domain must be supplied to register a new dataset.')

        if parameter_dictionary_id:
            pd = self.read_parameter_dictionary(parameter_dictionary_id)
            pcs = self.read_parameter_contexts(parameter_dictionary_id,
                                               id_only=False)
            parameter_dict = self._merge_contexts(
                [ParameterContext.load(i.parameter_context) for i in pcs],
                pd.temporal_context)
            parameter_dict = parameter_dict.dump()

        dataset = Dataset()
        dataset.description = description
        dataset.name = name
        dataset.primary_view_key = stream_id or None
        dataset.datastore_name = datastore_name or self.DEFAULT_DATASTORE
        dataset.view_name = view_name or self.DEFAULT_VIEW
        dataset.parameter_dictionary = parameter_dict
        dataset.temporal_domain = temporal_domain
        dataset.spatial_domain = spatial_domain
        dataset.registered = False

        dataset_id, _ = self.clients.resource_registry.create(dataset)
        if stream_id:
            self.add_stream(dataset_id, stream_id)

        log.debug('creating dataset: %s', dataset_id)

        cov = self._create_coverage(dataset_id, description or dataset_id,
                                    parameter_dict, spatial_domain,
                                    temporal_domain)
        self._save_coverage(cov)
        cov.close()

        return dataset_id
コード例 #12
0
    def read_data_product_collection(self, data_product_collection_id=''):
        """Retrieve data product information

        @param data_product_collection_id    str
        @retval data_product    DataProductVersion
        """
        result = self.clients.resource_registry.read(data_product_collection_id)

        validate_is_not_none(result, "Should not have returned an empty result")

        return result
コード例 #13
0
    def activate_data_product_persistence(self, data_product_id=''):
        """Persist data product data into a data set

        @param data_product_id    str
        @throws NotFound    object with specified id does not exist
        """
        #--------------------------------------------------------------------------------
        # retrieve the data_process object
        #--------------------------------------------------------------------------------
        data_product_obj = self.data_product.read_one(data_product_id)

        validate_is_not_none(data_product_obj, "The data product id should correspond to a valid registered data product.")

        #--------------------------------------------------------------------------------
        # get the Stream associated with this data product; if no stream then create one, if multiple streams then Throw
        #--------------------------------------------------------------------------------
        streams = self.data_product.find_stemming_stream(data_product_id)
        if not streams:
            raise BadRequest('Data Product %s must have one stream associated' % str(data_product_id))

        stream_id = streams[0]._id
        log.debug("Activating data product persistence for stream_id: %s"  % str(stream_id))



        #-----------------------------------------------------------------------------------------
        # grab the ingestion configuration id from the data_product in order to use to persist it
        #-----------------------------------------------------------------------------------------
        if data_product_obj.dataset_configuration_id:
            ingestion_configuration_id = data_product_obj.dataset_configuration_id
        else:
            ingestion_configuration_id = self.clients.ingestion_management.list_ingestion_configurations(id_only=True)[0]

        #--------------------------------------------------------------------------------
        # persist the data stream using the ingestion config id and stream id
        #--------------------------------------------------------------------------------

        # find datasets for the data product
        dataset_id = self._get_dataset_id(data_product_id)
        log.debug("Activating data product persistence for dataset_id: %s"  % str(dataset_id))
        dataset_id = self.clients.ingestion_management.persist_data_stream(stream_id=stream_id,
                                                ingestion_configuration_id=ingestion_configuration_id,
                                                dataset_id=dataset_id)

        # register the dataset for externalization
        self.clients.dataset_management.register_dataset(dataset_id)


        #--------------------------------------------------------------------------------
        # todo: dataset_configuration_obj contains the ingest config for now...
        # Update the data product object
        #--------------------------------------------------------------------------------
        data_product_obj.dataset_configuration_id = ingestion_configuration_id
        self.update_data_product(data_product_obj)
コード例 #14
0
    def update_event_process_definition(self,
                                        event_process_definition_id='',
                                        version='',
                                        module='',
                                        class_name='',
                                        uri='',
                                        arguments=None,
                                        event_types=None,
                                        sub_types=None,
                                        origin_types=None):
        """
        Update the process definition for the event process.

        @param event_process_definition_id str
        @param version str
        @param module str
        @param class_name str
        @param uri str
        @arguments list
        """

        validate_is_not_none(event_process_definition_id)

        # The event_process_def is really only a process_def. Read up the process definition
        process_def = self.clients.resource_registry.read(
            event_process_definition_id)

        definition = process_def.definition

        # Fetch or make a new EventProcessDefinitionDetail object
        if definition:
            event_process_def_detail = EventProcessDefinitionDetail()
            event_process_def_detail.event_types = event_types or definition.event_types
            event_process_def_detail.sub_types = sub_types or definition.sub_types
            event_process_def_detail.origin_types = origin_types or definition.origin_types
        else:
            event_process_def_detail = EventProcessDefinitionDetail(
                event_types=event_types,
                sub_types=sub_types,
                origin_types=origin_types)

#        event_process_def_detail = process_def.definition or EventProcessDefinitionDetail()

# Update the fields of the process definition
        process_def.executable['module'] = module
        process_def.executable['class'] = class_name
        process_def.executable['uri'] = uri
        process_def.version = version
        process_def.arguments = arguments
        process_def.definition = event_process_def_detail

        # Finally update the resource registry
        self.clients.resource_registry.update(process_def)
コード例 #15
0
    def assign_input_stream_definition_to_data_process_definition(self, stream_definition_id='', data_process_definition_id=''):
        """Connect the input  stream with a data process definition
        """
        # Verify that both ids are valid, RR will throw if not found
        stream_definition_obj = self.clients.resource_registry.read(stream_definition_id)
        data_process_definition_obj = self.clients.resource_registry.read(data_process_definition_id)

        validate_is_not_none(stream_definition_obj, "No stream definition object found for stream definition id: %s" % stream_definition_id)
        validate_is_not_none(data_process_definition_obj, "No data process definition object found for data process" \
                                                          " definition id: %s" % data_process_definition_id)

        self.clients.resource_registry.create_association(data_process_definition_id,  PRED.hasInputStreamDefinition,  stream_definition_id)
コード例 #16
0
    def read_data_product_version(self, data_product_version_id=''):
        """Retrieve data product information

        @param data_product_version_id    str
        @retval data_product    DataProductVersion
        """
        log.debug("DataProductManagementService:read_data_product_version: %s" % str(data_product_version_id))

        result = self.clients.resource_registry.read(data_product_version_id)

        validate_is_not_none(result, "Should not have returned an empty result")

        return result
コード例 #17
0
    def get_data_product_provenance(self, data_product_id=''):

        # Retrieve information that characterizes how this data was produced
        # Return in a dictionary

        self.provenance_results = {}

        data_product = self.data_product.read_one(data_product_id)
        validate_is_not_none(data_product, "Should have got a non empty data product")

        # todo: get the start time of this data product
        self.data_product._find_producers(data_product_id, self.provenance_results)

        return self.provenance_results
コード例 #18
0
    def update_data_product_collection(self, data_product_collection=None):
        """@todo document this interface!!!

        @param data_product    DataProductVersion
        @throws NotFound    object with specified id does not exist
        """

        validate_is_not_none(data_product_collection, "Should not pass in a None object")

        self.clients.resource_registry.update(data_product_collection)

        #TODO: any changes to producer? Call DataAcquisitionMgmtSvc?

        return
コード例 #19
0
    def unassign_stream_definition_from_data_process_definition(self, stream_definition_id='', data_process_definition_id=''):
        """
        Disconnect the Data Product from the Data Producer

        @param stream_definition_id    str
        @param data_process_definition_id    str
        @throws NotFound    object with specified id does not exist
        """

        # Remove the link between the Stream Definition resource and the Data Process Definition resource
        associations = self.clients.resource_registry.find_associations(data_process_definition_id, PRED.hasStreamDefinition, stream_definition_id, id_only=True)

        validate_is_not_none(associations, "No Stream Definitions associated with data process definition ID " + str(data_process_definition_id))
        for association in associations:
            self.clients.resource_registry.delete_association(association)
コード例 #20
0
 def on_start(self):
     SimpleProcess.on_start(self)
     self.data_retriever = DataRetrieverServiceProcessClient(process=self)
     self.interval_key = self.CFG.get_safe('process.interval_key', None)
     self.qc_params = self.CFG.get_safe('process.qc_params', [])
     validate_is_not_none(
         self.interval_key,
         'An interval key is necessary to paunch this process')
     self.event_subscriber = EventSubscriber(event_type=OT.TimerEvent,
                                             origin=self.interval_key,
                                             callback=self._event_callback,
                                             auto_delete=True)
     self.add_endpoint(self.event_subscriber)
     self.resource_registry = self.container.resource_registry
     self.run_interval = self.CFG.get_safe(
         'service.qc_processing.run_interval', 24)
コード例 #21
0
    def create_data_product_collection(self, data_product_id='', collection_name='', collection_description=''):
        """Define a  set of an existing data products that represent an improvement in the quality or
        understanding of the information.
        """
        validate_is_not_none(data_product_id, 'A data product identifier must be passed to create a data product version')

        dpv = DataProductVersion()
        dpv.name = 'base'
        dpv.description = 'the base version on which subsequent versions are built'
        dpv.data_product_id = data_product_id

        dp_collection_obj = IonObject(RT.DataProductCollection, name=collection_name, description=collection_description, version_list=[dpv])

        data_product_collection_id, rev = self.clients.resource_registry.create(dp_collection_obj)
        self.clients.resource_registry.create_association( subject=data_product_collection_id, predicate=PRED.hasVersion, object=data_product_id)

        return data_product_collection_id
コード例 #22
0
ファイル: test_arg_check.py プロジェクト: swarbhanu/pyon
    def test_validations(self):
        import pyon.util.arg_check as arg_check

        with self.assertRaises(BadRequest):
            arg_check.validate_true(False, 'test')

        with self.assertRaises(BadRequest):
            arg_check.validate_equal(3, 4, 'test')

        with self.assertRaises(BadRequest):
            arg_check.validate_not_equal(4, 4, 'test')

        with self.assertRaises(BadRequest):
            arg_check.validate_false(True, 'test')

        with self.assertRaises(BadRequest):
            one = list()
            two = list()
            arg_check.validate_is(one, two, 'test')

        with self.assertRaises(BadRequest):
            one = list()
            two = one
            arg_check.validate_is_not(one, two, 'test')

        with self.assertRaises(BadRequest):
            c = None
            arg_check.validate_is_not_none(c, 'test')

        with self.assertRaises(BadRequest):
            one = list([1, 3])
            two = 2
            arg_check.validate_in(two, one, 'test')

        with self.assertRaises(BadRequest):
            one = list([1, 2, 3])
            two = 2
            arg_check.validate_not_in(two, one, 'test')

        with self.assertRaises(BadRequest):
            one = list()
            arg_check.validate_is_instance(one, dict, 'test')

        with self.assertRaises(BadRequest):
            one = list()
            arg_check.validate_not_is_instance(one, list, 'test')
コード例 #23
0
ファイル: test_arg_check.py プロジェクト: ateranishi/pyon
    def test_validations(self):
        import pyon.util.arg_check as arg_check

        with self.assertRaises(BadRequest):
            arg_check.validate_true(False,'test')

        with self.assertRaises(BadRequest):
            arg_check.validate_equal(3,4,'test')

        with self.assertRaises(BadRequest):
            arg_check.validate_not_equal(4,4,'test')

        with self.assertRaises(BadRequest):
            arg_check.validate_false(True,'test')

        with self.assertRaises(BadRequest):
            one = list()
            two = list()
            arg_check.validate_is(one,two,'test')

        with self.assertRaises(BadRequest):
            one = list()
            two = one
            arg_check.validate_is_not(one,two,'test')

        with self.assertRaises(BadRequest):
            c = None
            arg_check.validate_is_not_none(c,'test')

        with self.assertRaises(BadRequest):
            one = list([1,3])
            two = 2
            arg_check.validate_in(two,one,'test')

        with self.assertRaises(BadRequest):
            one = list([1,2,3])
            two = 2
            arg_check.validate_not_in(two,one,'test')

        with self.assertRaises(BadRequest):
            one = list()
            arg_check.validate_is_instance(one,dict,'test')

        with self.assertRaises(BadRequest):
            one = list()
            arg_check.validate_not_is_instance(one,list,'test')
コード例 #24
0
    def create_data_product_version(self, data_product_id='', data_product_version=None):
        """Define a new version of an existing set of information that represent an inprovement in the quality or
        understanding of the information. Only creates the second and higher versions of a DataProduct.
        The first version is implicit in the crate_data_product() operation.

        @param data_product_id    str
        @param data_product_version    DataProductVersion
        @retval data_product_version_id    str
        @throws BadRequest    if object does not have _id or _rev attribute
        @throws NotFound    object with specified id does not exist
        """
        validate_is_not_none(data_product_id, 'A data product identifier must be passed to create a data product version')
        validate_is_not_none(data_product_version, 'A data product version (ion object) must be passed to create a data product version')

        data_product_version_id, rev = self.clients.resource_registry.create(data_product_version)
        self.clients.resource_registry.create_association( subject=data_product_id, predicate=PRED.hasVersion, object=data_product_version_id)


        #-----------------------------------------------------------------------------------------------
        #Create the stream and a dataset for the new version
        #-----------------------------------------------------------------------------------------------
        stream_id = self.clients.pubsub_management.create_stream(name=data_product_version.name,
                                                                description=data_product_version.description)

        # Associate the Stream with the main Data Product and with the default data product version
        self.clients.resource_registry.create_association( subject=data_product_version_id, predicate=PRED.hasStream, object=stream_id)

        #get the parameter_dictionary assoc with the original dataset
        dataset_ids, _ = self.clients.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasDataset, object_type=RT.DataSet, id_only=True)
        if not dataset_ids:
            raise BadRequest('No Dataset associated with the DataProduct %s' % str(data_product_id))

        log.debug("DataProductManagementService:create_data_product_version base_dataset_id: %s", str(dataset_ids[0]))
        base_dataset_obj = self.clients.dataset_management.read_dataset(str(dataset_ids[0]))
        log.debug("DataProductManagementService:create_data_product_version base_dataset_obj: %s" % str(base_dataset_obj))

        # create a dataset for this version. must have same parameter dictionary and spatial/temporal domain as original data product.
        data_set_id = self.clients.dataset_management.create_dataset(   name= 'data_set_%s' % stream_id,
                                                                        stream_id=stream_id,
                                                                        parameter_dict=base_dataset_obj.parameter_dictionary,
                                                                        temporal_domain=base_dataset_obj.temporal_domain,
                                                                        spatial_domain=base_dataset_obj.spatial_domain)
        self.clients.resource_registry.create_association(subject=data_product_version_id, predicate=PRED.hasDataset, object=data_set_id)

        return data_product_version_id
コード例 #25
0
    def suspend_data_product_persistence(self, data_product_id=''):
        """Suspend data product data persistence into a data set, multiple options

        @param data_product_id    str
        @param type    str
        @throws NotFound    object with specified id does not exist
        """

        log.debug("suspend_data_product_persistence: data_product_id = %s"  % str(data_product_id))

        #--------------------------------------------------------------------------------
        # retrieve the data_process object
        #--------------------------------------------------------------------------------
        data_product_obj = self.clients.resource_registry.read(data_product_id)

        validate_is_not_none(data_product_obj, 'Should not have been empty')
        validate_is_instance(data_product_obj, DataProduct)

        if data_product_obj.dataset_configuration_id is None:
            raise NotFound("Data Product %s dataset configuration does not exist" % data_product_id)

        log.debug("Data product: %s" % data_product_obj)

        #--------------------------------------------------------------------------------
        # get the Stream associated with this data product; if no stream then create one, if multiple streams then Throw
        #streams = self.data_product.find_stemming_stream(data_product_id)
        #--------------------------------------------------------------------------------
        stream_ids, _ = self.clients.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, object_type=RT.Stream, id_only=True)
        if not stream_ids:
            raise BadRequest('Data Product %s must have one stream associated' % str(data_product_id))

        for stream_id in stream_ids:
            log.debug("suspend_data_product_persistence: stream = %s"  % str(stream_id))
            log.debug("data_product_obj.dataset_configuration_id: %s" % data_product_obj.dataset_configuration_id)

            ret = self.clients.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=data_product_obj.dataset_configuration_id)
            log.debug("suspend_data_product_persistence: deactivate = %s"  % str(ret))

        #--------------------------------------------------------------------------------
        # detach the dataset from this data product
        #--------------------------------------------------------------------------------
        dataset_ids, _ = self.clients.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasDataset, id_only=True)
        for dataset_id in dataset_ids:
            self.data_product.unlink_data_set(data_product_id, dataset_id)
コード例 #26
0
    def _get_input_stream_ids(self, in_data_product_ids = None):

        input_stream_ids = []

        #------------------------------------------------------------------------------------------------------------------------------------------
        # get the streams associated with this IN data products
        #------------------------------------------------------------------------------------------------------------------------------------------
        for  in_data_product_id in in_data_product_ids:

            # Get the stream associated with this input data product
            stream_ids, _ = self.clients.resource_registry.find_objects(in_data_product_id, PRED.hasStream, RT.Stream, True)

            validate_is_not_none( stream_ids, "No Stream created for this input Data Product " + str(in_data_product_id))
            validate_is_not_none( len(stream_ids) != 1, "Input Data Product should only have ONE stream" + str(in_data_product_id))

            # We take for now one stream_id associated with each input data product
            input_stream_ids.append(stream_ids[0])

        return input_stream_ids
コード例 #27
0
    def update_event_process_definition(self, event_process_definition_id='', version='', module='', class_name='', uri='', arguments=None, event_types=None, sub_types=None, origin_types=None):
        """
        Update the process definition for the event process.

        @param event_process_definition_id str
        @param version str
        @param module str
        @param class_name str
        @param uri str
        @arguments list
        """

        validate_is_not_none(event_process_definition_id)

        # The event_process_def is really only a process_def. Read up the process definition
        process_def = self.clients.resource_registry.read(event_process_definition_id)

        definition = process_def.definition

        # Fetch or make a new EventProcessDefinitionDetail object
        if definition:
            event_process_def_detail = EventProcessDefinitionDetail()
            event_process_def_detail.event_types = event_types or definition.event_types
            event_process_def_detail.sub_types = sub_types or definition.sub_types
            event_process_def_detail.origin_types = origin_types or definition.origin_types
        else:
            event_process_def_detail = EventProcessDefinitionDetail(event_types = event_types, sub_types = sub_types, origin_types = origin_types)

#        event_process_def_detail = process_def.definition or EventProcessDefinitionDetail()



        # Update the fields of the process definition
        process_def.executable['module'] = module
        process_def.executable['class'] = class_name
        process_def.executable['uri'] = uri
        process_def.version = version
        process_def.arguments = arguments
        process_def.definition = event_process_def_detail

        # Finally update the resource registry
        self.clients.resource_registry.update(process_def)
コード例 #28
0
    def create_dataset(self, name='', datastore_name='', view_name='', stream_id='', parameter_dict=None, spatial_domain=None,temporal_domain=None, description=''):
#        validate_true(name and parameter_dict and temporal_domain and spatial_domain, 'Datasets require name, parameter dictionary, temporal and spatial domains.')

        validate_is_not_none(parameter_dict, 'A parameter dictionary must be supplied to register a new dataset.')
        validate_is_not_none(spatial_domain, 'A spatial domain must be supplied to register a new dataset.')
        validate_is_not_none(temporal_domain, 'A temporal domain must be supplied to register a new dataset.')

        dataset                      = DataSet()
        dataset.description          = description
        dataset.name                 = name
        dataset.primary_view_key     = stream_id or None
        dataset.datastore_name       = datastore_name or self.DEFAULT_DATASTORE
        dataset.view_name            = view_name or self.DEFAULT_VIEW
        dataset.parameter_dictionary = parameter_dict
        dataset.temporal_domain      = temporal_domain
        dataset.spatial_domain       = spatial_domain


        dataset_id, _ = self.clients.resource_registry.create(dataset)
        if stream_id:
            self.add_stream(dataset_id,stream_id)

        coverage = self._create_coverage(description or dataset_id, parameter_dict, spatial_domain, temporal_domain) 
        self._persist_coverage(dataset_id, coverage)

        return dataset_id
コード例 #29
0
    def get_data_product_provenance(self, data_product_id=''):

        # Retrieve information that characterizes how this data was produced
        # Return in a dictionary

        provenance_results = {}
        current_data_product = data_product_id

        log.debug("DataProductManagementService:get_data_product_provenance: %s" % str(current_data_product))

        data_product = self.data_product.read_one(data_product_id)
        validate_is_not_none(data_product, "Should have got a non empty data product")

        # todo: get the start time of this data product
        producer_ids = self._find_producers(data_product_id)

        while producer_ids:
            for producer_id in producer_ids:
                provenance_results[current_data_product] = { 'producer': producer_id, 'inputs': self._find_producer_in_products(producer_id) }
                producer_ids = self._find_producers(current_data_product)

        return {}
コード例 #30
0
    def unassign_stream_definition_from_data_process_definition(
            self, stream_definition_id='', data_process_definition_id=''):
        """
        Disconnect the Data Product from the Data Producer

        @param stream_definition_id    str
        @param data_process_definition_id    str
        @throws NotFound    object with specified id does not exist
        """

        # Remove the link between the Stream Definition resource and the Data Process Definition resource
        associations = self.clients.resource_registry.find_associations(
            data_process_definition_id,
            PRED.hasStreamDefinition,
            stream_definition_id,
            id_only=True)

        validate_is_not_none(
            associations,
            "No Stream Definitions associated with data process definition ID "
            + str(data_process_definition_id))
        for association in associations:
            self.clients.resource_registry.delete_association(association)
コード例 #31
0
    def create_data_product(self, data_product=None, stream_definition_id='', parameter_dictionary=None, exchange_point=''):
        """
        @param      data_product IonObject which defines the general data product resource
        @param      source_resource_id IonObject id which defines the source for the data
        @retval     data_product_id
        """

        res, _ = self.clients.resource_registry.find_resources(restype=RT.DataProduct, name=data_product.name, id_only=True)
        validate_false(len(res), 'A data product with the name %s already exists.' % data_product.name)
        log.info('Creating DataProduct: %s', data_product.name)
        log.debug('%s', data_product.__dict__)


        # Create will validate and register a new data product within the system
        # If the stream definition has a parameter dictionary, use that
        validate_is_not_none(stream_definition_id, 'A stream definition id must be passed to register a data product')
        stream_def_obj = self.clients.pubsub_management.read_stream_definition(stream_definition_id) # Validates and checks for param_dict
        parameter_dictionary = stream_def_obj.parameter_dictionary or parameter_dictionary
        validate_is_not_none(parameter_dictionary , 'A parameter dictionary must be passed to register a data product')
        validate_is_not_none(data_product, 'A data product (ion object) must be passed to register a data product')
        exchange_point = exchange_point or 'science_data'

        #--------------------------------------------------------------------------------
        # Register - create and store a new DataProduct resource using provided metadata
        #--------------------------------------------------------------------------------
        data_product_id, rev = self.clients.resource_registry.create(data_product)


        #-----------------------------------------------------------------------------------------------
        #Create the stream and a dataset if a stream definition is provided
        #-----------------------------------------------------------------------------------------------

        #if stream_definition_id:
        #@todo: What about topics?

        stream_id,route = self.clients.pubsub_management.create_stream(name=data_product.name,
                                                                exchange_point=exchange_point,
                                                                description=data_product.description,
                                                                stream_definition_id=stream_definition_id)

        # Associate the Stream with the main Data Product and with the default data product version
        self.data_product.link_stream(data_product_id, stream_id)


        # create a dataset...
        data_set_id = self.clients.dataset_management.create_dataset(   name= 'data_set_%s' % stream_id,
                                                                        stream_id=stream_id,
                                                                        parameter_dict=parameter_dictionary,
                                                                        temporal_domain=data_product.temporal_domain,
                                                                        spatial_domain=data_product.spatial_domain)

        # link dataset with data product. This creates the association in the resource registry
        self.data_product.link_data_set(data_product_id=data_product_id, data_set_id=data_set_id)

        # Return the id of the new data product
        return data_product_id
コード例 #32
0
    def activate_data_product_persistence(self, data_product_id=''):
        """Persist data product data into a data set

        @param data_product_id    str
        @throws NotFound    object with specified id does not exist
        """
        #--------------------------------------------------------------------------------
        # retrieve the data_process object
        #--------------------------------------------------------------------------------
        data_product_obj = self.data_product.read_one(data_product_id)

        validate_is_not_none(data_product_obj, "The data product id should correspond to a valid registered data product.")

        stream_ids, _ = self.clients.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        if not stream_ids:
            raise BadRequest('Specified DataProduct has no streams associated with it')
        stream_id = stream_ids[0]

        stream_defs, _ = self.clients.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasStreamDefinition,id_only=True)
        if not stream_defs:
            raise BadRequest("Data Product stream is without a stream definition")
        stream_def_id = stream_defs[0]

        stream_def = self.clients.pubsub_management.read_stream_definition(stream_def_id) # additional read necessary to fill in the pdict

        
        

        dataset_id = self.clients.dataset_management.create_dataset(   name= 'data_set_%s' % stream_id,
                                                                        stream_id=stream_id,
                                                                        parameter_dict=stream_def.parameter_dictionary,
                                                                        temporal_domain=data_product_obj.temporal_domain,
                                                                        spatial_domain=data_product_obj.spatial_domain)

        # link dataset with data product. This creates the association in the resource registry
        self.data_product.link_data_set(data_product_id=data_product_id, data_set_id=dataset_id)
        
        log.debug("Activating data product persistence for stream_id: %s"  % str(stream_id))



        #-----------------------------------------------------------------------------------------
        # grab the ingestion configuration id from the data_product in order to use to persist it
        #-----------------------------------------------------------------------------------------
        if data_product_obj.dataset_configuration_id:
            ingestion_configuration_id = data_product_obj.dataset_configuration_id
        else:
            ingestion_configuration_id = self.clients.ingestion_management.list_ingestion_configurations(id_only=True)[0]

        #--------------------------------------------------------------------------------
        # persist the data stream using the ingestion config id and stream id
        #--------------------------------------------------------------------------------

        # find datasets for the data product
        dataset_id = self.clients.ingestion_management.persist_data_stream(stream_id=stream_id,
                                                ingestion_configuration_id=ingestion_configuration_id,
                                                dataset_id=dataset_id)

        # register the dataset for externalization
        self.clients.dataset_management.register_dataset(dataset_id, external_data_product_name=data_product_obj.description or data_product_obj.name)


        #--------------------------------------------------------------------------------
        # todo: dataset_configuration_obj contains the ingest config for now...
        # Update the data product object
        #--------------------------------------------------------------------------------
        data_product_obj.dataset_configuration_id = ingestion_configuration_id
        self.update_data_product(data_product_obj)
コード例 #33
0
    def create_event_process(self, process_definition_id='', event_types=None, sub_types=None, origins=None, origin_types=None, out_data_products=None):
        """
        Create an event process using a process definition. Pass to the event process,
        the info about the events that the event process will subscribe to.

        @param process_definition_id str
        @param event_types list
        @param sub_types list
        @param origins list
        @param origin_types list

        @return process_id
        """

        # A process definition is required to be passed in
        validate_is_not_none(process_definition_id)

        #-------------------------------------------------------------------------
        # The output streams for the event process if any are provided
        #-------------------------------------------------------------------------

        output_streams = {}

        if out_data_products:
            for binding, output_data_product_id in out_data_products.iteritems():
                stream_ids, _ = self.clients.resource_registry.find_objects(output_data_product_id, PRED.hasStream, RT.Stream, True)
                if not stream_ids:
                    raise NotFound("No Stream created for output Data Product " + str(output_data_product_id))
                if len(stream_ids) != 1:
                    raise BadRequest("Data Product should only have ONE stream at this time" + str(output_data_product_id))
                output_streams[binding] = stream_ids[0]

        #-------------------------------------------------------------------------
        # The process definition
        #-------------------------------------------------------------------------

        # read the process definition object
        process_definition = self.clients.resource_registry.read(process_definition_id)

        #-------------------------------------------------------------------------
        # Get the event process detail object from the process definition
        #-------------------------------------------------------------------------
        event_process_def_detail = process_definition.definition or EventProcessDefinitionDetail()
        event_process_detail = EventProcessDetail()

        # But if event_types etc have been specified when the method is called, put them in the new
        # event process detail object, thus overwriting the ones that were transferred from the event process def detail object
        event_process_detail.event_types = event_types or event_process_def_detail.event_types
        event_process_detail.sub_types = sub_types or event_process_def_detail.sub_types
        event_process_detail.origins = origins
        event_process_detail.origin_types = origin_types or event_process_def_detail.origin_types
        event_process_detail.output_streams = output_streams

        #-------------------------------------------------------------------------
        # Launch the process
        #-------------------------------------------------------------------------

        # Create a config to pass the event_types, origins etc to the process, which is about to be created
        config = DotDict()
        config.process.event_types = event_types
        config.process.sub_types = sub_types
        config.process.origins = origins
        config.process.origin_types = origin_types
        config.process.publish_streams = output_streams


        # Schedule the process
        pid = self.clients.process_dispatcher.schedule_process(process_definition_id= process_definition_id,
                                                                configuration=config)

        event_process = self.clients.resource_registry.read(pid)
        event_process.detail = event_process_detail
        self.clients.resource_registry.update(event_process)

        #-------------------------------------------------------------------------
        # Associate the process with the process definition
        #-------------------------------------------------------------------------
        self.clients.resource_registry.create_association(  subject=pid,
                                                            predicate=PRED.hasProcessDefinition,
                                                            object=process_definition_id)

        #-------------------------------------------------------------------------
        # Register the process as a data producer
        #-------------------------------------------------------------------------
        self.clients.data_acquisition_management.register_event_process(process_id = pid)

        return pid
コード例 #34
0
    def create_data_product(self, data_product=None, stream_definition_id='', parameter_dictionary = None):
        """
        @param      data_product IonObject which defines the general data product resource
        @param      source_resource_id IonObject id which defines the source for the data
        @retval     data_product_id
        """

        # Create will validate and register a new data product within the system
        validate_is_not_none(parameter_dictionary, 'A parameter dictionary must be passed to register a data product')
        validate_is_not_none(stream_definition_id, 'A stream definition id must be passed to register a data product')
        validate_is_not_none(data_product, 'A data product (ion object) must be passed to register a data product')

        #--------------------------------------------------------------------------------
        # Register - create and store a new DataProduct resource using provided metadata
        #--------------------------------------------------------------------------------
        data_product_id, rev = self.clients.resource_registry.create(data_product)

        log.debug("data product id: %s" % data_product_id)

        #--------------------------------------------------------------------------------
        # Register - create and store the default DataProductVersion resource using provided metadata
        #--------------------------------------------------------------------------------
        #create the initial/default data product version
        data_product_version = DataProductVersion()
        data_product_version.name = data_product.name
        data_product_version.description = data_product.description
        dpv_id, rev = self.clients.resource_registry.create(data_product_version)
        self.clients.resource_registry.create_association( subject=data_product_id, predicate=PRED.hasVersion, object=dpv_id)

        #-----------------------------------------------------------------------------------------------
        #Create the stream and a dataset if a stream definition is provided
        #-----------------------------------------------------------------------------------------------
        log.debug("DataProductManagementService:create_data_product: stream definition id = %s" % stream_definition_id)

        #if stream_definition_id:
        stream_id = self.clients.pubsub_management.create_stream(name=data_product.name,
                                                                description=data_product.description,
                                                                stream_definition_id=stream_definition_id)

        # Associate the Stream with the main Data Product and with the default data product version
        self.data_product.link_stream(data_product_id, stream_id)
        self.clients.resource_registry.create_association( subject=dpv_id, predicate=PRED.hasStream, object=stream_id)


        # create a dataset...
        data_set_id = self.clients.dataset_management.create_dataset(   name= 'data_set_%s' % stream_id,
                                                                        stream_id=stream_id,
                                                                        parameter_dict=parameter_dictionary,
                                                                        temporal_domain=data_product.temporal_domain,
                                                                        spatial_domain=data_product.spatial_domain)

        log.debug("DataProductManagementService:create_data_product: data_set_id = %s" % str(data_set_id))
        data_set_obj = self.clients.dataset_management.read_dataset(data_set_id)
        log.debug("DataProductManagementService:create_data_product: data_set_obj = %s" % str(data_set_obj))

        # link dataset with data product. This creates the association in the resource registry
        self.data_product.link_data_set(data_product_id=data_product_id, data_set_id=data_set_id)
        self.clients.resource_registry.create_association( subject=dpv_id, predicate=PRED.hasDataset, object=data_set_id)

        # Return the id of the new data product
        return data_product_id
コード例 #35
0
    def create_data_process(self, data_process_definition_id='', in_data_product_ids=None, out_data_products=None, configuration=None):
        """
        @param  data_process_definition_id: Object with definition of the
                    process to apply to the input data product
        @param  in_data_product_ids: ID of the input data products
        @param  out_data_products: list of IDs of the output data products
        @retval data_process_id: ID of the newly created data process object
        """

        inform = "Input Data Product: "+str(in_data_product_ids)+\
                 "\nTransformed by: "+str(data_process_definition_id)+\
                 "\nTo create output Product: "+str(out_data_products) + "\n"
        log.debug("DataProcessManagementService:create_data_process() method called with parameters:\n" +
                  inform)

        #---------------------------------------------------------------------------------------
        # Initialize
        #---------------------------------------------------------------------------------------

        configuration = configuration or DotDict()

        validate_is_not_none( out_data_products, "No output data products passed in")

        #---------------------------------------------------------------------------------------
        # Read the data process definition
        #---------------------------------------------------------------------------------------
        data_process_definition = self.read_data_process_definition(data_process_definition_id)

        #---------------------------------------------------------------------------------------
        # Read the output bindings from the definition
        #---------------------------------------------------------------------------------------

        output_bindings = data_process_definition.output_bindings

        #---------------------------------------------------------------------------------------
        # Find the process definition associated with this data process definition.
        # From the process definition, we can get the module and class to run....
        #---------------------------------------------------------------------------------------

        procdef_ids,_ = self.clients.resource_registry.find_objects(data_process_definition_id, PRED.hasProcessDefinition, RT.ProcessDefinition, id_only=True)
        if not procdef_ids:
            raise BadRequest("Cannot find associated ProcessDefinition for DataProcessDefinition id=%s" % data_process_definition_id)

        process_definition_id = procdef_ids[0]

        #---------------------------------------------------------------------------------------
        # Create a data process object and register it
        #---------------------------------------------------------------------------------------

        # get the name of the data process and create an IONObject for it
        data_process_name = create_unique_identifier("process_" + data_process_definition.name)
        data_process_obj = IonObject(RT.DataProcess, name=data_process_name)

        # register the data process
        data_process_id, version = self.clients.resource_registry.create(data_process_obj)

        data_process_obj = self.clients.resource_registry.read(data_process_id)

        #---------------------------------------------------------------------------------------
        # Make the necessary associations, registering
        #---------------------------------------------------------------------------------------

        #todo check if this assoc is needed?
        # Associate the data process with the data process definition
        self.clients.resource_registry.create_association(data_process_id,  PRED.hasProcessDefinition, data_process_definition_id)

        # Register the data process instance as a data producer with DataAcquisitionMgmtSvc
        data_producer_id = self.clients.data_acquisition_management.register_process(data_process_id)
        log.debug("DataProcessManagementService:create_data_process register process "
                  "with DataAcquisitionMgmtSvc: data_producer_id: %s   (L4-CI-SA-RQ-181)", str(data_producer_id) )

        #---------------------------------------------------------------------------------------
        # Register each output data product with DAMS to create DataProducer links
        #---------------------------------------------------------------------------------------
        output_stream_dict = {}

        if out_data_products is None:
            raise BadRequest("Data Process must have output product(s) specified %s",  str(data_process_definition_id) )

        for binding, output_data_product_id in out_data_products.iteritems():

            # check that the product is not already associated with a producer
            producer_ids, _ = self.clients.resource_registry.find_objects(output_data_product_id, PRED.hasDataProducer, RT.DataProducer, True)
            if producer_ids:
                raise BadRequest("Data Product should not already be associated to a DataProducer %s hasDataProducer %s", str(data_process_id), str(producer_ids[0]))

            #Assign each output Data Product to this producer resource
            output_data_product_obj = self.clients.resource_registry.read(output_data_product_id)
            if not output_data_product_obj:
                raise NotFound("Output Data Product %s does not exist" % output_data_product_id)

            # Associate with DataProcess: register as an output product for this process
            log.debug("Link data process %s and output out data product: %s  (L4-CI-SA-RQ-260)", str(data_process_id), str(output_data_product_id))
            self.clients.data_acquisition_management.assign_data_product(input_resource_id= data_process_id,data_product_id= output_data_product_id)

            # Retrieve the id of the OUTPUT stream from the out Data Product
            stream_ids, _ = self.clients.resource_registry.find_objects(output_data_product_id, PRED.hasStream, RT.Stream, True)

            if not stream_ids:
                raise NotFound("No Stream created for output Data Product " + str(output_data_product_id))

            if len(stream_ids) != 1:
                raise BadRequest("Data Product should only have ONE stream at this time" + str(output_data_product_id))

            output_stream_dict[binding] = stream_ids[0]

        #------------------------------------------------------------------------------------------------------------------------------------------
        #Check for attached objects and put them into the configuration
        #------------------------------------------------------------------------------------------------------------------------------------------

        # check for attachments in data process definition
        configuration = self._find_lookup_tables(data_process_definition_id, configuration)
        input_stream_ids = []

        if in_data_product_ids:
            for  in_data_product_id in in_data_product_ids:

                self.clients.resource_registry.create_association(data_process_id, PRED.hasInputProduct, in_data_product_id)
                log.debug("Associate data process workflows with source data products %s "
                          "hasInputProducts  %s   (L4-CI-SA-RQ-260)", str(data_process_id), str(in_data_product_ids))

                #check if in data product is attached to an instrument, check instrumentDevice and InstrumentModel for lookup table attachments
                instdevice_ids, _ = self.clients.resource_registry.find_subjects(RT.InstrumentDevice, PRED.hasOutputProduct, in_data_product_id, True)

                for instdevice_id in instdevice_ids:
                    log.debug("Instrument device_id assoc to the input data product of this data process: %s   (L4-CI-SA-RQ-231)", str(instdevice_id))

                    # check for attachments in instrument device
                    configuration = self._find_lookup_tables(instdevice_id, configuration)
                    instmodel_ids, _ = self.clients.resource_registry.find_objects(instdevice_id, PRED.hasModel, RT.InstrumentModel, True)

                    for instmodel_id in instmodel_ids:
                        # check for attachments in instrument model
                        configuration = self._find_lookup_tables(instmodel_id, configuration)

            #------------------------------------------------------------------------------------------------------------------------------------------
            # Get the input stream from the input_data_product, which should already be associated with a stream via the Data Producer
            #------------------------------------------------------------------------------------------------------------------------------------------
            input_stream_ids = self._get_input_stream_ids(in_data_product_ids)

        #------------------------------------------------------------------------------------------------------------------------------------------
        # Create subscription to the input stream
        #------------------------------------------------------------------------------------------------------------------------------------------
        input_subscription_id = self.clients.pubsub_management.create_subscription(name=data_process_name, stream_ids=input_stream_ids)

        #------------------------------------------------------------------------------------------------------------------------------------------
        # Add the subscription id to the data process
        #------------------------------------------------------------------------------------------------------------------------------------------
        data_process_obj.input_subscription_id = input_subscription_id

        log.info("Launching the process")
        debug_str = "\n\tQueue Name: %s\n\tOutput Streams: %s\n\tProcess Definition ID: %s\n\tConfiguration: %s" % (data_process_name, output_stream_dict, process_definition_id, configuration)
        log.debug(debug_str)

        pid = self._launch_process(
                           queue_name=data_process_name,
                           out_streams=output_stream_dict,
                           process_definition_id=process_definition_id,
                           configuration=configuration)

        data_process_obj.process_id = pid
        self.clients.resource_registry.update(data_process_obj)
        return data_process_id
コード例 #36
0
    def activate_data_product_persistence(self, data_product_id=''):
        """Persist data product data into a data set

        @param data_product_id    str
        @throws NotFound    object with specified id does not exist
        """
        #--------------------------------------------------------------------------------
        # retrieve the data_process object
        #--------------------------------------------------------------------------------
        data_product_obj = self.data_product.read_one(data_product_id)

        validate_is_not_none(data_product_obj, "The data product id should correspond to a valid registered data product.")

        #--------------------------------------------------------------------------------
        # get the Stream associated with this data product; if no stream then create one, if multiple streams then Throw
        #--------------------------------------------------------------------------------
        streams = self.data_product.find_stemming_stream(data_product_id)
        if not streams:
            raise BadRequest('Data Product %s must have one stream associated' % str(data_product_id))

        stream_id = streams[0]._id
        log.debug("activate_data_product_persistence: stream = %s"  % str(stream_id))



        #-----------------------------------------------------------------------------------------
        # grab the ingestion configuration id from the data_product in order to use to persist it
        #-----------------------------------------------------------------------------------------
        if data_product_obj.dataset_configuration_id:
            ingestion_configuration_id = data_product_obj.dataset_configuration_id
        else:
            ingestion_configuration_id = self.clients.ingestion_management.list_ingestion_configurations(id_only=True)[0]

        log.debug("ingestion_configuration_id for data product: %s" % ingestion_configuration_id)

        #--------------------------------------------------------------------------------
        # persist the data stream using the ingestion config id and stream id
        #--------------------------------------------------------------------------------

        # find datasets for the data product
        dataset_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataset, RT.DataSet, id_only=True)

        log.debug("Found the following datasets for the data product: %s" % dataset_ids)
        for dataset_id in dataset_ids:

            try:
                dataset_id = self.clients.ingestion_management.persist_data_stream(stream_id=stream_id,
                    ingestion_configuration_id=ingestion_configuration_id,
                    dataset_id=dataset_id)
            except BadRequest:
                log.warning("Activate data product may have resulted in a duplicate attempt to associate a stream to a dataset")
                log.warning("Please note that creating a data product calls the create_dataset() method which already makes an association")

            log.debug("activate_data_product_persistence: dataset_id = %s"  % str(dataset_id))

            # link data set to data product
            #self.data_product.link_data_set(data_product_id, dataset_id)

        #--------------------------------------------------------------------------------
        # todo: dataset_configuration_obj contains the ingest config for now...
        # Update the data product object
        #--------------------------------------------------------------------------------
        data_product_obj.dataset_configuration_id = ingestion_configuration_id
        self.update_data_product(data_product_obj)
コード例 #37
0
    def create_event_process(self,
                             process_definition_id='',
                             event_types=None,
                             sub_types=None,
                             origins=None,
                             origin_types=None,
                             out_data_products=None):
        """
        Create an event process using a process definition. Pass to the event process,
        the info about the events that the event process will subscribe to.

        @param process_definition_id str
        @param event_types list
        @param sub_types list
        @param origins list
        @param origin_types list

        @return process_id
        """

        # A process definition is required to be passed in
        validate_is_not_none(process_definition_id)

        #-------------------------------------------------------------------------
        # The output streams for the event process if any are provided
        #-------------------------------------------------------------------------

        output_streams = {}

        if out_data_products:
            for binding, output_data_product_id in out_data_products.iteritems(
            ):
                stream_ids, _ = self.clients.resource_registry.find_objects(
                    output_data_product_id, PRED.hasStream, RT.Stream, True)
                if not stream_ids:
                    raise NotFound(
                        "No Stream created for output Data Product " +
                        str(output_data_product_id))
                if len(stream_ids) != 1:
                    raise BadRequest(
                        "Data Product should only have ONE stream at this time"
                        + str(output_data_product_id))
                output_streams[binding] = stream_ids[0]

        #-------------------------------------------------------------------------
        # The process definition
        #-------------------------------------------------------------------------

        # read the process definition object
        process_definition = self.clients.resource_registry.read(
            process_definition_id)

        #-------------------------------------------------------------------------
        # Get the event process detail object from the process definition
        #-------------------------------------------------------------------------
        event_process_def_detail = process_definition.definition or EventProcessDefinitionDetail(
        )
        event_process_detail = EventProcessDetail()

        # But if event_types etc have been specified when the method is called, put them in the new
        # event process detail object, thus overwriting the ones that were transferred from the event process def detail object
        event_process_detail.event_types = event_types or event_process_def_detail.event_types
        event_process_detail.sub_types = sub_types or event_process_def_detail.sub_types
        event_process_detail.origins = origins
        event_process_detail.origin_types = origin_types or event_process_def_detail.origin_types
        event_process_detail.output_streams = output_streams

        #-------------------------------------------------------------------------
        # Launch the process
        #-------------------------------------------------------------------------

        # Create a config to pass the event_types, origins etc to the process, which is about to be created
        config = DotDict()
        config.process.event_types = event_types
        config.process.sub_types = sub_types
        config.process.origins = origins
        config.process.origin_types = origin_types
        config.process.publish_streams = output_streams

        # Schedule the process
        pid = self.clients.process_dispatcher.schedule_process(
            process_definition_id=process_definition_id, configuration=config)

        event_process = self.clients.resource_registry.read(pid)
        event_process.detail = event_process_detail
        self.clients.resource_registry.update(event_process)

        #-------------------------------------------------------------------------
        # Associate the process with the process definition
        #-------------------------------------------------------------------------
        self.clients.resource_registry.create_association(
            subject=pid,
            predicate=PRED.hasProcessDefinition,
            object=process_definition_id)

        #-------------------------------------------------------------------------
        # Register the process as a data producer
        #-------------------------------------------------------------------------
        self.clients.data_acquisition_management.register_event_process(
            process_id=pid)

        return pid