class DataProductManagementService(BaseDataProductManagementService):
    """
    @author Bill Bollenbacher
    @file   ion/services/sa/product/data_product_management_service.py
    @brief  Implementation of the data product management service
    """

    def on_init(self):
        self.override_clients(self.clients)

    def override_clients(self, new_clients):
        """
        Replaces the service clients with a new set of them, and makes sure they go to the right places.
        """
        self.data_product = DataProductImpl(self.clients)

    def create_data_product(self, data_product=None, stream_definition_id='', exchange_point=''):
        """
        @param  data_product          IonObject which defines the general data product resource
        @param  stream_definition_id  id of the stream definition that describes this product's stream
        @param  exchange_point        name of the exchange point for the product's stream (defaults to 'science_data')
        @retval data_product_id
        """
        res, _ = self.clients.resource_registry.find_resources(restype=RT.DataProduct, name=data_product.name, id_only=True)
        validate_false(len(res), 'A data product with the name %s already exists.' % data_product.name)
        log.info('Creating DataProduct: %s', data_product.name)

        # Create will validate and register a new data product within the system.
        # If the stream definition has a parameter dictionary, use that.
        validate_is_not_none(stream_definition_id, 'A stream definition id must be passed to register a data product')
        stream_def_obj = self.clients.pubsub_management.read_stream_definition(stream_definition_id)  # validates and checks for param_dict
        parameter_dictionary = stream_def_obj.parameter_dictionary
        validate_is_not_none(parameter_dictionary, 'A parameter dictionary must be passed to register a data product')
        validate_is_not_none(data_product, 'A data product (ion object) must be passed to register a data product')
        exchange_point = exchange_point or 'science_data'

        #--------------------------------------------------------------------------------
        # Register - create and store a new DataProduct resource using provided metadata
        #--------------------------------------------------------------------------------
        data_product_id, rev = self.clients.resource_registry.create(data_product)

        #-----------------------------------------------------------------------------------------------
        # Create the stream for the data product
        #-----------------------------------------------------------------------------------------------
        #@todo: What about topics?
        stream_id, route = self.clients.pubsub_management.create_stream(name=data_product.name,
                                                                        exchange_point=exchange_point,
                                                                        description=data_product.description,
                                                                        stream_definition_id=stream_definition_id)

        # Associate the Stream with the main Data Product
        self.data_product.link_stream(data_product_id, stream_id)

        # Return the id of the new data product
        return data_product_id

    def read_data_product(self, data_product_id=''):
        """
        Retrieve all metadata for a specific data product.
        @param  data_product_id    str
        @retval data_product       DataProduct resource
        """
        data_product = self.data_product.read_one(data_product_id)
        validate_is_instance(data_product, DataProduct)
        return data_product

    def update_data_product(self, data_product=None):
        """
        @param data_product    DataProduct
        @throws NotFound       object with specified id does not exist
        """
        validate_is_instance(data_product, DataProduct)
        self.data_product.update_one(data_product)
        #TODO: any changes to producer? Call DataAcquisitionMgmtSvc?
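    # Illustrative usage (a sketch, not part of the service): exercising the create/read/update
    # flow through the generated service client. The import paths follow pyon's interface
    # generation conventions, and `stream_def_id` is assumed to reference an existing
    # StreamDefinition that carries a parameter dictionary.
    #
    #   from interface.services.sa.idata_product_management_service import DataProductManagementServiceClient
    #   from interface.objects import DataProduct
    #
    #   dpms_client = DataProductManagementServiceClient()
    #   dp = DataProduct(name='ctd_parsed', description='parsed CTD output')
    #   dp_id = dpms_client.create_data_product(data_product=dp, stream_definition_id=stream_def_id)
    #
    #   dp = dpms_client.read_data_product(dp_id)
    #   dp.description = 'parsed CTD output, reprocessed'
    #   dpms_client.update_data_product(dp)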
    def delete_data_product(self, data_product_id=''):

        #--------------------------------------------------------------------------------
        # if this data product is associated to a producer, unassign it
        #--------------------------------------------------------------------------------
        #todo: convert to impl call
        producer_objs = self.data_product.find_stemming_data_producer(data_product_id)
        for producer_obj in producer_objs:
            self.clients.data_acquisition_management.unassign_data_product(data_product_id, producer_obj._id)

        #--------------------------------------------------------------------------------
        # suspend persistence
        #--------------------------------------------------------------------------------
        if self.is_persisted(data_product_id):
            self.suspend_data_product_persistence(data_product_id)

        #--------------------------------------------------------------------------------
        # remove stream associations
        #--------------------------------------------------------------------------------
        #self.remove_streams(data_product_id)

        #--------------------------------------------------------------------------------
        # remove dataset associations
        #--------------------------------------------------------------------------------
        dataset_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataset, RT.Dataset, id_only=True)
        # for dataset_id in dataset_ids:
        #     self.data_product.unlink_data_set(data_product_id=data_product_id, data_set_id=dataset_id)

        #--------------------------------------------------------------------------------
        # retire the data product
        #--------------------------------------------------------------------------------
        self.data_product.delete_one(data_product_id)

    def force_delete_data_product(self, data_product_id=''):

        # if not yet deleted, then first execute the delete logic
        dp_obj = self.read_data_product(data_product_id)
        if dp_obj.lcstate != LCS.RETIRED:
            self.delete_data_product(data_product_id)

        # get the associated producers before deleting the links
        producers, producer_assns = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataProducer, RT.DataProducer, True)
        self._remove_associations(data_product_id)
        for producer in producers:
            self.clients.resource_registry.delete(producer)

        self.clients.resource_registry.delete(data_product_id)

    def remove_streams(self, data_product_id=''):
        streams, assocs = self.clients.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        datasets, _ = self.clients.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasDataset, id_only=True)

        # delete each stream association once, then detach the stream from every dataset
        for stream, assoc in zip(streams, assocs):
            self.clients.resource_registry.delete_association(assoc)
            self.clients.pubsub_management.delete_stream(stream)
            for dataset in datasets:
                self.clients.dataset_management.remove_stream(dataset, stream)

        return streams
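    # Sketch of the intended deletion semantics (assumes `dp_id` names an existing product):
    # delete_data_product() retires the resource and suspends any active persistence, while
    # force_delete_data_product() additionally removes producers, associations, and the
    # resource itself from the registry.
    #
    #   dpms_client.delete_data_product(dp_id)        # lcstate -> RETIRED
    #   dpms_client.force_delete_data_product(dp_id)  # hard removal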
    def find_data_products(self, filters=None):
        """
        Locate data products matching the given filters.
        """
        # Validate the input filter and augment context as required.
        # Define the set of resource attributes to filter on; change the parameter from "filter" to include attributes and filter values,
        # potentially: title, keywords, date_created, creator_name, project, geospatial coords, time range.

        # Call DM DiscoveryService to query the catalog for matches
        # Organize and return the list of matches with summary metadata (title, summary, keywords)
        return self.data_product.find_some(filters)

    def activate_data_product_persistence(self, data_product_id=''):
        """Persist data product data into a data set

        @param data_product_id    str
        @throws NotFound    object with specified id does not exist
        """
        #--------------------------------------------------------------------------------
        # retrieve the data product object
        #--------------------------------------------------------------------------------
        data_product_obj = self.data_product.read_one(data_product_id)
        validate_is_not_none(data_product_obj, "The data product id should correspond to a valid registered data product.")

        stream_ids, _ = self.clients.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        if not stream_ids:
            raise BadRequest('Specified DataProduct has no streams associated with it')
        stream_id = stream_ids[0]

        stream_defs, _ = self.clients.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True)
        if not stream_defs:
            raise BadRequest("Data Product stream is without a stream definition")
        stream_def_id = stream_defs[0]

        # additional read necessary to fill in the parameter dictionary
        stream_def = self.clients.pubsub_management.read_stream_definition(stream_def_id)

        dataset_id = self.clients.dataset_management.create_dataset(name='data_set_%s' % stream_id,
                                                                    stream_id=stream_id,
                                                                    parameter_dict=stream_def.parameter_dictionary,
                                                                    temporal_domain=data_product_obj.temporal_domain,
                                                                    spatial_domain=data_product_obj.spatial_domain)

        # link dataset with data product; this creates the association in the resource registry
        self.data_product.link_data_set(data_product_id=data_product_id, data_set_id=dataset_id)

        log.debug("Activating data product persistence for stream_id: %s" % str(stream_id))

        #-----------------------------------------------------------------------------------------
        # grab the ingestion configuration id from the data product in order to use it for persistence
        #-----------------------------------------------------------------------------------------
        if data_product_obj.dataset_configuration_id:
            ingestion_configuration_id = data_product_obj.dataset_configuration_id
        else:
            ingestion_configuration_id = self.clients.ingestion_management.list_ingestion_configurations(id_only=True)[0]

        #--------------------------------------------------------------------------------
        # persist the data stream using the ingestion config id and stream id
        #--------------------------------------------------------------------------------
        dataset_id = self.clients.ingestion_management.persist_data_stream(stream_id=stream_id,
                                                                           ingestion_configuration_id=ingestion_configuration_id,
                                                                           dataset_id=dataset_id)

        # register the dataset for externalization
        self.clients.dataset_management.register_dataset(dataset_id, external_data_product_name=data_product_obj.description or data_product_obj.name)

        #--------------------------------------------------------------------------------
        # todo: dataset_configuration_obj contains the ingest config for now...
        # update the data product object
        #--------------------------------------------------------------------------------
        data_product_obj.dataset_configuration_id = ingestion_configuration_id
        self.update_data_product(data_product_obj)
    def is_persisted(self, data_product_id=''):
        # Is the data product currently persisted into a data set?
        if data_product_id:
            stream_id = self._get_stream_id(data_product_id)
            if stream_id:
                return self.clients.ingestion_management.is_persisted(stream_id)
        return False

    def suspend_data_product_persistence(self, data_product_id=''):
        """Suspend persistence of data product data into a data set

        @param data_product_id    str
        @throws NotFound    object with specified id does not exist
        """
        #--------------------------------------------------------------------------------
        # retrieve the data product object
        #--------------------------------------------------------------------------------
        data_product_obj = self.clients.resource_registry.read(data_product_id)
        validate_is_not_none(data_product_obj, 'Should not have been empty')
        validate_is_instance(data_product_obj, DataProduct)

        if data_product_obj.dataset_configuration_id is None:
            raise NotFound("Data Product %s dataset configuration does not exist" % data_product_id)

        #--------------------------------------------------------------------------------
        # get the Stream associated with this data product
        #streams = self.data_product.find_stemming_stream(data_product_id)
        #--------------------------------------------------------------------------------
        stream_id = self._get_stream_id(data_product_id)
        validate_is_not_none(stream_id, 'Data Product %s must have one stream associated' % str(data_product_id))

        ret = self.clients.ingestion_management.unpersist_data_stream(stream_id=stream_id,
                                                                      ingestion_configuration_id=data_product_obj.dataset_configuration_id)

        #--------------------------------------------------------------------------------
        # detach the dataset from this data product
        #--------------------------------------------------------------------------------
        # dataset_ids, _ = self.clients.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasDataset, id_only=True)
        # for dataset_id in dataset_ids:
        #     self.data_product.unlink_data_set(data_product_id, dataset_id)

    def get_data_product_stream_definition(self, data_product_id=''):
        # Return the stream definition id of the product's first stream that has one
        self.read_data_product(data_product_id)
        streams, _ = self.clients.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        for stream in streams:
            stream_defs, _ = self.clients.resource_registry.find_objects(subject=stream, predicate=PRED.hasStreamDefinition, id_only=True)
            if stream_defs:
                return stream_defs[0]

    def get_data_product_provenance(self, data_product_id=''):
        # Retrieve information that characterizes how this data was produced.
        # Returned as a dictionary.
        self.provenance_results = {}

        data_product = self.data_product.read_one(data_product_id)
        validate_is_not_none(data_product, "Should have got a non empty data product")

        # todo: get the start time of this data product
        self.data_product._find_producers(data_product_id, self.provenance_results)

        return self.provenance_results

    def get_data_product_provenance_report(self, data_product_id=''):
        # Retrieve information that characterizes how this data was produced.
        # Returned as a string report.
        self.provenance_results = self.get_data_product_provenance(data_product_id)

        results = self.data_product._write_product_provenance_report(data_product_id, self.provenance_results)

        return results
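    # Illustrative persistence round trip (a sketch; `dp_id` is assumed to be a product
    # created with a stream definition, with at least one IngestionConfiguration deployed):
    #
    #   dpms_client.activate_data_product_persistence(dp_id)
    #   assert dpms_client.is_persisted(dp_id)
    #   dpms_client.suspend_data_product_persistence(dp_id)
    #   assert not dpms_client.is_persisted(dp_id)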
    ############################
    #
    #  Data Product Collections
    #
    ############################

    def create_data_product_collection(self, data_product_id='', collection_name='', collection_description=''):
        """Define a set of existing data products that represent an improvement in the quality or
        understanding of the information.
        """
        validate_is_not_none(data_product_id, 'A data product identifier must be passed to create a data product version')

        dpv = DataProductVersion()
        dpv.name = 'base'
        dpv.description = 'the base version on which subsequent versions are built'
        dpv.data_product_id = data_product_id

        dp_collection_obj = IonObject(RT.DataProductCollection, name=collection_name, description=collection_description, version_list=[dpv])

        data_product_collection_id, rev = self.clients.resource_registry.create(dp_collection_obj)
        self.clients.resource_registry.create_association(subject=data_product_collection_id, predicate=PRED.hasVersion, object=data_product_id)

        return data_product_collection_id

    def update_data_product_collection(self, data_product_collection=None):
        """
        @param data_product_collection    DataProductCollection
        @throws NotFound    object with specified id does not exist
        """
        validate_is_not_none(data_product_collection, "Should not pass in a None object")

        self.clients.resource_registry.update(data_product_collection)

        #TODO: any changes to producer? Call DataAcquisitionMgmtSvc?

        return

    def read_data_product_collection(self, data_product_collection_id=''):
        """Retrieve data product collection information

        @param data_product_collection_id    str
        @retval data_product_collection    DataProductCollection
        """
        result = self.clients.resource_registry.read(data_product_collection_id)
        validate_is_not_none(result, "Should not have returned an empty result")
        return result
    def delete_data_product_collection(self, data_product_collection_id=''):
        """Retire a data product collection.

        @param data_product_collection_id    str
        @throws BadRequest    if object does not have _id or _rev attribute
        @throws NotFound    object with specified id does not exist
        """
        # check that all associated data products are deleted
        dataproduct_objs, _ = self.clients.resource_registry.find_objects(subject=data_product_collection_id, predicate=PRED.hasVersion, object_type=RT.DataProduct, id_only=False)
        for dataproduct_obj in dataproduct_objs:
            if dataproduct_obj.lcstate != LCS.RETIRED:
                raise BadRequest("All Data Products in a collection must be deleted before the collection is deleted.")

        data_product_collection_obj = self.read_data_product_collection(data_product_collection_id)

        if data_product_collection_obj.lcstate != LCS.RETIRED:
            self.clients.resource_registry.retire(data_product_collection_id)

    def force_delete_data_product_collection(self, data_product_collection_id=''):

        # if not yet deleted, then first execute the delete logic
        dp_obj = self.read_data_product_collection(data_product_collection_id)
        if dp_obj.lcstate != LCS.RETIRED:
            self.delete_data_product_collection(data_product_collection_id)

        self._remove_associations(data_product_collection_id)
        self.clients.resource_registry.delete(data_product_collection_id)

    def add_data_product_version_to_collection(self, data_product_id='', data_product_collection_id='', version_name='', version_description=''):

        dp_collection_obj = self.clients.resource_registry.read(data_product_collection_id)

        # retrieve the stream definitions for both the new data product to add to this collection and the base data product for this collection
        new_data_product_obj = self.clients.resource_registry.read(data_product_id)
        new_data_product_streams, _ = self.clients.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, object_type=RT.Stream, id_only=True)
        validate_is_not_none(new_data_product_streams, 'The data product to add to the collection must have an associated stream')
        new_data_product_streamdefs, _ = self.clients.resource_registry.find_objects(subject=new_data_product_streams[0], predicate=PRED.hasStreamDefinition, object_type=RT.StreamDefinition, id_only=True)

        base_data_product_id = dp_collection_obj.version_list[0].data_product_id
        base_data_product_obj = self.clients.resource_registry.read(base_data_product_id)
        base_data_product_streams, _ = self.clients.resource_registry.find_objects(subject=base_data_product_id, predicate=PRED.hasStream, object_type=RT.Stream, id_only=True)
        validate_is_not_none(base_data_product_streams, 'The base data product in the collection must have an associated stream')
        base_data_product_streamdefs, _ = self.clients.resource_registry.find_objects(subject=base_data_product_streams[0], predicate=PRED.hasStreamDefinition, object_type=RT.StreamDefinition, id_only=True)

        if not self.clients.pubsub_management.compare_stream_definition(stream_definition1_id=new_data_product_streamdefs[0], stream_definition2_id=base_data_product_streamdefs[0]):
            raise BadRequest("All Data Products in a collection must have equivalent stream definitions.")

        #todo: validate that the spatial/temporal domain match the base data product

        dpv = DataProductVersion()
        dpv.name = version_name
        dpv.description = version_description
        dpv.data_product_id = data_product_id

        dp_collection_obj.version_list.append(dpv)
        self.clients.resource_registry.update(dp_collection_obj)

        self.clients.resource_registry.create_association(subject=data_product_collection_id, predicate=PRED.hasVersion, object=data_product_id)

        return
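    # Illustrative collection workflow (a sketch; `base_dp_id` and `improved_dp_id` are assumed
    # to be existing data products with equivalent stream definitions, as enforced above):
    #
    #   coll_id = dpms_client.create_data_product_collection(data_product_id=base_dp_id,
    #                                                        collection_name='ctd_L0_collection')
    #   dpms_client.add_data_product_version_to_collection(data_product_id=improved_dp_id,
    #                                                      data_product_collection_id=coll_id,
    #                                                      version_name='v2')
    #   assert dpms_client.get_base_version(coll_id) == base_dp_id
    #   assert dpms_client.get_current_version(coll_id) == improved_dp_id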
    def get_current_version(self, data_product_collection_id=''):
        # Return the data product id of the most recently added version in the collection
        data_product_collection_obj = self.clients.resource_registry.read(data_product_collection_id)
        dpv_obj = data_product_collection_obj.version_list[-1]
        return dpv_obj.data_product_id

    def get_base_version(self, data_product_collection_id=''):
        # Return the data product id of the base version of the collection
        data_product_collection_obj = self.clients.resource_registry.read(data_product_collection_id)
        dpv_obj = data_product_collection_obj.version_list[0]
        return dpv_obj.data_product_id

    def execute_data_product_lifecycle(self, data_product_id="", lifecycle_event=""):
        """
        Declare a data product to be in a given state.
        @param data_product_id    the resource id
        """
        return self.data_product.advance_lcs(data_product_id, lifecycle_event)

    def get_last_update(self, data_product_id=''):
        """
        @param data_product_id    str
        @retval last_update    LastUpdate
        @throws NotFound    Data product not found or cache for data product not found.
        """
        from ion.processes.data.last_update_cache import CACHE_DATASTORE_NAME
        datastore_name = CACHE_DATASTORE_NAME
        db = self.container.datastore_manager.get_datastore(datastore_name)
        stream_ids, other = self.clients.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        retval = {}
        for stream_id in stream_ids:
            try:
                lu = db.read(stream_id)
                retval[stream_id] = lu
            except NotFound:
                continue
        return retval

    def _get_dataset_id(self, data_product_id=''):
        # find the dataset for the data product
        dataset_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataset, RT.Dataset, id_only=True)
        if not dataset_ids:
            raise NotFound('No Dataset is associated with DataProduct %s' % data_product_id)
        return dataset_ids[0]

    def _get_stream_id(self, data_product_id=''):
        # find the stream for the data product
        stream_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasStream, RT.Stream, id_only=True)
        # guard against products with no associated stream
        return stream_ids[0] if stream_ids else None

    ############################
    #
    #  EXTENDED RESOURCES
    #
    ############################
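    # The computed-attribute getters in this section all follow the same pattern: build an
    # IonObject of the appropriate OT.Computed*Value type, fill in `value`, and report
    # availability through `status` (and `reason` on failure). A minimal sketch of the shape,
    # assuming the Computed*Value objects expose the value/status/reason fields used below:
    #
    #   ret = IonObject(OT.ComputedIntValue)
    #   ret.value = 42
    #   ret.status = ComputedValueAvailability.PROVIDED
    #   # on failure: ret.status = ComputedValueAvailability.NOTAVAILABLE; ret.reason = '...'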
    def get_data_product_extension(self, data_product_id='', ext_associations=None, ext_exclude=None, user_id=''):
        # Returns a DataProductExtension object containing additional related information

        if not data_product_id:
            raise BadRequest("The data_product_id parameter is empty")

        extended_resource_handler = ExtendedResourceContainer(self)

        extended_product = extended_resource_handler.create_extended_resource_container(
            extended_resource_type=OT.DataProductExtension,
            resource_id=data_product_id,
            computed_resource_type=OT.DataProductComputedAttributes,
            ext_associations=ext_associations,
            ext_exclude=ext_exclude,
            user_id=user_id)

        # loop through any attachments and remove the actual content, since we don't need
        # to send it to the front end this way
        #TODO - see if there is a better way to do this in the extended resource framework.
        if hasattr(extended_product, 'attachments'):
            for att in extended_product.attachments:
                if hasattr(att, 'content'):
                    delattr(att, 'content')

        # extract the list of upstream data products from the provenance results
        dp_list = []
        for key, value in extended_product.computed.provenance.value.iteritems():
            for producer_id, dataprodlist in value['inputs'].iteritems():
                for dataprod in dataprodlist:
                    dp_list.append(self.clients.resource_registry.read(dataprod))
        extended_product.provenance_product_list = set(dp_list)  # remove duplicates

        # set the data_ingestion_datetime from get_data_datetime
        if extended_product.computed.data_datetime.status == ComputedValueAvailability.PROVIDED:
            extended_product.data_ingestion_datetime = extended_product.computed.data_datetime.value[1]

        # divide up the active and past user subscriptions
        active = []
        nonactive = []
        for notification_obj in extended_product.computed.active_user_subscriptions.value:
            if notification_obj.lcstate == LCS.RETIRED:
                nonactive.append(notification_obj)
            else:
                active.append(notification_obj)

        extended_product.computed.active_user_subscriptions.value = active
        extended_product.computed.past_user_subscriptions.value = nonactive
        extended_product.computed.past_user_subscriptions.status = ComputedValueAvailability.PROVIDED
        extended_product.computed.number_active_subscriptions.value = len(active)
        extended_product.computed.number_active_subscriptions.status = ComputedValueAvailability.PROVIDED

        # replace the list of lists with a single flattened list
        replacement_data_products = []
        for inner_list in extended_product.process_input_data_products:
            if inner_list:
                for actual_data_product in inner_list:
                    if actual_data_product:
                        replacement_data_products.append(actual_data_product)
        extended_product.process_input_data_products = replacement_data_products

        return extended_product

    def get_data_datetime(self, data_product_id=''):
        # Returns a temporal bounds object spanning the data product's life (may exist without getting a granule)
        ret = IonObject(OT.ComputedListValue)
        ret.value = []
        ret.status = ComputedValueAvailability.NOTAVAILABLE

        try:
            dataset_id = self._get_dataset_id(data_product_id)
            bounds = self.clients.dataset_management.dataset_bounds(dataset_id)
            if 'time' in bounds and len(bounds['time']) == 2:
                log.debug("get_data_datetime bounds['time']: %s" % str(dataset_id))
                timeStart = IonTime(bounds['time'][0] - IonTime.JAN_1970)
                timeEnd = IonTime(bounds['time'][1] - IonTime.JAN_1970)
                ret.value = [str(timeStart), str(timeEnd)]
                ret.status = ComputedValueAvailability.PROVIDED
        except NotFound:
            ret.status = ComputedValueAvailability.NOTAVAILABLE
            ret.reason = "Dataset for this Data Product could not be located"
        except Exception:
            ret.status = ComputedValueAvailability.NOTAVAILABLE
            ret.reason = "Could not calculate time range for this data product"

        return ret

    # def get_data_ingestion_datetime(self, data_product_id=''):
    #     # Returns a temporal bounds object of the earliest/most recent values ingested into the data product
    #     ret = IonObject(OT.ComputedStringValue)
    #     ret.value = ""
    #     ret.status = ComputedValueAvailability.NOTAVAILABLE
    #     ret.reason = "FIXME. also, should datetime be stored as a string?"
    #
    #     return ret
    def get_product_download_size_estimated(self, data_product_id=''):
        # Returns the estimated size of the full data product if downloaded/presented in a given presentation form
        ret = IonObject(OT.ComputedIntValue)
        ret.value = 0
        try:
            dataset_id = self._get_dataset_id(data_product_id)
            size_in_bytes = self.clients.dataset_management.dataset_size(dataset_id, in_bytes=False)
            ret.status = ComputedValueAvailability.PROVIDED
            ret.value = size_in_bytes
        except NotFound:
            ret.status = ComputedValueAvailability.NOTAVAILABLE
            ret.reason = "Dataset for this Data Product could not be located"

        return ret

    def get_stored_data_size(self, data_product_id=''):
        # Returns the storage size occupied by the data content of the resource, in bytes
        ret = IonObject(OT.ComputedIntValue)
        ret.value = 0
        try:
            dataset_id = self._get_dataset_id(data_product_id)
            size_in_bytes = self.clients.dataset_management.dataset_size(dataset_id, in_bytes=True)
            ret.status = ComputedValueAvailability.PROVIDED
            ret.value = size_in_bytes
        except NotFound:
            ret.status = ComputedValueAvailability.NOTAVAILABLE
            ret.reason = "Dataset for this Data Product could not be located"

        return ret

    def get_data_contents_updated(self, data_product_id=''):
        # The datetime when the contents of the data were last modified in any way.
        # This is distinct from modifications to the data product attributes.
        ret = IonObject(OT.ComputedStringValue)
        ret.value = ""
        ret.status = ComputedValueAvailability.NOTAVAILABLE
        ret.reason = "FIXME. also, should datetime be stored as a string?"
        return ret

    def get_parameters(self, data_product_id=''):
        # The set of Parameter objects describing each variable in this data product
        ret = IonObject(OT.ComputedListValue)
        ret.value = []
        try:
            stream_id = self._get_stream_id(data_product_id)
            if not stream_id:
                ret.status = ComputedValueAvailability.NOTAVAILABLE
                ret.reason = "There is no Stream associated with this DataProduct"
            else:
                stream_def_ids, _ = self.clients.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True)
                if not stream_def_ids:
                    ret.status = ComputedValueAvailability.NOTAVAILABLE
                    ret.reason = "There is no StreamDefinition associated with this DataProduct"
                else:
                    param_dict_ids, _ = self.clients.resource_registry.find_objects(subject=stream_def_ids[0], predicate=PRED.hasParameterDictionary, id_only=True)
                    if not param_dict_ids:
                        ret.status = ComputedValueAvailability.NOTAVAILABLE
                        ret.reason = "There is no ParameterDictionary associated with this DataProduct"
                    else:
                        ret.status = ComputedValueAvailability.PROVIDED
                        ret.value = self.clients.dataset_management.read_parameter_contexts(param_dict_ids[0])
        except NotFound:
            ret.status = ComputedValueAvailability.NOTAVAILABLE
            ret.reason = "The parameter contexts for this data product could not be located"

        return ret

    def get_data_url(self, data_product_id=''):
        # The unique pointer to this set of data
        ret = IonObject(OT.ComputedStringValue)
        ret.value = ""

        erddap_host = CFG.get_safe('server.erddap.host', 'localhost')
        erddap_port = CFG.get_safe('server.erddap.port', '8080')
        try:
            dataset_id = self._get_dataset_id(data_product_id)
            ret.value = 'http://%s:%s/erddap/griddap/%s_0.html' % (erddap_host, str(erddap_port), str(dataset_id))
            ret.status = ComputedValueAvailability.PROVIDED
            log.debug("get_data_url: data_url: %s", ret.value)
        except NotFound:
            ret.status = ComputedValueAvailability.NOTAVAILABLE
            ret.reason = "Dataset for this Data Product could not be located"

        return ret
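    # For reference, the URL assembled in get_data_url() resolves to an ERDDAP griddap
    # landing page of the form (host/port shown are the CFG defaults above):
    #
    #   http://localhost:8080/erddap/griddap/<dataset_id>_0.html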
    def get_provenance(self, data_product_id=''):
        # Provides an audit trail for modifications to the original data
        ret = IonObject(OT.ComputedDictValue)
        try:
            ret.value = self.get_data_product_provenance(data_product_id)
            ret.status = ComputedValueAvailability.PROVIDED
        except NotFound:
            ret.status = ComputedValueAvailability.NOTAVAILABLE
            ret.reason = "Error in DataProductMgmtService:get_data_product_provenance"

        return ret

    def get_number_active_subscriptions(self, data_product_id=''):
        # The number of current subscriptions to the data
        ret = IonObject(OT.ComputedIntValue)
        ret.value = 0
        try:
            ret.status = ComputedValueAvailability.PROVIDED
            raise NotFound  #todo: ret.value = ???
        except NotFound:
            ret.status = ComputedValueAvailability.NOTAVAILABLE
            ret.reason = "The subscription count is not yet computed"

        return ret

    def get_active_user_subscriptions(self, data_product_id=''):
        # The UserSubscription objects for this data product
        ret = IonObject(OT.ComputedListValue)
        ret.value = []
        try:
            ret.value = self.clients.user_notification.get_subscriptions(resource_id=data_product_id, include_nonactive=True)
            ret.status = ComputedValueAvailability.PROVIDED
        except NotFound:
            ret.status = ComputedValueAvailability.NOTAVAILABLE
            ret.reason = "Product subscription information not provided by UserNotificationService"

        return ret

    # def get_past_user_subscriptions(self, data_product_id=''):
    #     # Provides information for users who have in the past acquired this data product, but for which that acquisition was terminated
    #     ret = IonObject(OT.ComputedListValue)
    #     ret.value = []
    #     try:
    #         ret.status = ComputedValueAvailability.PROVIDED
    #         raise NotFound  #todo: ret.value = ???
    #     except NotFound:
    #         ret.status = ComputedValueAvailability.NOTAVAILABLE
    #         ret.reason = "FIXME: this message should say why the calculation couldn't be done"
    #     except Exception as e:
    #         raise e
    #
    #     return ret
    def get_last_granule(self, data_product_id=''):
        # Returns the most recent data values published on this data product's stream
        ret = IonObject(OT.ComputedDictValue)
        ret.value = {}
        try:
            dataset_ids, _ = self.clients.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasDataset, id_only=True)
            if not dataset_ids:
                ret.status = ComputedValueAvailability.NOTAVAILABLE
                ret.reason = "No dataset associated with this data product"
            else:
                replay_granule = self.clients.data_retriever.retrieve_last_data_points(dataset_ids[0], number_of_points=1)
                #replay_granule = self.clients.data_retriever.retrieve_last_granule(dataset_ids[0])
                rdt = RecordDictionaryTool.load_from_granule(replay_granule)
                ret.value = {k: str(k) + ': ' + str(rdt[k].tolist()[0]) for k, v in rdt.iteritems()}
                # ret.value = {k: str(rdt[k].tolist()[0]) for k, v in rdt.iteritems()}
                ret.status = ComputedValueAvailability.PROVIDED
        except NotFound:
            ret.status = ComputedValueAvailability.NOTAVAILABLE
            ret.reason = "The last data points for this data product could not be retrieved"

        return ret

    def get_recent_granules(self, data_product_id=''):
        # Returns the most recent granules published on this data product's stream
        ret = IonObject(OT.ComputedDictValue)
        ret.value = {}
        ret.status = ComputedValueAvailability.NOTAVAILABLE
        # try:
        #     dataset_ids, _ = self.clients.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasDataset, id_only=True)
        #     if not dataset_ids:
        #         ret.status = ComputedValueAvailability.NOTAVAILABLE
        #         ret.reason = "No dataset associated with this data product"
        #     else:
        #         replay_granule = self.clients.data_retriever.retrieve_last_data_points(dataset_ids[0])
        #         rdt = RecordDictionaryTool.load_from_granule(replay_granule)
        #         ret.value = {k: rdt[k].tolist() for k, v in rdt.iteritems()}
        #         ret.status = ComputedValueAvailability.PROVIDED
        # except NotFound:
        #     ret.status = ComputedValueAvailability.NOTAVAILABLE
        #     ret.reason = "FIXME: this message should say why the calculation couldn't be done"
        # except Exception as e:
        #     raise e

        return ret

    def get_is_persisted(self, data_product_id=''):
        # Returns True if the data product is currently being persisted
        ret = IonObject(OT.ComputedIntValue)
        ret.value = self.is_persisted(data_product_id)
        ret.status = ComputedValueAvailability.PROVIDED

        return ret

    def _remove_associations(self, resource_id=''):
        """
        Delete all associations to/from a resource.
        """
        # find all associations where this is the subject
        _, obj_assns = self.clients.resource_registry.find_objects(subject=resource_id, id_only=True)

        # find all associations where this is the object
        _, sbj_assns = self.clients.resource_registry.find_subjects(object=resource_id, id_only=True)

        log.debug("_remove_associations will remove %s subject associations and %s object associations",
                  len(sbj_assns), len(obj_assns))

        for assn in obj_assns:
            log.debug("_remove_associations deleting object association %s", assn)
            self.clients.resource_registry.delete_association(assn)

        for assn in sbj_assns:
            log.debug("_remove_associations deleting subject association %s", assn)
            self.clients.resource_registry.delete_association(assn)
        # re-query both directions to confirm the deletions

        # find all associations where this is the subject
        _, obj_assns = self.clients.resource_registry.find_objects(subject=resource_id, id_only=True)

        # find all associations where this is the object
        _, sbj_assns = self.clients.resource_registry.find_subjects(object=resource_id, id_only=True)

        log.debug("post-deletions, _remove_associations found %s subject associations and %s object associations",
                  len(sbj_assns), len(obj_assns))
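# Illustrative end-to-end sketch (assumes a running container with the SA and DM services
# deployed; all identifiers are placeholders):
#
#   dp_id = dpms_client.create_data_product(data_product=dp, stream_definition_id=stream_def_id)
#   dpms_client.activate_data_product_persistence(dp_id)
#   ...  # data published on the product's stream is now ingested into its dataset
#   dpms_client.suspend_data_product_persistence(dp_id)
#   dpms_client.delete_data_product(dp_id)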
class DataProductManagementService(BaseDataProductManagementService): """ @author Bill Bollenbacher @file ion/services/sa/product/data_product_management_service.py @brief Implementation of the data product management service """ def on_init(self): self.override_clients(self.clients) def override_clients(self, new_clients): """ Replaces the service clients with a new set of them... and makes sure they go to the right places """ self.data_product = DataProductImpl(self.clients) def create_data_product(self, data_product=None, stream_definition_id=''): """ @param data_product IonObject which defines the general data product resource @param source_resource_id IonObject id which defines the source for the data @retval data_product_id """ # 1. Verify that a data product with same name does not already exist # 2. Validate that the data product IonObject does not contain an id_ element # 3. Create a new data product # - User must supply the name in the data product # Create will validate and register a new data product within the system # Validate - TBD by the work that Karen Stocks is driving with John Graybeal # Register - create and store a new DataProduct resource using provided metadata log.debug("DataProductManagementService:create_data_product: %s" % str(data_product)) data_product_id = self.data_product.create_one(data_product) #Create the stream if a stream definition is provided log.debug("DataProductManagementService:create_data_product: stream definition id = %s" % stream_definition_id) if stream_definition_id: stream_id = self.clients.pubsub_management.create_stream(name=data_product.name, description=data_product.description, stream_definition_id=stream_definition_id) log.debug("create_data_product: create stream stream_id %s" % stream_id) # Associate the Stream with the main Data Product self.data_product.link_stream(data_product_id, stream_id) # Return a resource ref to the new data product return data_product_id def read_data_product(self, data_product_id=''): """ method docstring """ # Retrieve all metadata for a specific data product # Return data product resource log.debug("DataProductManagementService:read_data_product: %s" % str(data_product_id)) result = self.data_product.read_one(data_product_id) return result def update_data_product(self, data_product=None): """ @todo document this interface!!! @param data_product DataProduct @throws NotFound object with specified id does not exist """ log.debug("DataProductManagementService:update_data_product: %s" % str(data_product)) self.data_product.update_one(data_product) #TODO: any changes to producer? Call DataAcquisitionMgmtSvc? return def delete_data_product(self, data_product_id=''): #Check if this data product is associated to a producer #todo: convert to impl call producer_ids = self.data_product.find_stemming_data_producer(data_product_id) if producer_ids: log.debug("unassigning data producers: %s") self.clients.data_acquisition_management.unassign_data_product(data_product_id) # Delete the data product self.clients.resource_registry.delete(data_product_id) return def find_data_products(self, filters=None): """ method docstring """ # Validate the input filter and augment context as required # Define set of resource attributes to filter on, change parameter from "filter" to include attributes and filter values. 
# potentially: title, keywords, date_created, creator_name, project, geospatial coords, time range # Call DM DiscoveryService to query the catalog for matches # Organize and return the list of matches with summary metadata (title, summary, keywords) return self.data_product.find_some(filters) def activate_data_product_persistence(self, data_product_id='', persist_data=True, persist_metadata=True): """Persist data product data into a data set @param data_product_id str @throws NotFound object with specified id does not exist """ # retrieve the data_process object data_product_obj = self.data_product.read_one(data_product_id) # get the Stream associated with this data set; if no stream then create one, if multiple streams then Throw streams = self.data_product.find_stemming_stream(data_product_id) if not streams: raise BadRequest('Data Product %s must have one stream associated' % str(data_product_id)) #todo: what if there are multiple streams? stream = streams[0] log.debug("activate_data_product_persistence: stream = %s" % str(stream._id)) # Find THE ingestion configuration in the RR to create a ingestion configuration # todo: how are multiple ingest configs for a site managed? ingest_config_objs, _ = self.clients.resource_registry.find_resources(restype=RT.IngestionConfiguration, id_only=False) if len(ingest_config_objs) != 1: log.debug("activate_data_product_persistence: ERROR ingest_config_objs = %s" % str(ingest_config_objs)) raise BadRequest('Data Product must have one ingestion configuration %s' % str(data_product_id)) ingestion_configuration_obj = ingest_config_objs[0] log.debug("activate_data_product_persistence: ingestion_configuration_obj = %s" % str(ingestion_configuration_obj)) if data_product_obj.dataset_id: objs,_ = self.clients.resource_registry.find_objects(data_product_obj.dataset_id, PRED.hasIngestionConfiguration, RT.DatasetIngestionConfiguration, id_only=False) if not objs: log.debug('activate_data_product_persistence: Calling create_dataset_configuration for EXISTING Dataset', ) dataset_configuration_id = self.clients.ingestion_management.create_dataset_configuration( dataset_id=data_product_obj.dataset_id, archive_data=persist_data, archive_metadata=persist_metadata, ingestion_configuration_id=ingestion_configuration_obj._id) log.debug("activate_data_product_persistence: create_dataset_configuration = %s" % str(dataset_configuration_id)) else: dataset_configuration_obj = objs[0] dataset_configuration_obj.configuration.archive_data = persist_data dataset_configuration_obj.configuration.archive_metadata = persist_metadata # call ingestion management to update a dataset configuration log.debug('activate_data_product_persistence: Calling update_dataset_config', ) dataset_configuration_id = self.clients.ingestion_management.update_dataset_config(dataset_configuration_obj) log.debug("activate_data_product_persistence: update_dataset_config = %s" % str(dataset_configuration_id)) else: # create the dataset for the data # !!!!!!!! (Currently) The Datastore name MUST MATCH the ingestion configuration name!!! data_product_obj.dataset_id = self.clients.dataset_management.create_dataset(stream_id=stream, datastore_name=ingestion_configuration_obj.couch_storage.datastore_name, description=data_product_obj.description) log.debug("activate_data_product_persistence: create_dataset = %s" % str(data_product_obj.dataset_id)) self.update_data_product(data_product_obj) # Need to read again, because the _rev has changed. Otherwise error on update later. 
data_product_obj = self.clients.resource_registry.read(data_product_id) # call ingestion management to create a dataset configuration log.debug('activate_data_product_persistence: Calling create_dataset_configuration', ) dataset_configuration_id = self.clients.ingestion_management.create_dataset_configuration( dataset_id=data_product_obj.dataset_id, archive_data=persist_data, archive_metadata=persist_metadata, ingestion_configuration_id=ingestion_configuration_obj._id) log.debug("activate_data_product_persistence: create_dataset_configuration = %s" % str(dataset_configuration_id)) # save the dataset_configuration_id in the product resource? Can this be found via the stream id? data_product_obj.dataset_configuration_id = dataset_configuration_id self.update_data_product(data_product_obj) def suspend_data_product_persistence(self, data_product_id=''): """Suspend data product data persistnce into a data set, multiple options @param data_product_id str @param type str @throws NotFound object with specified id does not exist """ # retrieve the data_process object data_product_obj = self.clients.resource_registry.read(data_product_id) if data_product_obj is None: raise NotFound("Data Product %s does not exist" % data_product_id) if data_product_obj.dataset_configuration_id is None: raise NotFound("Data Product %s dataset configuration does not exist" % data_product_id) #retrieve the dataset configuation object so that attrs can be changed dataset_configuration_obj = self.clients.resource_registry.read(data_product_obj.dataset_configuration_id) if dataset_configuration_obj is None: raise NotFound("Dataset Configuration %s does not exist" % data_product_obj.dataset_configuration_id) #Set the dataset config archive data/metadata attrs to false dataset_configuration_obj.configuration.archive_data = False dataset_configuration_obj.configuration.archive_metadata = False ret = self.clients.ingestion_management.update_dataset_config(dataset_configuration_obj) log.debug("suspend_data_product_persistence: deactivate = %s" % str(ret)) def execute_data_product_lifecycle(self, data_product_id="", lifecycle_event=""): """ declare a data_product to be in a given state @param data_product_id the resource id """ return self.data_product.advance_lcs(data_product_id, lifecycle_event) def get_last_update(self, data_product_id=''): """@todo document this interface!!! @param data_product_id str @retval last_update LastUpdate @throws NotFound Data product not found or cache for data product not found. """ from ion.processes.data.last_update_cache import CACHE_DATASTORE_NAME datastore_name = CACHE_DATASTORE_NAME db = self.container.datastore_manager.get_datastore(datastore_name) stream_ids,other = self.clients.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True) retval = {} for stream_id in stream_ids: try: lu = db.read(stream_id) retval[stream_id] = lu except NotFound: continue return retval
class DataProductManagementService(BaseDataProductManagementService): """ @author Bill Bollenbacher @file ion/services/sa/product/data_product_management_service.py @brief Implementation of the data product management service """ def on_init(self): self.override_clients(self.clients) def override_clients(self, new_clients): """ Replaces the service clients with a new set of them... and makes sure they go to the right places """ self.data_product = DataProductImpl(self.clients) def create_data_product(self, data_product=None, stream_definition_id='', parameter_dictionary = None): """ @param data_product IonObject which defines the general data product resource @param source_resource_id IonObject id which defines the source for the data @retval data_product_id """ # Create will validate and register a new data product within the system validate_is_not_none(parameter_dictionary, 'A parameter dictionary must be passed to register a data product') validate_is_not_none(stream_definition_id, 'A stream definition id must be passed to register a data product') validate_is_not_none(data_product, 'A data product (ion object) must be passed to register a data product') #-------------------------------------------------------------------------------- # Register - create and store a new DataProduct resource using provided metadata #-------------------------------------------------------------------------------- data_product_id, rev = self.clients.resource_registry.create(data_product) log.debug("data product id: %s" % data_product_id) #-------------------------------------------------------------------------------- # Register - create and store the default DataProductVersion resource using provided metadata #-------------------------------------------------------------------------------- #create the initial/default data product version data_product_version = DataProductVersion() data_product_version.name = data_product.name data_product_version.description = data_product.description dpv_id, rev = self.clients.resource_registry.create(data_product_version) self.clients.resource_registry.create_association( subject=data_product_id, predicate=PRED.hasVersion, object=dpv_id) #----------------------------------------------------------------------------------------------- #Create the stream and a dataset if a stream definition is provided #----------------------------------------------------------------------------------------------- log.debug("DataProductManagementService:create_data_product: stream definition id = %s" % stream_definition_id) #if stream_definition_id: stream_id = self.clients.pubsub_management.create_stream(name=data_product.name, description=data_product.description, stream_definition_id=stream_definition_id) # Associate the Stream with the main Data Product and with the default data product version self.data_product.link_stream(data_product_id, stream_id) self.clients.resource_registry.create_association( subject=dpv_id, predicate=PRED.hasStream, object=stream_id) # create a dataset... 
data_set_id = self.clients.dataset_management.create_dataset( name= 'data_set_%s' % stream_id, stream_id=stream_id, parameter_dict=parameter_dictionary, temporal_domain=data_product.temporal_domain, spatial_domain=data_product.spatial_domain) log.debug("DataProductManagementService:create_data_product: data_set_id = %s" % str(data_set_id)) data_set_obj = self.clients.dataset_management.read_dataset(data_set_id) log.debug("DataProductManagementService:create_data_product: data_set_obj = %s" % str(data_set_obj)) # link dataset with data product. This creates the association in the resource registry self.data_product.link_data_set(data_product_id=data_product_id, data_set_id=data_set_id) self.clients.resource_registry.create_association( subject=dpv_id, predicate=PRED.hasDataset, object=data_set_id) # Return the id of the new data product return data_product_id def read_data_product(self, data_product_id=''): """ method docstring """ # Retrieve all metadata for a specific data product # Return data product resource log.debug("DataProductManagementService:read_data_product: %s" % str(data_product_id)) result = self.data_product.read_one(data_product_id) return result def update_data_product(self, data_product=None): """ @todo document this interface!!! @param data_product DataProduct @throws NotFound object with specified id does not exist """ validate_is_instance(data_product, DataProduct) log.debug("DataProductManagementService:update_data_product: %s" % str(data_product)) self.data_product.update_one(data_product) #TODO: any changes to producer? Call DataAcquisitionMgmtSvc? return def delete_data_product(self, data_product_id=''): #Check if this data product is associated to a producer #todo: convert to impl call producer_ids = self.data_product.find_stemming_data_producer(data_product_id) for producer_id in producer_ids: log.debug("DataProductManagementService:delete_data_product unassigning data producers: %s") self.clients.data_acquisition_management.unassign_data_product(producer_id, data_product_id) #-------------------------------------------------------------------------------- # remove stream associations #-------------------------------------------------------------------------------- stream_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasStream, RT.Stream, id_only=True) for stream_id in stream_ids: self.data_product.unlink_stream(data_product_id=data_product_id, stream_id=stream_id) #-------------------------------------------------------------------------------- # remove dataset associations #-------------------------------------------------------------------------------- dataset_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataset, RT.DataSet, id_only=True) for dataset_id in dataset_ids: self.data_product.unlink_data_set(data_product_id=data_product_id, data_set_id=dataset_id) # # delete the hasOutputDataProduct associations link # dp_assocs = self.clients.resource_registry.find_associations(data_product_id, PRED.hasOutputProduct) # for dp_assoc in dp_assocs: # self.clients.resource_registry.delete_association(dp_assoc) # # delete the hasInputDataProduct associations link # dp_assocs = self.clients.resource_registry.find_associations(data_product_id, PRED.hasInputProduct) # for dp_assoc in dp_assocs: # self.clients.resource_registry.delete_association(dp_assoc) #-------------------------------------------------------------------------------- # Delete the data product 
#-------------------------------------------------------------------------------- data_product_obj = self.read_data_product(data_product_id) validate_is_instance(data_product_obj, DataProduct) if data_product_obj.lcstate != LCS.RETIRED: self.data_product.delete_one(data_product_id) #self.clients.resource_registry.delete(data_product_id) #self.clients.resource_registry.set_lifecycle_state(data_product_id, LCS.RETIRED) return def hard_delete_data_product(self, data_product_id=''): return def find_data_products(self, filters=None): """ method docstring """ # Validate the input filter and augment context as required # Define set of resource attributes to filter on, change parameter from "filter" to include attributes and filter values. # potentially: title, keywords, date_created, creator_name, project, geospatial coords, time range # Call DM DiscoveryService to query the catalog for matches # Organize and return the list of matches with summary metadata (title, summary, keywords) return self.data_product.find_some(filters) def activate_data_product_persistence(self, data_product_id=''): """Persist data product data into a data set @param data_product_id str @throws NotFound object with specified id does not exist """ #-------------------------------------------------------------------------------- # retrieve the data_process object #-------------------------------------------------------------------------------- data_product_obj = self.data_product.read_one(data_product_id) validate_is_not_none(data_product_obj, "The data product id should correspond to a valid registered data product.") #-------------------------------------------------------------------------------- # get the Stream associated with this data product; if no stream then create one, if multiple streams then Throw #-------------------------------------------------------------------------------- streams = self.data_product.find_stemming_stream(data_product_id) if not streams: raise BadRequest('Data Product %s must have one stream associated' % str(data_product_id)) stream_id = streams[0]._id log.debug("activate_data_product_persistence: stream = %s" % str(stream_id)) #----------------------------------------------------------------------------------------- # grab the ingestion configuration id from the data_product in order to use to persist it #----------------------------------------------------------------------------------------- if data_product_obj.dataset_configuration_id: ingestion_configuration_id = data_product_obj.dataset_configuration_id else: ingestion_configuration_id = self.clients.ingestion_management.list_ingestion_configurations(id_only=True)[0] log.debug("ingestion_configuration_id for data product: %s" % ingestion_configuration_id) #-------------------------------------------------------------------------------- # persist the data stream using the ingestion config id and stream id #-------------------------------------------------------------------------------- # find datasets for the data product dataset_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataset, RT.DataSet, id_only=True) log.debug("Found the following datasets for the data product: %s" % dataset_ids) for dataset_id in dataset_ids: try: dataset_id = self.clients.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_configuration_id, dataset_id=dataset_id) except BadRequest: log.warning("Activate data product may have resulted in a duplicate attempt to associate a stream 
to a dataset") log.warning("Please note that creating a data product calls the create_dataset() method which already makes an association") log.debug("activate_data_product_persistence: dataset_id = %s" % str(dataset_id)) # link data set to data product #self.data_product.link_data_set(data_product_id, dataset_id) #-------------------------------------------------------------------------------- # todo: dataset_configuration_obj contains the ingest config for now... # Update the data product object #-------------------------------------------------------------------------------- data_product_obj.dataset_configuration_id = ingestion_configuration_id self.update_data_product(data_product_obj) def suspend_data_product_persistence(self, data_product_id=''): """Suspend data product data persistence into a data set, multiple options @param data_product_id str @param type str @throws NotFound object with specified id does not exist """ log.debug("suspend_data_product_persistence: data_product_id = %s" % str(data_product_id)) #-------------------------------------------------------------------------------- # retrieve the data_process object #-------------------------------------------------------------------------------- data_product_obj = self.clients.resource_registry.read(data_product_id) validate_is_not_none(data_product_obj, 'Should not have been empty') validate_is_instance(data_product_obj, DataProduct) if data_product_obj.dataset_configuration_id is None: raise NotFound("Data Product %s dataset configuration does not exist" % data_product_id) log.debug("Data product: %s" % data_product_obj) #-------------------------------------------------------------------------------- # get the Stream associated with this data product; if no stream then create one, if multiple streams then Throw #streams = self.data_product.find_stemming_stream(data_product_id) #-------------------------------------------------------------------------------- stream_ids, _ = self.clients.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, object_type=RT.Stream, id_only=True) if not stream_ids: raise BadRequest('Data Product %s must have one stream associated' % str(data_product_id)) for stream_id in stream_ids: log.debug("suspend_data_product_persistence: stream = %s" % str(stream_id)) log.debug("data_product_obj.dataset_configuration_id: %s" % data_product_obj.dataset_configuration_id) ret = self.clients.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=data_product_obj.dataset_configuration_id) log.debug("suspend_data_product_persistence: deactivate = %s" % str(ret)) #-------------------------------------------------------------------------------- # detach the dataset from this data product #-------------------------------------------------------------------------------- dataset_ids, _ = self.clients.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasDataset, id_only=True) for dataset_id in dataset_ids: self.data_product.unlink_data_set(data_product_id, dataset_id) def get_data_product_provenance(self, data_product_id=''): # Retrieve information that characterizes how this data was produced # Return in a dictionary provenance_results = {} current_data_product = data_product_id log.debug("DataProductManagementService:get_data_product_provenance: %s" % str(current_data_product)) data_product = self.data_product.read_one(data_product_id) validate_is_not_none(data_product, "Should have got a non empty data product") # todo: get the 
    def _find_producers(self, data_product_id=''):
        source_ids = []
        # get the links to the DataProducer resources of this data product
        producer_ids, _ = self.clients.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasDataProducer, id_only=True)
        for producer_id in producer_ids:
            # get the link to that resource's parent DataProducer
            parent_ids, _ = self.clients.resource_registry.find_objects(subject=producer_id, predicate=PRED.hasParent, id_only=True)
            for parent_id in parent_ids:
                # get the producing resource that the parent DataProducer represents;
                # that resource is the subject of the hasDataProducer association
                nxt_producer_ids, _ = self.clients.resource_registry.find_subjects(predicate=PRED.hasDataProducer, object=parent_id, id_only=True)
                for nxt_producer_id in nxt_producer_ids:
                    nxt_producer_obj = self.clients.resource_registry.read(nxt_producer_id)
                    #todo: check the type of resource; instrument, data process or extDataset
                    source_ids.append(nxt_producer_id)
        log.debug("DataProductManagementService:_find_producers: %s", source_ids)
        return source_ids

    def _find_producer_in_products(self, producer_id=''):
        # get the links to the input DataProduct resources of this producer
        product_ids, _ = self.clients.resource_registry.find_objects(subject=producer_id, predicate=PRED.hasInputDataProduct, id_only=True)
        return product_ids
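    # Hedged sketch of the association hops the two helpers above traverse; the
    # predicate names are the ones used in this file, while the resource types on
    # the left are assumptions based on the todo above:
    #
    #   DataProduct   --hasDataProducer-----> DataProducer
    #   DataProducer  --hasParent-----------> parent DataProducer
    #   InstrumentDevice / DataProcess / ExternalDataset --hasDataProducer--> parent DataProducer
    #   DataProcess   --hasInputDataProduct-> DataProduct (the producer's inputs)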
    def create_data_product_version(self, data_product_id='', data_product_version=None):
        """Define a new version of an existing set of information that represents an improvement in the
        quality or understanding of the information. Only creates the second and higher versions of a
        DataProduct; the first version is implicit in the create_data_product() operation.

        @param data_product_id    str
        @param data_product_version    DataProductVersion
        @retval data_product_version_id    str
        @throws BadRequest    if object does not have _id or _rev attribute
        @throws NotFound    object with specified id does not exist
        """
        validate_is_not_none(data_product_id, 'A data product identifier must be passed to create a data product version')
        validate_is_not_none(data_product_version, 'A data product version (ion object) must be passed to create a data product version')

        data_product_version_id, rev = self.clients.resource_registry.create(data_product_version)
        self.clients.resource_registry.create_association(subject=data_product_id, predicate=PRED.hasVersion, object=data_product_version_id)

        #-----------------------------------------------------------------------------------------------
        # Create the stream and a dataset for the new version
        #-----------------------------------------------------------------------------------------------
        # create_stream() returns both the stream id and its route
        stream_id, route = self.clients.pubsub_management.create_stream(name=data_product_version.name, description=data_product_version.description)

        # Associate the Stream with the new data product version
        self.clients.resource_registry.create_association(subject=data_product_version_id, predicate=PRED.hasStream, object=stream_id)

        # get the dataset associated with the original data product
        dataset_ids, _ = self.clients.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasDataset, object_type=RT.Dataset, id_only=True)
        if not dataset_ids:
            raise BadRequest('No Dataset is associated with the DataProduct %s' % data_product_id)
        log.debug("DataProductManagementService:create_data_product_version base_dataset_id: %s", dataset_ids[0])

        base_dataset_obj = self.clients.dataset_management.read_dataset(str(dataset_ids[0]))
        log.debug("DataProductManagementService:create_data_product_version base_dataset_obj: %s", base_dataset_obj)

        # create a dataset for this version; it must have the same parameter dictionary and
        # spatial/temporal domains as the original data product
        data_set_id = self.clients.dataset_management.create_dataset(
            name='data_set_%s' % stream_id,
            stream_id=stream_id,
            parameter_dict=base_dataset_obj.parameter_dictionary,
            temporal_domain=base_dataset_obj.temporal_domain,
            spatial_domain=base_dataset_obj.spatial_domain)

        self.clients.resource_registry.create_association(subject=data_product_version_id, predicate=PRED.hasDataset, object=data_set_id)

        return data_product_version_id

    def update_data_product_version(self, data_product_version=None):
        """Update an existing data product version

        @param data_product_version    DataProductVersion
        @throws NotFound    object with specified id does not exist
        """
        validate_is_not_none(data_product_version, "A data product version (ion object) must be passed to update a data product version")
        log.debug("DataProductManagementService:update_data_product_version: %s", data_product_version)

        self.clients.resource_registry.update(data_product_version)

        #TODO: any changes to producer? Call DataAcquisitionMgmtSvc?
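    # Hedged usage sketch; the `dpms_client` handle and the resource names are
    # illustrative, not part of this service:
    #
    #   dpv = IonObject(RT.DataProductVersion, name='ctd_parsed_v2',
    #                   description='recalibrated conductivity')
    #   dpv_id = dpms_client.create_data_product_version(data_product_id, dpv)
    #   dpv = dpms_client.read_data_product_version(dpv_id)
    #   dpv.description = 'recalibrated conductivity, despiked'
    #   dpms_client.update_data_product_version(dpv)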
    def read_data_product_version(self, data_product_version_id=''):
        """Retrieve data product version information

        @param data_product_version_id    str
        @retval data_product_version    DataProductVersion
        """
        log.debug("DataProductManagementService:read_data_product_version: %s", data_product_version_id)

        result = self.clients.resource_registry.read(data_product_version_id)
        validate_is_not_none(result, "The data product version %s could not be read" % data_product_version_id)

        return result

    def delete_data_product_version(self, data_product_version_id=''):
        """Remove a version of a data product.

        @param data_product_version_id    str
        @throws BadRequest    if object does not have _id or _rev attribute
        @throws NotFound    object with specified id does not exist
        """
        #todo: not yet implemented
        pass

    def execute_data_product_lifecycle(self, data_product_id="", lifecycle_event=""):
        """Advance a data product to the given lifecycle state

        @param data_product_id    the resource id
        @param lifecycle_event    the lifecycle event to apply
        """
        return self.data_product.advance_lcs(data_product_id, lifecycle_event)

    def get_last_update(self, data_product_id=''):
        """Retrieve the LastUpdate cache entry for each stream of a data product

        @param data_product_id    str
        @retval last_update    dict mapping stream_id to LastUpdate
        @throws NotFound    Data product not found or cache for data product not found.
        """
        from ion.processes.data.last_update_cache import CACHE_DATASTORE_NAME
        datastore_name = CACHE_DATASTORE_NAME
        db = self.container.datastore_manager.get_datastore(datastore_name)
        stream_ids, _ = self.clients.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        retval = {}
        for stream_id in stream_ids:
            try:
                lu = db.read(stream_id)
                retval[stream_id] = lu
            except NotFound:
                # no cache entry for this stream; skip it
                continue
        return retval
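    # Hedged sketch of get_last_update()'s return shape: one cache document per
    # stream that has an entry in the LastUpdate datastore; streams without a
    # cache entry are skipped rather than raising NotFound to the caller.
    #
    #   { '<stream_id>': <LastUpdate ion object read from the cache>, ... }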