def register(self, model: ResourceType):
    """
    Create a new element of the collection or update an existing element.

    If the input model has an ID that matches an object already stored in the
    database, that object is updated. Otherwise a new object is created.

    Parameters
    ----------
    model: ResourceType
        The DataConcepts object.

    Returns
    -------
    ResourceType
        A copy of the registered object as it now exists in the database.

    """
    if self.dataset_id is None:
        raise RuntimeError("Must specify a dataset in order to register a data model object.")
    endpoint = self._get_path()
    # How do we prepare a citrine-python object to be the json in a POST request?
    # Right now, that method scrubs out None values and replaces top-level objects
    # with links. Eventually, we want to replace it with:
    #   dumped_data = dumps(loads(dumps(model.dump())))
    # This dumps the object to a dictionary (model.dump()) and then to a string
    # (dumps()). That string is still nested -- because it's a dictionary,
    # taurus.dumps() does not know how to replace the objects with link-by-uids.
    # loads() converts the string into nested taurus objects, and the final dumps()
    # produces a json-ready string in which all of the object references have been
    # replaced with link-by-uids.
    payload = replace_objects_with_links(scrub_none(model.dump()))
    response = self.session.post_resource(endpoint, payload)
    registered = self.build(response)
    model.session = self.session
    return registered
def test_failed_replacement():
    """An object that does not have a type and a uids dictionary should not be replaced."""
    # Missing 'type' field: must pass through unchanged.
    no_type = dict(
        object=dict(some_field='material_run', uids={'my_id': '1', 'id': '17'}))
    assert replace_objects_with_links(no_type) == no_type

    # 'uids' is present but not a dictionary: must pass through unchanged.
    bad_uids = dict(object=dict(type='material_run', uids='a uid string'))
    assert replace_objects_with_links(bad_uids) == bad_uids

    # Missing 'uids' field: must pass through unchanged.
    no_uids = dict(
        object=dict(type='material_run', some_field={'my_id': '1', 'id': '17'}))
    assert replace_objects_with_links(no_uids) == no_uids
def register(self, model: ResourceType, dry_run=False):
    """
    Create a new element of the collection or update an existing element.

    If the input model has an ID that matches an object already stored in the
    database, that object is updated. Otherwise a new object is created.

    Only the top-level object in `model` itself is written to the database by
    this method. References to other objects are persisted as links, and the
    object returned by this method has every instance of a data object replaced
    by an instance of LinkByUid. Registering an object which references other
    objects does NOT implicitly register those other objects. Rather, those
    other objects' values are ignored, and the pre-existence of objects with
    their IDs is asserted before attempting to write `model`.

    Parameters
    ----------
    model: ResourceType
        The DataConcepts object.
    dry_run: bool
        Whether to actually register the item or run a dry run of the register
        operation. Dry run is intended to be used for validation.
        Default: false

    Returns
    -------
    ResourceType
        A copy of the registered object as it now exists in the database.

    """
    if self.dataset_id is None:
        raise RuntimeError(
            "Must specify a dataset in order to register a data model object."
        )
    endpoint = self._get_path()
    query_params = {'dry_run': dry_run}

    # How do we prepare a citrine-python object to be the json in a POST request?
    # Right now, that method scrubs out None values and replaces top-level objects
    # with links. Eventually, we want to replace it with:
    #   dumped_data = dumps(loads(dumps(model.dump())))
    # This dumps the object to a dictionary (model.dump()) and then to a string
    # (dumps()). But this string is still nested -- because it's a dictionary,
    # GEMDJson.dumps() does not know how to replace the objects with link-by-uids.
    # loads() converts this string into nested gemd objects, and then the final
    # dumps() converts that to a json-ready string in which all of the object
    # references have been replaced with link-by-uids.
    temp_scope = str(uuid4())
    uid_scope = temp_scope if dry_run else CITRINE_SCOPE
    # This apparent no-op populates uids on the model as a side effect.
    GEMDJson(scope=uid_scope).dumps(model)
    payload = replace_objects_with_links(scrub_none(model.dump()))
    # Strip the temporary uids that were assigned above.
    recursive_foreach(model, lambda obj: obj.uids.pop(temp_scope, None))

    response = self.session.post_resource(endpoint, payload, params=query_params)
    return self.build(response)
def validate_templates(self,
                       model: DataObjectResourceType,
                       object_template: Optional[ObjectTemplateResourceType] = None,
                       ingredient_process_template: Optional[ProcessTemplate] = None)\
        -> List[ValidationError]:
    """
    Validate a data object against its templates.

    Validates against provided object templates (passed in as parameters) and
    stored attribute templates linked on the data object.

    Parameters
    ----------
    model: DataObjectResourceType
        The data object to validate.
    object_template: Optional[ObjectTemplateResourceType]
        Optional object template to validate against.
    ingredient_process_template: Optional[ProcessTemplate]
        Optional process template to validate an ingredient against. Ignored
        unless the data object is an IngredientSpec or IngredientRun.

    Returns
    -------
    List[ValidationError]
        The validation errors encountered. Empty if validation succeeded.

    """
    path = self._get_path(ignore_dataset=True) + "/validate-templates"

    temp_scope = str(uuid4())
    GEMDJson(scope=temp_scope).dumps(model)  # This apparent no-op populates uids
    dumped_data = replace_objects_with_links(scrub_none(model.dump()))
    recursive_foreach(model, lambda x: x.uids.pop(temp_scope, None))  # Strip temp uids

    request_data = {"dataObject": dumped_data}
    if object_template is not None:
        request_data["objectTemplate"] = \
            replace_objects_with_links(scrub_none(object_template.dump()))
    if ingredient_process_template is not None:
        request_data["ingredientProcessTemplate"] = \
            replace_objects_with_links(scrub_none(ingredient_process_template.dump()))
    try:
        self.session.put_resource(path, request_data)
        return []
    except BadRequest as e:
        # Validation failures come back as a 400 with structured errors attached.
        if e.api_error is not None and e.api_error.validation_errors:
            return e.api_error.validation_errors
        # Bare `raise` re-raises with the original traceback intact
        # (unlike `raise e`, which appends the current frame).
        raise
def register_all(self, models: List[ResourceType], dry_run=False) -> List[ResourceType]:
    """
    [ALPHA] Create or update each model in models.

    This method has the same behavior as `register`, except that no models
    will be written if any one of them is invalid.

    Using this method should yield significant improvements to write speed
    over separate calls to `register`.

    Parameters
    ----------
    models: List[ResourceType]
        The objects to be written.
    dry_run: bool
        Whether to actually register the objects or run a dry run of the
        register operation. Dry run is intended to be used for validation.
        Default: false

    Returns
    -------
    List[ResourceType]
        Each object model as it now exists in the database. The order and
        number of models is guaranteed to be the same as originally specified.

    """
    if self.dataset_id is None:
        raise RuntimeError(
            "Must specify a dataset in order to register a data model object."
        )
    path = self._get_path()
    params = {'dry_run': dry_run}

    temp_scope = str(uuid4())
    scope = temp_scope if dry_run else CITRINE_SCOPE
    # Named `gemd_json` (not `json`) to avoid shadowing the stdlib module and
    # colliding with the `json=` keyword argument passed to put_resource below.
    gemd_json = GEMDJson(scope=scope)
    for model in models:
        gemd_json.dumps(model)  # This apparent no-op populates uids
    objects = [
        replace_objects_with_links(scrub_none(model.dump()))
        for model in models
    ]
    recursive_foreach(models, lambda x: x.uids.pop(temp_scope, None))  # Strip temp uids

    response_data = self.session.put_resource(
        path + '/batch',
        json={'objects': objects},
        params=params
    )
    return [self.build(obj) for obj in response_data['objects']]
def register(self, model: ResourceType, dry_run=False):
    """
    Create a new element of the collection or update an existing element.

    If the input model has an ID that matches an object already stored in the
    database, that object is updated. Otherwise a new object is created.

    Only the top-level object in `model` itself is written to the database by
    this method. References to other objects are persisted as links, and the
    object returned here has all instances of data objects replaced by
    instances of LinkByUid. Registering an object which references other
    objects does NOT implicitly register those other objects; their values are
    ignored, and the pre-existence of objects with their IDs is asserted
    before attempting to write `model`.

    Parameters
    ----------
    model: ResourceType
        The DataConcepts object.
    dry_run: bool
        Whether to actually register the item or run a dry run of the register
        operation. Dry run is intended to be used for validation.
        Default: false

    Returns
    -------
    ResourceType
        A copy of the registered object as it now exists in the database.

    """
    if self.dataset_id is None:
        raise RuntimeError(
            "Must specify a dataset in order to register a data model object."
        )
    path = self._get_path()
    params = {'dry_run': dry_run}

    scratch_scope = str(uuid4())
    if dry_run:
        chosen_scope = scratch_scope
    else:
        chosen_scope = CITRINE_SCOPE
    # This apparent no-op populates uids on the model as a side effect.
    GEMDJson(scope=chosen_scope).dumps(model)
    dumped = replace_objects_with_links(scrub_none(model.dump()))
    # Remove any uids assigned under the scratch scope.
    recursive_foreach(model, lambda item: item.uids.pop(scratch_scope, None))

    data = self.session.post_resource(path, dumped, params=params)
    full_model = self.build(data)
    return full_model
def test_simple_replacement():
    """A top-level object should turn into a link-by-uid."""
    original = dict(
        key='value',
        object=dict(type='material_run', uids={'my_id': '1', 'id': '17'}),
    )
    # The 'id' scope wins, and the object collapses to a link.
    expected = {
        'key': 'value',
        'object': {'type': 'link_by_uid', 'scope': 'id', 'id': '17'},
    }
    assert replace_objects_with_links(original) == expected
def test_nested_replacement():
    """A list of objects should turn into a list of link-by-uids."""
    original = dict(object=[
        dict(type='material_run', uids={'my_id': '1'}),
        dict(type='material_run', uids={'my_id': '2'}),
    ])
    expected = {
        'object': [
            {'type': 'link_by_uid', 'scope': 'my_id', 'id': '1'},
            {'type': 'link_by_uid', 'scope': 'my_id', 'id': '2'},
        ]
    }
    assert replace_objects_with_links(original) == expected
def async_update(self, model: ResourceType, *,
                 dry_run: bool = False,
                 wait_for_response: bool = True,
                 timeout: float = 2 * 60,
                 polling_delay: float = 1.0) -> Optional[UUID]:
    """
    [ALPHA] Update a particular element of the collection with data validation.

    Update a particular element of the collection, doing a deeper check to
    ensure that the dependent data objects are still with the (potentially)
    changed constraints of this change. This will allow you to make bounds
    and allowed named/labels changes to templates.

    Parameters
    ----------
    model: ResourceType
        The DataConcepts object.
    dry_run: bool
        Whether to actually update the item or run a dry run of the update
        operation. Dry run is intended to be used for validation.
        Default: false
    wait_for_response:
        Whether to poll for the eventual response. This changes the return
        type (see below).
    timeout:
        How long to poll for the result before giving up. This is expressed
        in (fractional) seconds.
    polling_delay:
        How long to delay between each polling retry attempt.

    Returns
    -------
    Optional[UUID]
        If wait_for_response is True, then this call will poll the backend,
        waiting for the eventual job result. In the case of successful
        validation/update, a return value of None is provided which indicates
        success. In the case of a failure validating or processing the update,
        an exception (JobFailureError) is raised and an error message is
        logged with the underlying reason of the failure.

        If wait_for_response is False, a job ID (of type UUID) is returned
        that one can use to poll for the job completion and result with the
        :func:`~citrine.resources.DataConceptsCollection.poll_async_update_job`
        method.

    """
    # Fail fast: validate the precondition before doing any serialization work.
    if self.dataset_id is None:
        raise RuntimeError("Must specify a dataset in order to update "
                           "a data model object with data validation.")

    temp_scope = str(uuid4())
    GEMDJson(scope=temp_scope).dumps(model)  # This apparent no-op populates uids
    dumped_data = replace_objects_with_links(scrub_none(model.dump()))
    recursive_foreach(model, lambda x: x.uids.pop(temp_scope, None))  # Strip temp uids

    scope = CITRINE_SCOPE
    # Named `uid` (not `id`) to avoid shadowing the builtin.
    uid = dumped_data['uids']['id']
    url = self._get_path() + \
        "/" + scope + "/" + uid + "/async"

    response_json = self.session.put_resource(url, dumped_data,
                                              params={'dry_run': dry_run})

    job_id = response_json["job_id"]

    if wait_for_response:
        self.poll_async_update_job(job_id, timeout=timeout,
                                   polling_delay=polling_delay)
        # That worked; nothing is returned in the successful case.
        return None
    else:
        # TODO: use JobSubmissionResponse here instead
        return job_id