def remove_tags(self, tags=None):
    """Remove the specified keys from tags dictionary of this dataset.

    Keys that are not present in the current tags are ignored.

    :param tags: The list of keys to remove. ``None`` is treated as "no keys".
    :type tags: builtin.list[str]
    :return: The updated dataset object.
    :rtype: typing.Union[azureml.data.TabularDataset, azureml.data.FileDataset]
    :raises UserErrorException: If the dataset is not registered.
    """
    if not self._registration or not self._registration.workspace or not self._registration.registered_id:
        # Raise instead of returning the exception object: a returned exception
        # would look like a successful result to the caller.
        raise UserErrorException(
            'To remove tags from this dataset it must be registered.')
    workspace = self._registration.workspace

    def request():
        # Work on a copy so the local registration only changes once the
        # service call has succeeded.
        updated_tags = deepcopy(self._registration.tags)
        keys_to_remove = set(tags) if tags is not None else set()
        # Guard against a dataset that currently has no tags at all.
        if updated_tags:
            for key in keys_to_remove.intersection(updated_tags):
                del updated_tags[key]
        return _restclient(workspace).dataset.update_dataset(
            workspace.subscription_id,
            workspace.resource_group,
            workspace.name,
            dataset_id=self._registration.registered_id,
            new_dataset_dto=_dataset_to_dto(
                self, self.name, self.description, updated_tags,
                self._registration.registered_id),
            custom_headers=self._get_telemetry_headers())

    success, result = _make_request(request)
    if not success:
        # On failure `result` holds the exception to surface.
        raise result
    result_dto = _dto_to_dataset(workspace, result)
    # Keep the in-memory registration in sync with what the service stored.
    self._registration.tags = result_dto.tags
    return result_dto
def _ensure_saved_internal(self, workspace):
    """Make sure this dataset has a saved id and return it.

    The service is only contacted when the dataset has no registration info
    or no saved id yet. In that case the definition and the saved id held by
    this object are replaced with the values from the service response.

    :param workspace: The workspace used for the service call.
    :return: The saved id of this dataset.
    """
    current = self._registration
    if current and current.saved_id:
        # Already saved: nothing to ask the service for.
        return current.saved_id

    def call_ensure_saved():
        client = _restclient(workspace)
        return client.dataset.ensure_saved(
            subscription_id=workspace.subscription_id,
            resource_group_name=workspace.resource_group,
            workspace_name=workspace.name,
            dataset=_dataset_to_saved_dataset_dto(self),
            custom_headers=self._get_telemetry_headers())

    ok, response = _make_request(call_ensure_saved)
    if not ok:
        raise response
    saved = _saved_dataset_dto_to_dataset(workspace, response)

    # Adopt the definition returned by the service.
    self._definition = saved._definition

    # Adopt the saved id returned by the service, keeping any existing
    # registration object in place.
    if self._registration:
        self._registration.saved_id = saved._registration.saved_id
    else:
        self._registration = saved._registration

    return self._registration.saved_id
def register(self, workspace, name, description=None, tags=None, create_new_version=False):
    """Register the dataset to the provided workspace.

    After a successful registration the output reference lineage of the
    resulting dataset is tracked.

    :param workspace: The workspace to register the dataset.
    :type workspace: azureml.core.Workspace
    :param name: The name to register the dataset with.
    :type name: str
    :param description: A text description of the dataset. Defaults to None.
    :type description: str
    :param tags: Dictionary of key value tags to give the dataset. Defaults to None.
    :type tags: dict[str, str]
    :param create_new_version: Boolean to register the dataset as a new version under the specified name.
    :type create_new_version: bool
    :return: The registered dataset object.
    :rtype: typing.Union[azureml.data.TabularDataset, azureml.data.FileDataset]
    """
    def send_registration():
        client = _restclient(workspace)
        return client.dataset.register(
            workspace.subscription_id,
            workspace.resource_group,
            workspace.name,
            dataset_dto=_dataset_to_dto(self, name, description, tags),
            if_exists_ok=create_new_version,
            update_definition_if_exists=create_new_version,
            custom_headers=self._get_telemetry_headers())

    def translate_error(error):
        # Map known service failures to user-facing errors; any other failure
        # yields None.
        code = error.response.status_code
        if code == 409:
            message = (
                'There is already a dataset registered under name "{}". '
                'Specify `create_new_version=True` to register the dataset as a new version. '
                'Use `update`, `add_tags`, or `remove_tags` to change only the description or tags.'
            ).format(name)
            return UserErrorException(message)
        if code != 400:
            return None
        # A 400 may carry the name:version of an identical, already registered dataset.
        found = re.findall(
            r'has been registered as (.+):([0-9]+) \(name:version\).',
            error.message,
            re.IGNORECASE)
        if len(found) != 1:
            return None
        existing_name, existing_version = found[0]
        message = (
            'An identical dataset had already been registered, which can '
            'be retrieved with `Dataset.get_by_name(workspace, name="{}", version={})`.'
        ).format(existing_name, existing_version)
        return UserErrorException(message)

    ok, response = _make_request(send_registration, translate_error)
    if not ok:
        raise response
    registered = _dto_to_dataset(workspace, response)
    self.__class__._track_output_reference_lineage(registered)
    return registered
def _get_by_id(workspace, id):
    """Look up a dataset in the workspace by id.

    Registered datasets are queried first by saved dataset id. When that
    request fails or does not yield exactly one entry, the dataset is
    fetched by id with legacy id resolution enabled.

    :param workspace: The workspace to search in.
    :param id: The id of the dataset.
    :return: The dataset built from the service response.
    :raises UserErrorException: If the fallback lookup answers with 404.
    """
    def fetch_registered():
        client = _restclient(workspace)
        return client.dataset.get_datasets_by_saved_dataset_id(
            subscription_id=workspace.subscription_id,
            resource_group_name=workspace.resource_group,
            workspace_name=workspace.name,
            saved_dataset_id=id,
            # just need the 1st (can only be more than one for dataset created in the old age)
            page_size=1,
            custom_headers=_custom_headers)

    def fetch_unregistered():
        client = _restclient(workspace)
        return client.dataset.get_by_id(
            subscription_id=workspace.subscription_id,
            resource_group_name=workspace.resource_group,
            workspace_name=workspace.name,
            id=id,
            resolve_legacy_id=True,
            custom_headers=_custom_headers)

    def on_unregistered_error(error):
        if error.response.status_code != 404:
            return None
        return UserErrorException(
            'Cannot find dataset with id "{}" in the workspace.'.format(id))

    ok, response = _make_request(fetch_registered)
    if ok and len(response.value) == 1:
        found = _dto_to_dataset(workspace, response.value[0])
    else:
        ok, response = _make_request(fetch_unregistered, on_unregistered_error)
        if not ok:
            raise response
        found = _saved_dataset_dto_to_dataset(workspace, response)

    warn_deprecated_blocks(found)
    return found
def add_tags(self, tags=None):
    """Add key value pairs to the tags dictionary of this dataset.

    A key that already exists with the same value is accepted; a key that
    already exists with a different value is rejected.

    :param tags: The dictionary of tags to add. ``None`` is treated as an empty dictionary.
    :type tags: dict[str, str]
    :return: The updated dataset object.
    :rtype: typing.Union[azureml.data.TabularDataset, azureml.data.FileDataset]
    :raises UserErrorException: If the dataset is not registered, or if it already
        contains a different value for one of the given keys.
    """
    if not self._registration or not self._registration.workspace or not self._registration.registered_id:
        # Raise instead of returning the exception object: a returned exception
        # would look like a successful result to the caller.
        raise UserErrorException(
            'To add tags to this dataset it must be registered.')
    workspace = self._registration.workspace

    def request():
        new_tags = tags if tags is not None else {}
        # Guard against a dataset that currently has no tags at all.
        current_tags = self._registration.tags or {}
        conflicting_keys = [
            key for key in set(new_tags).intersection(current_tags)
            if current_tags[key] != new_tags[key]
        ]
        if conflicting_keys:
            raise UserErrorException(
                ('Dataset already contains different values for tags '
                 'with the following keys {}').format(conflicting_keys))
        # Work on a copy so the local registration only changes once the
        # service call has succeeded.
        updated_tags = deepcopy(current_tags)
        updated_tags.update(new_tags)
        return _restclient(workspace).dataset.update_dataset(
            workspace.subscription_id,
            workspace.resource_group,
            workspace.name,
            dataset_id=self._registration.registered_id,
            new_dataset_dto=_dataset_to_dto(
                self, self.name, self.description, updated_tags,
                self._registration.registered_id),
            custom_headers=self._get_telemetry_headers())

    success, result = _make_request(request)
    if not success:
        # On failure `result` holds the exception to surface.
        raise result
    result_dto = _dto_to_dataset(workspace, result)
    # Keep the in-memory registration in sync with what the service stored.
    self._registration.tags = result_dto.tags
    return result_dto
def update(self, description=None, tags=None):
    """Perform an in-place update of the dataset.

    :param description: The new description to use for the dataset. This description replaces the existing
        description. Defaults to existing description. To clear description, enter empty string.
    :type description: str
    :param tags: A dictionary of tags to update the dataset with. These tags replace existing tags for the
        dataset. Defaults to existing tags. To clear tags, enter empty dictionary.
    :type tags: dict[str, str]
    :return: The updated dataset object.
    :rtype: typing.Union[azureml.data.TabularDataset, azureml.data.FileDataset]
    :raises UserErrorException: If the dataset is not registered.
    """
    if not self._registration or not self._registration.workspace or not self._registration.registered_id:
        # Raise instead of returning the exception object: a returned exception
        # would look like a successful result to the caller.
        raise UserErrorException(
            'To update this dataset it must be registered.')
    workspace = self._registration.workspace

    def request():
        # None means "keep the current value"; an empty string / empty dict
        # is a deliberate request to clear and must be passed through.
        updated_description = self._registration.description if description is None else description
        updated_tags = self._registration.tags if tags is None else tags
        return _restclient(workspace).dataset.update_dataset(
            workspace.subscription_id,
            workspace.resource_group,
            workspace.name,
            dataset_id=self._registration.registered_id,
            new_dataset_dto=_dataset_to_dto(
                self, self.name, updated_description, updated_tags,
                self._registration.registered_id),
            custom_headers=self._get_telemetry_headers())

    success, result = _make_request(request)
    if not success:
        # On failure `result` holds the exception to surface.
        raise result
    result_dto = _dto_to_dataset(workspace, result)
    # Keep the in-memory registration in sync with what the service stored.
    self._registration.tags = result_dto.tags
    self._registration.description = result_dto.description
    return result_dto
def _register(self, workspace, name, description=None, tags=None, create_new_version=False):
    """Register the dataset to the given workspace and return the registered dataset.

    :param workspace: The workspace to register the dataset in.
    :param name: The name to register the dataset with.
    :param description: A text description of the dataset. Defaults to None.
    :param tags: Dictionary of key value tags to give the dataset. Defaults to None.
    :param create_new_version: Whether to register the dataset as a new version under the given name.
    :return: The dataset built from the service response.
    """
    def send_registration():
        client = _restclient(workspace)
        return client.dataset.register(
            workspace.subscription_id,
            workspace.resource_group,
            workspace.name,
            dataset_dto=_dataset_to_dto(self, name, description, tags),
            if_exists_ok=create_new_version,
            update_definition_if_exists=create_new_version,
            custom_headers=self._get_telemetry_headers())

    def translate_error(error):
        # Map known service failures to user-facing errors; any other failure
        # yields None.
        code = error.response.status_code
        if code == 409:
            message = (
                'There is already a dataset registered under name "{}". '
                'Specify `create_new_version=True` to register the dataset as a new version.'
            ).format(name)
            return UserErrorException(message)
        if code != 400:
            return None
        # A 400 may carry the name:version of an identical, already registered dataset.
        found = re.findall(
            r'has been registered as (.+):([0-9]+) \(name:version\).',
            error.message,
            re.IGNORECASE)
        if len(found) != 1:
            return None
        existing_name, existing_version = found[0]
        message = (
            'An identical dataset had already been registered, which can '
            'be retrieved with `Dataset.get_by_name(workspace, name="{}", version={})`.'
        ).format(existing_name, existing_version)
        return UserErrorException(message)

    ok, response = _make_request(send_registration, translate_error)
    if not ok:
        raise response
    return _dto_to_dataset(workspace, response)
def unregister_all_versions(self):
    """Unregister all versions under the registration name of this dataset from the workspace.

    Does nothing when the dataset is not registered. On success the
    registration info held by this object is cleared.

    .. remarks::

        The operation does not change any source data.
    """
    registration = self._registration
    if not (registration and registration.workspace and registration.registered_id):
        # no-op if dataset is not registered
        return

    workspace = registration.workspace

    def send_unregister():
        client = _restclient(workspace)
        return client.dataset.unregister_dataset(
            workspace.subscription_id,
            workspace.resource_group,
            workspace.name,
            self.name,
            custom_headers=_custom_headers)

    ok, response = _make_request(send_unregister)
    if not ok:
        raise response
    self._registration = None
def _get_by_name(workspace, name, version):
    """Retrieve a registered dataset from the workspace by name and version.

    :param workspace: The workspace to look the dataset up in.
    :param name: The registration name of the dataset.
    :param version: The version to retrieve: a number (or numeric string), or
        ``'latest'`` / ``None``, both of which are sent to the service as ``None``.
    :return: The dataset built from the service response.
    :raises UserErrorException: If ``version`` is neither a number nor ``'latest'``,
        or if the service answers with 404.
    """
    if version != 'latest' and version is not None:
        try:
            version = int(version)
        # Only conversion failures mean "invalid version"; a bare except would
        # also swallow KeyboardInterrupt / SystemExit.
        except (TypeError, ValueError, OverflowError):
            raise UserErrorException(
                'Invalid value {} for version. Version value must be number or "latest".'
                .format(version))
    else:
        version = None

    def request():
        return _restclient(workspace).dataset.get_dataset_by_name(
            workspace.subscription_id,
            workspace.resource_group,
            workspace.name,
            dataset_name=name,
            version_id=version,
            custom_headers=_custom_headers)

    def handle_error(error):
        if error.response.status_code == 404:
            # 'latest' was normalized to None above, so test for None here;
            # otherwise the message would read "(version: None)".
            version_hint = '' if version is None else ' (version: {})'.format(version)
            return UserErrorException(
                'Cannot find dataset registered with name "{}"{} in the workspace.'
                .format(name, version_hint))

    success, result = _make_request(request, handle_error)
    if not success:
        # On failure `result` holds the exception to surface.
        raise result
    dataset = _dto_to_dataset(workspace, result)
    warn_deprecated_blocks(dataset)
    return dataset