def rename(self, rundate=None, tech=None, stage=None, dataset_name=None, dataset_id=None, **kwargs):
    """Rename a dataset

    Renames the dataset. In particular, this means that if the dataset is written to file it will be written to a
    different file (or different place in the same file). All arguments are optional. If they are not given, they
    keep their existing value.

    Args:
        rundate:       Date, the model run date.
        tech:          String, the technique.
        stage:         String, the stage.
        dataset_name:  String, the name of the dataset.
        dataset_id:    Int, id of the dataset.
    """
    # Set rundate
    if rundate is not None:
        self.rundate = rundate

    # Use existing names as default
    tech = self.vars["tech"] if tech is None else tech
    stage = self.vars["stage"] if stage is None else stage
    dataset_name = self.dataset_name if dataset_name is None else dataset_name
    if dataset_id is None:
        dataset_id = self.dataset_id
    else:
        dataset_id = _data.parse_dataset_id(self.rundate, tech, stage, dataset_name, dataset_id)

    # Update names
    self.name = "{name}/{id:04d}".format(name=dataset_name, id=dataset_id)
    kwargs.setdefault("session", dataset_name)
    self.vars.update(dict(tech=tech, stage=stage, dataset_name=dataset_name, dataset_id=dataset_id, **kwargs))
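# Usage sketch (illustrative, not part of the original module; assumes these methods live on a
# class named Dataset and that ``dset`` is an existing instance -- the stage and name values
# below are hypothetical placeholders). ``rename`` only updates the naming variables, so the
# in-memory data is untouched; the dataset is simply written to a different file/group next time:
#
#     dset.rename(stage="estimate")                  # same data, new stage in the output files
#     dset.rename(dataset_name="XB", dataset_id=1)   # new name and id within the same stage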
def delete_from_file(self, tech=None, stage=None, dataset_name=None, dataset_id=None):
    """Delete this or related datasets from file

    Specify arguments relative to this dataset to find datasets which will be deleted.
    """
    # Use existing names as default
    tech = self.vars["tech"] if tech is None else tech
    stage = self.vars["stage"] if stage is None else stage
    dataset_name = self.dataset_name if dataset_name is None else dataset_name
    if dataset_id is None:
        dataset_id = self.dataset_id
    else:
        dataset_id = _data.parse_dataset_id(self.rundate, tech, stage, dataset_name, dataset_id)
    dataset_id = {dataset_id} if isinstance(dataset_id, (float, int)) else set(dataset_id)

    ids_to_delete = dataset_id & set(_data.list_dataset_ids(self.rundate, tech, dataset_name, stage, dataset_name))
    if not ids_to_delete:
        return

    # Open JSON and HDF5 file and remove datasets
    file_vars = dict(self.vars, tech=tech, stage=stage)
    json_path = files.path("dataset_json", file_vars=file_vars)
    with files.open_path(json_path, mode="rt", write_log=False) as f_json:
        json_all = json.load(f_json)
    with files.open_datafile("dataset_hdf5", file_vars=file_vars, mode="a", write_log=False) as f_hdf5:
        for id_to_delete in ids_to_delete:
            name = "{name}/{id:04d}".format(name=dataset_name, id=id_to_delete)
            del json_all[name]
            del f_hdf5[name]
            log.debug(
                "Deleted {name} from dataset {tech}-{stage} at {directory}",
                name=name,
                tech=tech,
                stage=stage,
                directory=json_path.parent,
            )
    with files.open_path(json_path, mode="wt", write_log=False) as f_json:
        json.dump(json_all, f_json)

    # Delete files if all datasets are deleted
    if not any(["/" in k for k in json_all.keys()]):
        json_path.unlink()
        files.path("dataset_hdf5", file_vars=file_vars).unlink()
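# Usage sketch (illustrative, not part of the original module; ``dset`` and the ids below are
# hypothetical). All arguments default to the values of this dataset, so a bare call removes only
# this dataset from the JSON/HDF5 files, while an explicit ``dataset_id`` list removes siblings too:
#
#     dset.delete_from_file()                   # delete only the dataset this object refers to
#     dset.delete_from_file(dataset_id=[0, 1])  # delete ids 0 and 1 for the same tech/stage/name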
def __init__(self, rundate, tech, stage, dataset_name, dataset_id, empty=False, **kwargs):
    """Create a new Dataset or read an existing one

    Note:
        Be aware that the implementation is dependent on ``self._fields`` being the first attribute to be set.
        See :func:`__setattr__` for more information.

    Args:
        rundate:       Date, the model run date.
        tech:          String, the technique.
        stage:         String, the stage.
        dataset_name:  String, the name of the dataset.
        dataset_id:    Int, id of the dataset.
        empty:         Boolean, if False (default) will read dataset from disk if available.
    """
    self._fields = dict()
    self._data = dict()
    self._num_obs = 0
    self._default_field_suffix = None
    self._kwargs = kwargs
    self._kwargs.setdefault("session", dataset_name)  # TODO: Can this be removed?
    self.vars = dict(
        config.program_vars(
            **dict(
                kwargs,
                rundate=rundate,
                tech_name=tech,
                stage=stage,
                dataset_name=dataset_name,
                dataset_id=str(dataset_id),
            )
        )
    )
    self.vars.update(**kwargs)
    self.rundate = rundate
    dataset_id = _data.parse_dataset_id(rundate, tech, stage, dataset_name, dataset_id)
    self.name = "{name}/{id:04d}".format(name=dataset_name, id=dataset_id)
    self.meta = dict()

    # Try to read dataset from disk unless explicitly told to create an empty dataset
    if not empty:
        try:
            self.read()
        except FileNotFoundError:
            pass
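# Usage sketch (illustrative, not part of the original module; assumes this is the constructor of a
# class named Dataset and that the technique/stage/name values are hypothetical placeholders).
# A new instance reads an existing dataset from disk when one is found; with ``empty=True`` it
# always starts out empty:
#
#     from datetime import date
#
#     dset = Dataset(date(2018, 1, 1), tech="vlbi", stage="read", dataset_name="XA", dataset_id=0)
#     blank = Dataset(date(2018, 1, 1), tech="vlbi", stage="read", dataset_name="XA", dataset_id=0, empty=True)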