def read_and_persist_raw_files(
    self, filepaths: Union[str, List[str]]
) -> Dict[str, xr.Dataset]:
    """------------------------------------------------------------------------------------
    Reads each raw file, saves it to storage under the filename produced by
    `DSUtil.get_raw_filename`, and collects the extracted data keyed by filename.

    A file whose read result is falsy is skipped with a warning: it is neither
    saved nor included in the returned mapping.

    Args:
        filepaths (Union[str, List[str]]): The path(s) to the raw file(s). A single
            string is treated as a one-element list.

    Returns:
        Dict[str, xr.Dataset]: The extracted raw data. A read result that is a
        single `xr.Dataset` is stored under its new filename; any other truthy
        result is merged in as-is via `dict.update` (presumably already a mapping
        of names to datasets -- confirm against the storage handlers).

    ------------------------------------------------------------------------------------"""
    paths = [filepaths] if isinstance(filepaths, str) else filepaths
    datasets_by_name: Dict[str, xr.Dataset] = {}

    for path in paths:
        data = self.storage.handlers.read(file=path, name=path)

        if data:
            raw_name = DSUtil.get_raw_filename(data, path, self.config)
            self.storage.save(path, new_filename=raw_name)

            # A lone dataset is wrapped so both result shapes merge the same way.
            datasets_by_name.update(
                {raw_name: data} if isinstance(data, xr.Dataset) else data
            )
        else:
            warnings.warn(f"Couldn't use extracted raw file: {path}")

    return datasets_by_name
def read_and_persist_raw_files(
    self, file_paths: "Union[str, List[str]]"
) -> "Dict[str, xr.Dataset]":
    """Reads the provided raw files, saves each one to storage under the
    filename returned by ``DSUtil.get_raw_filename``, and returns the datasets
    that were read, keyed by that new filename.

    Files for which ``FileHandler.read`` returns ``None`` are skipped with a
    warning; they are neither saved nor included in the result.

    :param file_paths: The path(s) to the original raw file(s). A single
        string is treated as a one-element list.
    :type file_paths: Union[str, List[str]]
    :return: A mapping of new raw filename to the dataset read from that file
        (not a list of paths, as the previous annotation claimed).
    :rtype: Dict[str, xr.Dataset]
    """
    raw_dataset_mapping = {}

    if isinstance(file_paths, str):
        file_paths = [file_paths]

    for file_path in file_paths:
        # The fetched path is only used inside the context manager, so reading,
        # naming and saving all happen before it exits.
        with self.storage.tmp.fetch(file_path) as tmp_path:
            dataset = FileHandler.read(tmp_path)

            # Don't use the file if no FileHandler produced a dataset for it.
            if dataset is None:
                warnings.warn(f"Couldn't use extracted raw file: {tmp_path}")
                continue

            # Create the standardized name for the raw file.
            new_filename = DSUtil.get_raw_filename(dataset, tmp_path, self.config)

            # Record the dataset first, then persist the raw file to storage.
            raw_dataset_mapping[new_filename] = dataset
            self.storage.save(tmp_path, new_filename)

    return raw_dataset_mapping