コード例 #1
0
ファイル: ingest_pipeline.py プロジェクト: tsdat/tsdat
    def read_and_persist_raw_files(
        self, filepaths: Union[str, List[str]]
    ) -> Dict[str, xr.Dataset]:
        """------------------------------------------------------------------------------------
        Reads each raw file through the storage handlers, saves it to storage under
        the filename produced by `DSUtil.get_raw_filename`, and collects the data
        that was read into a single mapping.

        A file whose read result is falsy is skipped with a warning and is not saved.

        Args:
            filepaths (Union[str, List[str]]): The path(s) to the raw file(s). A
                single string is treated as a one-element list.

        Returns:
            Dict[str, xr.Dataset]: The collected raw data. When a read yields a
            single `xr.Dataset` it is stored under the new raw filename; any other
            truthy result is merged into the mapping as-is.

        ------------------------------------------------------------------------------------"""
        paths = [filepaths] if isinstance(filepaths, str) else filepaths
        datasets_by_name: Dict[str, xr.Dataset] = {}

        for path in paths:
            data = self.storage.handlers.read(file=path, name=path)

            if data:
                raw_name = DSUtil.get_raw_filename(data, path, self.config)
                self.storage.save(path, new_filename=raw_name)

                # A lone dataset is keyed by its new raw filename; anything else
                # is handed to dict.update() unchanged.
                entries = {raw_name: data} if isinstance(data, xr.Dataset) else data
                datasets_by_name.update(entries)
            else:
                warnings.warn(f"Couldn't use extracted raw file: {path}")

        return datasets_by_name
コード例 #2
0
ファイル: ingest_pipeline.py プロジェクト: calumkenny/tsdat
    def read_and_persist_raw_files(self, file_paths: List[str]) -> dict:
        """Reads each raw file into a dataset, saves the file to storage under
        its standardized raw-data filename, and returns the datasets keyed by
        that filename.

        A single path passed as a plain string is treated as a one-element
        list. Files for which ``FileHandler.read`` returns ``None`` are skipped
        with a warning and are not saved.

        :param file_paths: The path(s) to the original raw files.
        :type file_paths: List[str]
        :return: A mapping of each standardized raw filename to the dataset
            that was read from the corresponding file.
        :rtype: dict
        """
        if isinstance(file_paths, str):
            file_paths = [file_paths]

        raw_dataset_mapping = {}

        for file_path in file_paths:

            # Everything that uses tmp_path stays inside the fetch context
            # (presumably the fetched path is only valid while it is open).
            with self.storage.tmp.fetch(file_path) as tmp_path:
                dataset = FileHandler.read(tmp_path)

                # Nothing usable was read (per the original note: no
                # FileHandler is registered for this file), so skip it.
                if dataset is None:
                    warnings.warn(
                        f"Couldn't use extracted raw file: {tmp_path}")
                    continue

                # create the standardized name for the raw file
                new_filename = DSUtil.get_raw_filename(
                    dataset, tmp_path, self.config)

                # record the dataset, then save the raw data to storage
                raw_dataset_mapping[new_filename] = dataset
                self.storage.save(tmp_path, new_filename)

        return raw_dataset_mapping