Example #1
    def delete_from_file(self,
                         tech=None,
                         stage=None,
                         dataset_name=None,
                         dataset_id=None):
        """Delete this or related datasets from file

        Specify arguments relative to this dataset to find datasets which will be deleted.
        """
        # Use existing names as default
        tech = self.vars["tech"] if tech is None else tech
        stage = self.vars["stage"] if stage is None else stage
        dataset_name = self.dataset_name if dataset_name is None else dataset_name
        if dataset_id is None:
            dataset_id = self.dataset_id
        else:
            dataset_id = _data.parse_dataset_id(self.rundate, tech, stage,
                                                dataset_name, dataset_id)

        dataset_id = {dataset_id} if isinstance(dataset_id, (float, int)) else set(dataset_id)
        ids_to_delete = dataset_id & set(_data.list_dataset_ids(self.rundate, tech, dataset_name, stage, dataset_name))
        if not ids_to_delete:
            return

        # Open JSON and HDF5 file and remove datasets
        file_vars = dict(self.vars, tech=tech, stage=stage)
        json_path = files.path("dataset_json", file_vars=file_vars)
        with files.open_path(json_path, mode="rt", write_log=False) as f_json:
            json_all = json.load(f_json)
        with files.open_datafile("dataset_hdf5",
                                 file_vars=file_vars,
                                 mode="a",
                                 write_log=False) as f_hdf5:
            for id_to_delete in ids_to_delete:
                name = "{name}/{id:04d}".format(name=dataset_name,
                                                id=id_to_delete)
                del json_all[name]
                del f_hdf5[name]
                log.debug(
                    "Deleted {name} from dataset {tech}-{stage} at {directory}",
                    name=name,
                    tech=tech,
                    stage=stage,
                    directory=json_path.parent,
                )

        with files.open_path(json_path, mode="wt", write_log=False) as f_json:
            json.dump(json_all, f_json)

        # Delete files if all datasets are deleted
        if not any("/" in k for k in json_all):
            json_path.unlink()
            files.path("dataset_hdf5", file_vars=file_vars).unlink()
Example #2
    def write(self, write_level=None):
        """Write a dataset to file

        A dataset is stored on disk in two files, one JSON-file and one HDF5-file. Typically the HDF5-file is great for
        handling numeric data, while JSON is more flexible. The actual writing of the data is handled by the individual
        datatype table-classes. These classes are free to choose how they divide the data between the JSON- and
        HDF5-files, as long as they are able to recover all the data.
        """
        json_path = files.path("dataset_json", file_vars=self.vars)
        log.debug(f"Write dataset {self.vars['tech']}-{self.vars['stage']} to disk at {json_path.parent}")

        # Read write level from config
        write_level = config.tech.get("write_level", value=write_level).as_enum("write_level").name

        # Read existing data in JSON-file
        try:
            with files.open_path(json_path, mode="rt", write_log=False) as f_json:
                json_all = json.load(f_json)
        except FileNotFoundError:
            json_all = dict()
        json_all.setdefault(self.name, dict())
        json_data = json_all[self.name]

        # Figure out which tables have data
        tables = [t for t in self._data.values() if t.get_fields(write_level)]

        # Open HDF5-file
        with files.open_datafile("dataset_hdf5", file_vars=self.vars, mode="a", write_log=False) as f_hdf5:
            if self.name in f_hdf5:
                del f_hdf5[self.name]
            hdf5_data = f_hdf5.create_group(self.name)

            # Write data for each table (HDF5-data are automatically written to disk)
            for table in tables:
                table.write(json_data, hdf5_data, write_level)

        # Store metadata in JSON-data
        json_data["_version"] = where.__version__
        json_data["_num_obs"] = self.num_obs
        json_data["_tables"] = {tbl.name: tbl.datatype for tbl in tables}
        json_data["_units"] = {tbl.name: tbl._units for tbl in tables}
        json_data["_write_levels"] = {tbl.name: tbl._write_level_strings for tbl in tables}
        json_data["_meta"] = self.meta
        json_data["_vars"] = self.vars

        # Store last dataset_id written to
        json_all.setdefault(self.dataset_name, dict())["_last_dataset_id"] = self.dataset_id

        # Write JSON-data to file
        with files.open_path(json_path, mode="wt", write_log=False) as f_json:
            json.dump(json_all, f_json)
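
As far as can be read from this example and the delete_from_file example above, the resulting JSON file has top-level keys with a "/" for each written dataset, plus per-name entries that only track `_last_dataset_id`. A minimal sketch of inspecting such a file with the standard library; the file path is a placeholder, not a path used by the source.

import json

# Placeholder path; in the source the path comes from files.path("dataset_json", ...)
with open("dataset.json", mode="rt") as f_json:
    json_all = json.load(f_json)

for name, json_data in json_all.items():
    if "/" not in name:  # entries without "/" only hold _last_dataset_id per dataset name
        continue
    print(name, json_data["_num_obs"], sorted(json_data["_tables"]))
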
Example #3
    def read(self):
        """Read a dataset from file

        A dataset is stored on disk in two files, one JSON-file and one HDF5-file. Typically the HDF5-file is great for
        handling numeric data, while JSON is more flexible. The actual reading of the data is handled by the individual
        datatype table-classes. The dispatch to the correct class is done by functions defined in the
        :func:`Dataset._read`-method which is called by :mod:`where.data._data` when Dataset is first imported.
        """
        # Open and read JSON-file
        json_path = files.path("dataset_json", file_vars=self.vars)
        with files.open_path(json_path, mode="rt", write_log=False) as f_json:
            json_all = json.load(f_json)
        if self.name not in json_all:
            raise FileNotFoundError("Dataset {} not found in file {}".format(self.name, json_path))

        log.debug(f"Read dataset {self.vars['tech']}-{self.vars['stage']} from disk at {json_path.parent}")
        json_data = json_all[self.name]
        self._num_obs = json_data["_num_obs"]
        tables = json_data["_tables"]

        # Open HDF5-file
        with files.open_datafile("dataset_hdf5", file_vars=self.vars, mode="r", write_log=False) as f_hdf5:
            hdf5_data = f_hdf5[self.name]

            # Read data for each table by dispatching to read function based on datatype
            for table, dtype in tables.items():
                read_func = getattr(self, "_read_" + dtype)
                read_func(table, json_data, hdf5_data)

        # Add meta and vars properties
        self.meta = json_data.get("_meta", dict())
        self.vars = json_data.get("_vars", self.vars)
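
The per-datatype dispatch above relies on nothing more than `getattr` and a naming convention (`_read_` + datatype). A self-contained illustration of that pattern; the class and datatype names here are made up, not the library's actual table types.

class Reader:
    """Made-up class illustrating the getattr dispatch used in read() above."""

    def _read_float(self, table):
        print(f"reading numeric table {table!r} (e.g. from HDF5)")

    def _read_text(self, table):
        print(f"reading text table {table!r} (e.g. from JSON)")

    def read(self, tables):
        for table, dtype in tables.items():
            read_func = getattr(self, "_read_" + dtype)  # pick the reader from the datatype name
            read_func(table)


Reader().read({"obs": "float", "station": "text"})
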
Example #4
    def read_data(self):
        """Read data from a data file and parse the contents
        """
        # Get chain of parsers
        parsers_chain = iter(self.setup_parser())
        parser = next(parsers_chain)  # Pointing to first parser
        cache = dict(line_num=0)

        with files.open_path(self.file_path,
                             mode="rt",
                             encoding=self.file_encoding) as fid:
            # Get iterators for current and next line
            line_iter, next_line_iter = itertools.tee(fid)
            next(next_line_iter, None)

            # Iterate over all file lines including last line by using zip_longest
            for line, next_line in itertools.zip_longest(
                    line_iter, next_line_iter):
                cache["line_num"] += 1
                self.parse_line(line.rstrip(), cache, parser)

                # Skip to next parser
                if next_line is None or parser.end_marker(
                        line.rstrip(), cache["line_num"], next_line):
                    if parser.end_callback is not None:
                        parser.end_callback(cache)
                    cache = dict(line_num=0)
                    try:
                        parser = next(parsers_chain)
                    except StopIteration:
                        break
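
The look-ahead in the loop above comes from pairing the file iterator with a copy that has been advanced by one element, then zipping with `zip_longest` so the final line is still visited (its look-ahead is simply `None`). A standalone sketch of the same pattern over a list of strings:

import itertools

lines = ["header", "data 1", "data 2", "end of block"]

line_iter, next_line_iter = itertools.tee(lines)
next(next_line_iter, None)  # advance the copy so it is one line ahead

for line, next_line in itertools.zip_longest(line_iter, next_line_iter):
    print(f"{line!r:16} next: {next_line!r}")
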
Example #5
    def read_data(self):
        """Parse the vgosdb wrapper file

        self.data will be populated with information from the netcdf files
        """
        with files.open_path(self.file_path, mode="rt") as fid:
            self._parse_file(fid)

        self._organize_data()
Example #6
    def read_data(self):
        """Read data from datafiles

        This is a basic implementation that parses data from one file as specified by the file_key and vars properties.

        For more advanced uses, e.g. reading data from several files, this method should be overridden. In that case,
        make sure file dependencies are appended to the self.dependencies-list.

        """
        self.dependencies.append(self.file_path)
        is_zipped = files.is_path_zipped(self.file_path)
        if files.empty_file(self.file_path):
            log.warn(f"File {self.file_path} is empty.")
        with files.open_path(self.file_path, mode="rt",
                             is_zipped=is_zipped) as fid:
            self.parse_file(fid)
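
The `files.is_path_zipped`/`open_path` pair belongs to the library and is not shown here, but the underlying idea can be sketched with the standard library alone. The helper below is an assumption made for illustration, not the library's implementation: it sniffs the gzip magic bytes and opens the file accordingly.

import gzip
from pathlib import Path


def open_text(path):
    """Open a text file, transparently handling gzip (illustrative stand-in, assumed behaviour)."""
    path = Path(path)
    with open(path, "rb") as fid:
        is_zipped = fid.read(2) == b"\x1f\x8b"  # gzip files start with these magic bytes
    return gzip.open(path, mode="rt") if is_zipped else open(path, mode="rt")
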
Example #7
def _get_sp3_file_version(file_path):
    """ Get SP3 file version for a given file path

    Args:
        file_path (str):    SP3 file path

    Returns:
        str:            SP3 file version number
    """
    with files.open_path(file_path, mode="rt") as infile:
        version = infile.readline().split()[0]

    if len(version) < 2 or version[1] not in "acd":
        log.fatal(
            f"Unknown SP3 format {version!r} is used in file {file_path}")

    return version[1]
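
SP3 files begin with a marker such as `#cP` or `#dV`, where the second character is the format version letter checked above. A small self-contained run of the same parsing logic on an in-memory line; the header line itself is a made-up example:

import io

# Made-up SP3 first line; only the leading token matters for the version check
first_line = "#dP2021  1  1  0  0  0.00000000      96 ORBIT IGS14 FIT  EXAMPLE\n"

version = io.StringIO(first_line).readline().split()[0]
if len(version) < 2 or version[1] not in "acd":
    raise ValueError(f"Unknown SP3 format {version!r}")
print(version[1])  # -> d
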
Example #8
    def read_data(self):
        """Read data from a Sinex file and parse the contents

        First the whole Sinex file is read and the requested blocks are stored in self._sinex. After the file has been
        read, a parser is called on each block so that self.data is properly populated.
        """
        # Read raw sinex data to self._sinex from file
        with files.open_path(self.file_path, mode="rb") as fid:
            if self._header:
                self.parse_header_line(next(fid))  # Header must be first line
            self.parse_blocks(fid)

        # Apply parsers to raw sinex data, the information returned by parsers is stored in self.data
        for sinex_block in self.sinex_blocks:
            if sinex_block.parser and sinex_block.marker in self._sinex:
                params = self._sinex.get("__params__", dict()).get(sinex_block.marker, ())
                data = sinex_block.parser(self._sinex.get(sinex_block.marker), *params)
                if data is not None:
                    self.data[sinex_block.marker] = data
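
A compact sketch of the second phase above, the block-to-parser dispatch. The `SinexBlock` container, the marker and the parser below are assumptions made for illustration; only the loop mirrors the code in the example.

from collections import namedtuple

# Assumed container; the source only shows that blocks expose .marker and .parser
SinexBlock = namedtuple("SinexBlock", ["marker", "parser"])


def parse_file_reference(lines):
    """Made-up parser turning raw block lines into a dict."""
    return dict(line.strip().split(maxsplit=1) for line in lines)


sinex_blocks = (SinexBlock(marker="FILE/REFERENCE", parser=parse_file_reference),)

# Phase 1 result: raw lines per block marker (normally produced by parse_blocks)
_sinex = {"FILE/REFERENCE": [" DESCRIPTION   Example solution", " INPUT         Example observations"]}

# Phase 2: apply each block's parser and keep non-empty results
data = {}
for sinex_block in sinex_blocks:
    if sinex_block.parser and sinex_block.marker in _sinex:
        parsed = sinex_block.parser(_sinex[sinex_block.marker])
        if parsed is not None:
            data[sinex_block.marker] = parsed

print(data)  # {'FILE/REFERENCE': {'DESCRIPTION': 'Example solution', 'INPUT': 'Example observations'}}
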
Example #9
    def read_data(self):
        with files.open_path(self.file_path, mode="rt") as fid:
            self._parse_file(fid)