Example #1
    def parse(self):
        """Parse data

        This is a basic implementation that carries out the whole pipeline of reading and parsing data files, including
        calculating secondary data.

        Returns:
            Parser: The parsed data
        """
        log.dev(
            "where.parsers.parser is deprecated. Use where.parsers._parser or one of its subclasses instead."
        )

        if self.file_path is None:
            self.file_path = config.files.path(self.file_key,
                                               file_vars=self.vars,
                                               download_missing=True)

        parser_package, parser_name = self.__module__.rsplit(".", maxsplit=1)
        with Timer("Finish {} ({}) - {} in".format(parser_name, parser_package,
                                                   self.file_key)):
            if self.data_available:
                self.read_data()

            if not self.data_available:  # May have been set to False by self.read_data()
                log.warn(
                    f"No data found by {self.__class__.__name__} for {self.rundate.strftime(config.FMT_date)} "
                    f"(was looking for {self.file_path})")
                return self

            self.calculate_data()
            dependencies.add(*self.dependencies, label=self.file_key)

        return self
Example #2
def call(
    package_name: str,
    plugin_name: str,
    part: Optional[str] = None,
    prefix: Optional[str] = None,
    plugin_logger: Optional[Callable[[str], None]] = None,
    **plugin_args: Any,
) -> Any:
    """Call one plug-in

    Args:
        package_name:   Name of package containing plug-ins.
        plugin_name:    Name of the plug-in, i.e. the module containing the plug-in.
        part:           Name of function to call within the plug-in (optional).
        prefix:         Prefix of the plug-in name, used if the plug-in name is not found (optional).
        plugin_logger:  Function used for logging (optional).
        plugin_args:    Named arguments passed on to the plug-in.

    Returns:
        Return value of the plug-in.
    """
    plugin = get(package_name, plugin_name, part, prefix)

    # Log message about calling plug-in
    if plugin_logger is not None:
        plugin_logger(f"Start plug-in {plugin.name!r} in {package_name!r}")

    # Add dependency to the plug-in
    dependencies.add(plugin.file_path, label=f"plugin:{package_name}")

    # Call plug-in
    return plugin.function(**plugin_args)
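
A minimal usage sketch of the function above; the package and plug-in names are illustrative assumptions, not taken from this list:

# Hypothetical call: forward a dataset to a plug-in and log the call
result = plugins.call(
    package_name="where.writers",  # assumed name of a package containing plug-ins
    plugin_name="sinex",           # assumed plug-in module name
    plugin_logger=log.info,        # optional logging of the call
    dset=dset,                     # named arguments are forwarded to the plug-in
)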
Example #3
    def _read_data(self):
        """Read data needed by this Reference Frame for calculating positions of sites

        Delegates to _read_data_<self.version> to read the actual data.

        Returns:
            Dict:  Dictionary containing data about each site defined in this reference frame.
        """
        trf = Configuration("trf_custom")
        trf.profiles = config.analysis.get("tech",
                                           value=self.version,
                                           default="").list
        trf_path = files.path("trf-custom")
        trf_local_path = files.path("trf-custom_local")

        trf.update_from_file(trf_path)
        dependencies.add(trf_path, label="trf")
        if trf_local_path.exists():
            trf.update_from_file(trf_local_path)
            dependencies.add(trf_local_path, label="trf")

        data = dict()
        for section in trf.sections:
            info = {
                k: v
                for k, v in section.as_dict().items() if k != "pos_itrs"
            }
            info["pos"] = np.array(section.pos_itrs.list, dtype=float)
            data[section.name] = info

        return data
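
For reference, a hedged sketch of the dictionary returned above; the site key, extra field and coordinates are purely illustrative:

# Illustrative only: one entry per section of the trf_custom configuration
data = {
    "zimm": {
        "domes": "14001M004",                               # hypothetical key copied from the section
        "pos": np.array([4331297.0, 567555.6, 4633133.7]),  # built from the section's pos_itrs list
    },
}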
Example #4
    def __init__(self, time, ephemerides):
        """Create an Ephemerides-instance that calculates ephemerides for the given time epochs.

        It is possible not to specify the time epochs when creating the instance (set `time=None`). In that case,
        `time` must be supplied as a parameter to each call that calculates ephemerides.

        The SPK file is read and parsed when the instance is created. In particular, the names and IDs of the
        available objects are read.

        Args:
            time (Time):           Time epochs for which to calculate ephemerides.
            ephemerides (String):  Name of ephemerides to use.
        """
        self.time = time
        self.ephemerides = ephemerides

        # Open the SPK-file corresponding to the ephemerides
        eph_filepath = config.files.path(
            "ephemerides",
            file_vars=dict(ephemerides=ephemerides),
            download_missing=True)
        self._spk = SPK.open(eph_filepath)  # TODO: Close file in destructor
        dependencies.add(eph_filepath, label="ephemerides")

        # Parse segments in SPK file
        self._names, self._segments = self._parse_segments()
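
A minimal construction sketch; the ephemerides name is an illustrative assumption, valid names come from the file configuration:

# Illustrative: defer the time epochs and supply them in each call instead
eph = Ephemerides(time=None, ephemerides="de430")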
Example #5
def parse_key(file_key,
              file_vars=None,
              parser_name=None,
              use_cache=True,
              **parser_args):
    """Parse a file given in the Where file-list and return parsed data

    The `file_key` is looked up in the file list to figure out which file should be parsed. The name of the parser is
    also looked up in the file configuration. The dictionary `file_vars` may be specified if variables are needed to
    figure out the correct file path from the configuration. The following file keys are available:

    {doc_file_keys}

    Data can be retrieved either as Dictionaries, Pandas DataFrames or Where Datasets by using one of the methods
    `as_dict`, `as_dataframe` or `as_dataset`.

    Example:
        > df = parsers.parse_key('center_of_mass', file_vars=dict(satellite='Lageos')).as_dataset()

    Args:
        file_key (String):     Used to look up parser_name and file_path in the Where file configuration.
        file_vars (Dict):      Additional file variables used when looking up file path in configuration.
        parser_name (String):  Name of parser to use. Default is to use parser named in the file list.
        use_cache (Boolean):   Whether to use a cache to avoid parsing the same file several times.
        parser_args:           Input arguments to the parser.

    Returns:
        Parser:  Parser with the parsed data
    """
    # Read parser_name from config.files if it is not given
    parser_name = config.files.get(section=file_key,
                                   key="parser",
                                   value=parser_name).str
    if not parser_name:
        log.warn(
            f"No parser found for {file_key!r} in {', '.join(config.files.sources)}"
        )

    # Figure out the file path
    file_vars = dict() if file_vars is None else file_vars
    download_missing = config.where.files.download_missing.bool
    file_path = config.files.path(file_key,
                                  file_vars=file_vars,
                                  download_missing=download_missing,
                                  use_aliases=True)
    dependencies.add(file_path, label=file_key)
    parser_args.setdefault("encoding", config.files.encoding(file_key))

    # Use the Midgard parser function to create parser and parse data
    return parse_file(parser_name,
                      file_path,
                      use_cache=use_cache,
                      timer_logger=log.time,
                      **parser_args)
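
Following the docstring above, a minimal sketch of the three retrieval methods, reusing the file key from the docstring example:

parser = parsers.parse_key("center_of_mass", file_vars=dict(satellite="Lageos"))
data_as_dict = parser.as_dict()        # plain dictionary
data_as_df = parser.as_dataframe()     # Pandas DataFrame
data_as_dset = parser.as_dataset()     # Where Dataset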
Example #6
def run_stage(rundate, pipeline, dset, stage, prev_stage, **kwargs):
    # Skip stages where no dependencies have changed
    dep_path = config.files.path("depends",
                                 file_vars={
                                     **kwargs, "stage": stage
                                 })
    if not (dependencies.changed(dep_path)
            or util.check_options("-F", "--force")):
        log.info(
            f"Not necessary to run {stage} for {pipeline.upper()} {rundate.strftime(config.FMT_date)}"
        )
        return

    if dset is None:
        try:
            # Read dataset from disk if it exists
            dset = dataset.Dataset.read(rundate=rundate,
                                        pipeline=pipeline,
                                        stage=prev_stage,
                                        label="last",
                                        **kwargs)
        except (OSError, ValueError):
            # Create empty dataset
            dset = dataset.Dataset(rundate=rundate,
                                   pipeline=pipeline,
                                   **kwargs)

    # Set up dependencies. Add dependencies to previous stage and config file
    dependencies.init(dep_path)
    if prev_stage is not None:
        dependencies.add(config.files.path("depends",
                                           file_vars={
                                               **kwargs, "stage": prev_stage
                                           }),
                         label="depends")
    dependencies.add(*config.tech.sources, label="config")
    # Delete old datasets for this stage
    dset.delete_stage(stage, **kwargs)

    # Call the current stage. Skip rest of stages if current stage returns False (compare with is since by
    # default stages return None)
    plugins.call(package_name=__name__,
                 plugin_name=pipeline,
                 part=stage,
                 stage=stage,
                 dset=dset,
                 plugin_logger=log.info)
    dependencies.write()

    return dset
Example #7
    def _read(self, dset_raw):
        """Read SP3 orbit file data and save it in a Dataset

        In addition to the given date, we read data for the day before and after. This is needed to carry out correct
        orbit interpolation at the start and end of a day.

        TODO:
        How well do the orbits fit from day to day? Is it necessary to align the orbits?

        Args:
            dset_raw (Dataset):   Dataset representing raw data from apriori orbit files
        """
        date_to_read = dset_raw.analysis["rundate"] - timedelta(
            days=self.day_offset)
        file_paths = list()

        # Loop over days to read
        while date_to_read <= dset_raw.analysis["rundate"] + timedelta(
                days=self.day_offset):
            if self.file_path is None:
                file_path = config.files.path(
                    self.file_key, file_vars=config.date_vars(date_to_read))
            else:
                file_path = self.file_path

            log.debug(f"Parse precise orbit file {file_path}")

            # Generate temporary Dataset with orbit file data
            dset_temp = dataset.Dataset(rundate=date_to_read,
                                        pipeline=dset_raw.vars["pipeline"],
                                        stage="temporary")
            parser = parsers.parse(parser_name="orbit_sp3",
                                   file_path=file_path,
                                   rundate=date_to_read)
            parser.write_to_dataset(dset_temp)
            file_paths.append(str(parser.file_path))
            dependencies.add(str(parser.file_path),
                             label=self.file_key)  # Used for output writing

            # Extend Dataset dset_raw with temporary Dataset
            date = date_to_read.strftime("%Y-%m-%d")
            if dset_raw.num_obs == 0:
                dset_raw.update_from(dset_temp)
            else:
                dset_raw.extend(dset_temp, meta_key=date)
            dset_raw.meta.add("file_path", file_paths, section="parser")

            date_to_read += timedelta(days=1)

        return dset_raw
Example #8
def get_sp3c_or_sp3d(rundate, file_path=None, **kwargs):
    """Use either OrbitSp3cParser or OrbitSp3dParser for reading orbit files in SP3c or SP3d format

    First, the version of the SP3 file is read. Based on this version, it is decided which parser should be used.

    Args:
        rundate (date):           The model run date.
        file_path (str):          Optional path to orbit-file to parse.
    """
    version = _get_sp3_file_version(file_path)
    dependencies.add(file_path, label="gnss_orbit_sp3")  # MURKS_hjegei: Better solution?

    if version in "ac":
        return orbit_sp3c.OrbitSp3cParser(file_path=file_path, **kwargs)
    elif version.startswith("d"):
        return orbit_sp3d.OrbitSp3dParser(file_path=file_path, **kwargs)
    else:
        log.fatal(f"Unknown SP3 format {version!r} is used in file {file_path}")
Example #9
def write_sinex(dset):
    """Write normal equations of session solution in SINEX format.

    Args:
        dset:  Dataset, data for a model run.
    """
    # Add dependency to sinex_blocks-module
    dependencies.add(sinex_blocks.__file__)

    if config.tech.analysis_status.status.str == "bad":
        log.info("Bad session. Not producing SINEX.")
        return
    with files.open("output_sinex", file_vars=dset.vars, mode="wt") as fid:
        sinex = sinex_blocks.SinexBlocks(dset, fid)
        sinex.header_line()
        for block in config.tech[WRITER].blocks.list:
            block_name, *args = block.split(":")
            sinex.write_block(block_name, *args)
        sinex.end_line()
Example #10
def run(rundate, pipeline, session=""):
    """Run a Where pipeline for a given date and session

    Args:
        rundate:   Rundate of analysis.
        pipeline:  Pipeline used for analysis.
        session:   Session in analysis.
    """
    if not setup.has_config(rundate, pipeline, session):
        log.fatal(
            f"No configuration found for {pipeline.upper()} {session} {rundate.strftime(config.FMT_date)}"
        )

    # Set up session config
    config.init(rundate=rundate, tech_name=pipeline, session=session)

    # Set up prefix for console logger and start file logger
    log_cfg = config.where.log
    prefix = f"{pipeline.upper()} {session} {rundate:%Y-%m-%d}"
    log.init(log_level=log_cfg.default_level.str, prefix=prefix)
    if log_cfg.log_to_file.bool:
        log.file_init(
            file_path=files.path("log"),
            log_level=log_cfg.default_level.str,
            prefix=prefix,
            rotation=log_cfg.number_of_log_backups.int,
        )

    # Read which stages to skip from technique configuration file.
    skip_stages = config.tech.get("skip_stages", default="").list

    # Register filekey suffix
    filekey_suffix = config.tech.filekey_suffix.list
    if filekey_suffix:
        config.files.profiles = filekey_suffix

    # Find which stages we will run analysis for
    # TODO: Specify stage_list in config
    stage_list = [s for s in stages(pipeline) if s not in skip_stages]

    # Start file logging and reporting
    reports.report.init(sessions=[session])
    reports.report.start_session(session)
    reports.report.text("header", session.replace("_", " ").title())

    # Update analysis config and file variables
    config.set_analysis(rundate=rundate,
                        tech=pipeline,
                        analysis=pipeline,
                        session=session)
    config.set_file_vars(file_vars())

    # Log the name of the session
    log.blank()  # Empty line for visual clarity
    log.info(f"Start session {session}")
    session_timer = timer(f"Finish session {session} in")
    session_timer.start()

    # Run stages, keep track of previous stage
    dset = None
    dep_fast = config.where.files.dependencies_fast.bool
    for prev_stage, stage in zip([None] + stage_list, stage_list):

        # Skip stages where no dependencies have changed
        dep_path = files.path("depends", file_vars=dict(stage=stage))
        if not (dependencies.changed(dep_path, fast_check=dep_fast)
                or util.check_options("-F", "--force")):
            log.info(
                f"Not necessary to run {stage} for {pipeline.upper()} {rundate.strftime(config.FMT_date)}"
            )
            continue
        elif dset is None:
            # Create or read dataset
            empty = stage == stage_list[0]
            dset = dataset.Dataset(rundate,
                                   tech=pipeline,
                                   stage=prev_stage,
                                   dataset_name=session,
                                   dataset_id="last",
                                   empty=empty)

        # Report on the stage
        reports.report.start_section(stage)
        reports.report.text("header", stage.replace("_", " ").title())
        if prev_stage:
            log.blank()  # Empty line for visual clarity

        # Set up dependencies. Add dependencies to previous stage and config file
        dependencies.init(dep_path, fast_check=dep_fast)
        dependencies.add(files.path("depends",
                                    file_vars=dict(stage=prev_stage)),
                         label="depends")
        dependencies.add(*config.tech.sources, label="config")

        # Delete old datasets for this stage
        dset.delete_from_file(stage=stage, dataset_id="all")

        # Call the current stage. Skip rest of stages if current stage returns False (compare with is since by
        # default stages return None)
        plugins.call(package_name=__name__,
                     plugin_name=pipeline,
                     part=stage,
                     stage=stage,
                     dset=dset,
                     plugin_logger=log.info)
        dependencies.write()
        if dset.num_obs == 0:
            log.warn(
                f"No observations in dataset after {stage} stage. Exiting pipeline"
            )
            break
    else:  # Only done if loop does not break (all stages finish normally)
        # Publish files for session
        files.publish_files()

    session_timer.end()

    # Store configuration to library
    setup.store_config_to_library(rundate, pipeline, session)

    # Write reports specified in config
    reports.write(rundate, pipeline)

    # Write requirements to file for reproducibility
    util.write_requirements()
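
A minimal sketch of invoking the entry point above; the pipeline and session names are illustrative assumptions:

from datetime import date

# Illustrative values only; actual pipeline and session names depend on the Where configuration
run(date(2021, 6, 1), pipeline="vlbi", session="XA")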
Example #11
def call_one(package_name,
             plugin_name,
             part=None,
             prefix=None,
             logger=log.time,
             use_timer=True,
             do_report=True,
             **kwargs):
    """Call one plug-in

    If the plug-in is not part of the package an UnknownPluginError is raised.

    If there are several functions registered in a plug-in and `part` is not specified, then the first function
    registered in the plug-in will be called.

    The file containing the source code of the plug-in is added to the list of dependencies.

    Args:
        package_name (String):  Name of package containing plug-ins.
        plugin_name (String):   Name of the plug-in, i.e. the module containing the plug-in.
        part (String):          Name of function to call within the plug-in (optional).
        prefix (String):        Prefix of the plug-in name, used if the plug-in name is unknown (optional).
        logger (Function):      Logger from the lib.log package specifying the level of logging to be used (optional).
        use_timer (Boolean):    Whether to time and log the call to the plug-in (optional).
        do_report (Boolean):    Whether to add the call to the plug-in to the report (optional).
        kwargs:                 Named arguments passed on to the plug-in.

    Returns:
        Return value of the plug-in.
    """
    # Get Plugin-object
    plugin_name = load_one(package_name, plugin_name, prefix=prefix)
    part = "__default__" if part is None else part
    try:
        plugin = _PLUGINS[package_name][plugin_name][part]
    except KeyError:
        raise exceptions.UnknownPluginError(
            "Plugin '{}' not found for '{}' in '{}'"
            "".format(part, plugin_name, package_name)) from None

    # Add plug-in to report
    if do_report:
        from where.reports import report

        code_kwargs = kwargs.copy()
        if "dset" in code_kwargs:
            code_kwargs["dset"] = code_kwargs["dset"].repr
        report.add(
            package_name,
            __plugin__=plugin.name,
            __doc__=plugin.function.__doc__,
            __text__="TODO",
            __code__=
            "kwargs = {}\n{} = plugins.call_one('{}', '{}', part='{}', **kwargs)"
            "".format(code_kwargs, plugin_name, package_name, plugin_name,
                      part),
            **kwargs,
        )

    # Call plug-in
    dependencies.add(plugin.file_path, label="plugin")
    if logger:
        logger(f"Start {plugin.name} in {package_name}")
        time_logger = log.time if use_timer else None
    else:
        time_logger = None
    with timer(f"Finish {plugin.name} ({package_name}) in",
               logger=time_logger):
        return plugin.function(**kwargs)