Example #1
0
    def parse(self):
        """Read and parse the datafile, then calculate secondary data

        Basic implementation of the full pipeline: resolve the file path if it has not been set, read the data, run
        the calculators and register the dependencies of this parser under its file key.

        If no data are available, either up front or after reading, a warning is logged and the calculators and the
        dependency registration are skipped.

        Returns:
            Parser: The parser itself, holding the parsed data
        """
        log.dev("where.parsers.parser is deprecated. Use where.parsers._parser or one of it's subclasses instead.")

        # Look up the file path from the configuration the first time it is needed
        if self.file_path is None:
            self.file_path = config.files.path(self.file_key, file_vars=self.vars, download_missing=True)

        parser_package, parser_name = self.__module__.rsplit(".", maxsplit=1)
        with Timer(f"Finish {parser_name} ({parser_package}) - {self.file_key} in"):
            if self.data_available:
                self.read_data()

            # Checked again on purpose: self.read_data() may have set data_available to False
            if self.data_available:
                self.calculate_data()
                dependencies.add(*self.dependencies, label=self.file_key)
            else:
                log.warn(
                    f"No data found by {self.__class__.__name__} for {self.rundate.strftime(config.FMT_date)} "
                    f"(was looking for {self.file_path})"
                )

        return self
Example #2
0
def test_timer_without_text(capsys):
    """A timer created with None as text should stay completely silent"""
    with Timer(None, logger=print):
        sum(n**2 for n in range(1000))

    captured = capsys.readouterr()
    assert captured.out == ""
    assert captured.err == ""
Example #3
0
def test_timer_as_context_manager(capsys):
    """Leaving a timed context should print exactly one line of timing information"""
    with Timer(TIME_MESSAGE, logger=print):
        sum(n**2 for n in range(1000))

    captured = capsys.readouterr()
    assert RE_TIME_MESSAGE.match(captured.out)
    assert captured.out.count("\n") == 1
    assert captured.err == ""
Example #4
0
def test_format_of_time_elapsed(capsys):
    """The fmt argument should control how the elapsed time is formatted"""
    # With fmt=".8f" the elapsed time is expected to be printed with eight decimals
    expected_pattern = TIME_MESSAGE + r" 0\.\d{8} seconds"

    with Timer(TIME_MESSAGE, fmt=".8f", logger=print):
        sum(n**2 for n in range(1000))

    captured = capsys.readouterr()
    assert re.match(expected_pattern, captured.out)
    assert captured.out.count("\n") == 1
    assert captured.err == ""
Example #5
0
 def read_dset(rundate):
     """Read the dataset for one day, falling back to an empty dataset

     The read is timed and reported through log.time. The dataset variables are taken from the enclosing scope
     (dset_vars), with the rundate replaced by the given one.

     Args:
         rundate:  The day to read the dataset for.

     Returns:
         The dataset that was read, or an empty Dataset if reading raised an OSError.
     """
     with Timer(f"Finish read of day {rundate} in", logger=log.time):
         try:
             log.info(f"Reading data for {rundate}")
             read_vars = dict(dset_vars, rundate=rundate)
             dset = dataset.Dataset.read(**read_vars)
         except OSError as read_error:
             log.warn(f"Unable to read data for {rundate}: {read_error}")
             dset = dataset.Dataset()
         return dset
Example #6
0
def test_text_with_format(capsys):
    """A {} placeholder in the text should decide where the elapsed time is inserted"""
    time_message = "Used {} to run the code"
    expected_pattern = time_message.format(r"0\.\d{4} seconds")

    with Timer(time_message, logger=print):
        sum(n**2 for n in range(1000))

    captured = capsys.readouterr()
    assert re.match(expected_pattern, captured.out)
    assert captured.out.count("\n") == 1
    assert captured.err == ""
Example #7
0
def _concatenate_datasets(from_date: date, to_date: date, dset_vars: Dict[str, str],
                          only_for_rundate: bool) -> "dataset.Dataset":
    """Concatenate datasets

    Reads one dataset per day from `from_date` to `to_date` (both included) and merges them. The first non-empty
    dataset that is read becomes the merged dataset, and the datasets of the following days are appended to it with
    `extend`. Days that can not be read, or that give an empty dataset, are skipped.

    Args:
        from_date:         Start date for reading Dataset.
        to_date:           End date for reading Dataset.
        dset_vars:         Common Dataset variables.
        only_for_rundate:  Concatenate only data for given rundate.

    Returns:
        The merged dataset, with "_concatenated" appended to its analysis id. If no data were read for any of the
        days, a warning is logged and an empty Dataset is returned.
    """

    def read_dset(rundate):
        """Read the dataset for one day, falling back to an empty dataset if the read raises OSError"""
        with Timer(f"Finish read of day {rundate} in", logger=log.time):
            try:
                log.info(f"Reading data for {rundate}")
                return dataset.Dataset.read(**dict(dset_vars, rundate=rundate))
            except OSError as err:
                log.warn(f"Unable to read data for {rundate}: {err}")
                return dataset.Dataset()

    dset_merged = None
    date_to_read = from_date
    while date_to_read <= to_date:
        # Advance the loop date up front, so that every path through the loop body moves on to the next day
        rundate = date_to_read
        date_to_read += timedelta(days=1)

        dset = read_dset(rundate)
        if not dset:  # Skip extension if dataset is empty
            continue

        if only_for_rundate:
            _keep_data_only_for_rundate(dset)

            if dset.num_obs == 0:
                log.warn(f"No data for {rundate} in dataset")

        # Initialize merged dataset with the first dataset that contains data
        if dset_merged is None:
            dset_merged = dset

            # Merged dataset should be related to start date
            if rundate != from_date:
                dset.vars["rundate"] = from_date.strftime("%Y-%m-%d")
                dset.analysis["rundate"] = from_date
                dset.analysis.update(config.date_vars(from_date))
            continue

        with Timer(f"Finish extend for day {rundate} in", logger=log.time):
            dset_merged.extend(dset)

    # Without this guard the id update below fails with an AttributeError on None when nothing was read
    if dset_merged is None:
        log.warn(f"No data found for any day from {from_date} to {to_date}")
        return dataset.Dataset()

    dset_merged.analysis.update(id=f"{dset_merged.analysis['id']}_concatenated")
    return dset_merged
Example #8
0
 def calculate_data(self):
     """Run all calculators on the data

     Calls each of the calculators given by `setup_calculators` in turn. The start of each calculator and the time
     it used are logged at debug level.
     """
     for calc in self.setup_calculators():
         calc_name, origin = calc.__name__, self.__module__
         log.debug(f"Start calculator {calc_name} in {origin}")

         timer = Timer(f"Finish calculator {calc_name} ({origin}) in", logger=log.debug)
         with timer:
             calc()
Example #9
0
    def calculate_data(self):
        """Run the calculators that post-process the data after they are read

        Calculators are meant for simple manipulations only, so that the parser stays as true a representation of
        the data file as possible. More advanced calculations belong in apriori classes or similar.

        A calculator is added by defining it as its own method and overriding the `setup_calculators`-method so that
        it returns a list of all calculators. Each calculator is called without arguments, and the time it used is
        logged at debug level.
        """
        module_name = self.__module__
        for calculator in self.setup_calculators():
            name = calculator.__name__
            log.debug(f"Start calculator {name} in {module_name}")
            with Timer(f"Finish calculator {name} ({module_name}) in", logger=log.debug):
                calculator()
Example #10
0
def parse_file(
    parser_name: str,
    file_path: Union[str, pathlib.Path],
    encoding: Optional[str] = None,
    parser_logger: Optional[Callable[[str], None]] = print,
    timer_logger: Optional[Callable[[str], None]] = None,
    use_cache: bool = False,
    **parser_args: Any,
) -> Parser:
    """Use the given parser on a file and return parsed data

    Specify `parser_name` and `file_path` to the file that should be parsed. The following parsers are available:

    {doc_parser_names}

    Data can be retrieved either as Dictionaries, Pandas DataFrames or Midgard Datasets by using one of the methods
    `as_dict`, `as_dataframe` or `as_dataset`.

    Example:

        >>> df = parse_file('rinex2_obs', 'ande3160.16o').as_dataframe()  # doctest: +SKIP

    Args:
        parser_name:    Name of parser
        file_path:      Path to file that should be parsed.
        encoding:       Encoding in file that is parsed.
        parser_logger:  Logging function that will be used by parser.
        timer_logger:   Logging function that will be used to log timing information.
        use_cache:      Whether to use a cache to avoid parsing the same file several times.
        parser_args:    Input arguments to the parser
    """
    # NOTE: The docstring above contains a doc_parser_names placeholder that is presumably filled in elsewhere,
    #       so no other curly braces should be added to it - TODO confirm

    # TODO: Cache (use_cache is accepted, but not used yet)

    # Look up the parser plugin by name in this package and set it up for the given file
    parser = plugins.call(
        package_name=__name__,
        plugin_name=parser_name,
        file_path=file_path,
        encoding=encoding,
        logger=parser_logger,
        **parser_args,
    )

    # Only the parsing itself is timed, not the creation of the parser
    timer_text = f"Finish {parser_name} ({__name__}) - {file_path} in"
    with Timer(timer_text, logger=timer_logger):
        return parser.parse()
Example #11
0
def test_explicit_timer(capsys):
    """Starting and ending a timer explicitly should print timing information once"""
    timer = Timer(TIME_MESSAGE, logger=print)

    timer.start()
    sum(n**2 for n in range(1000))
    timer.end()

    captured = capsys.readouterr()
    assert RE_TIME_MESSAGE.match(captured.out)
    assert captured.out.count("\n") == 1
    assert captured.err == ""
Example #12
0
def _concatenate_datasets(from_date: date, to_date: date, dset_vars: Dict[str, str],
                          only_for_rundate: bool) -> "dataset.Dataset":
    """Concatenate datasets

    Reads one dataset per day from `from_date` to `to_date` (both included). The dataset of `from_date` is used as
    the merged dataset, and the datasets of the following days are appended to it with `extend`. A day that can not
    be read contributes an empty Dataset.

    Args:
        from_date:         Start date for reading Dataset.
        to_date:           End date for reading Dataset.
        dset_vars:         Common Dataset variables.
        only_for_rundate:  Concatenate only data for given rundate.

    Returns:
        The merged dataset, with "_concatenated" appended to its analysis id.
    """

    def read_dset(rundate):
        """Read the dataset for one day, falling back to an empty dataset if the read raises OSError"""
        with Timer(f"Finish read of day {rundate} in", logger=log.time):
            try:
                log.info(f"Reading data for {rundate}")
                return dataset.Dataset.read(**dict(dset_vars, rundate=rundate))
            except OSError as err:
                log.warn(f"Unable to read data for {rundate}: {err}")
                return dataset.Dataset()

    def read_and_filter(rundate):
        """Read the dataset for one day and apply the only_for_rundate filtering if it is requested"""
        dset = read_dset(rundate)

        if only_for_rundate:
            _keep_data_only_for_rundate(dset)

            if dset.num_obs == 0:
                log.warn(f"No data for {rundate} in dataset")

        return dset

    # The first day goes through the same filtering as the following days, so that only_for_rundate is applied
    # consistently to all days that are concatenated
    dset_merged = read_and_filter(from_date)

    date_to_read = from_date + timedelta(days=1)
    while date_to_read <= to_date:
        dset = read_and_filter(date_to_read)

        with Timer(f"Finish extend for day {date_to_read} in", logger=log.time):
            dset_merged.extend(dset)
        date_to_read += timedelta(days=1)

    dset_merged.analysis.update(id=f"{dset_merged.analysis['id']}_concatenated")
    return dset_merged
Example #13
0
def main():
    """Read the command line options and run the Where analysis

    The command line arguments are only parsed in a simple way. After that the config-files are set up and the
    analysis is started, or an interactive session is started instead if that is asked for. See the help docstring
    at the top of the file for more information about the workflow.
    """
    util.check_help_and_version(doc_module=__name__)

    # Start logging
    log.init(config.where.log.default_level.str)
    log.debug(f"Use {util.get_python_version()} on process {util.get_pid_and_server()}")

    # Read command line options, the rundate is given either as day of year or as a date
    pipeline = pipelines.get_from_options()
    config.read_pipeline(pipeline)
    date_format = "doy" if util.check_options("--doy") else "date"
    rundate = util.parse_args(date_format, doc_module=__name__)

    args, kwargs = util.options2args(sys.argv[1:])

    # An interactive session replaces the regular analysis run
    if util.check_options("-I", "--interactive"):
        from where.tools import interactive  # Local import because interactive imports many external packages

        interactive.interactive(rundate, pipeline, **kwargs)
        return

    # Set up the configuration for a new analysis or update an existing one. Options that are not used by the setup
    # are passed on to the pipeline
    unused_options = setup.setup_config(rundate, pipeline, *args, **kwargs)
    pipeline_args, pipeline_kwargs = util.options2args(unused_options)

    # Run the analysis
    setup.add_timestamp(rundate, pipeline, "last run", **kwargs)
    with Timer(f"Finish pipeline {pipeline.upper()} in"):
        pipelines.run(rundate, pipeline, *pipeline_args, **pipeline_kwargs)
Example #14
0
def test_access_timer_object_in_context(capsys):
    """The timer object itself should be available inside the timed context"""
    with Timer(TIME_MESSAGE, logger=print) as timer:
        assert isinstance(timer, Timer)
        assert timer.text.startswith(TIME_MESSAGE)

    # Consume the captured output so the log message is not printed to standard out
    capsys.readouterr()
Example #15
0
def test_error_if_timer_not_running():
    """Ending a timer that was never started should raise TimerNotRunning"""
    timer = Timer(TIME_MESSAGE, logger=print)

    with pytest.raises(exceptions.TimerNotRunning):
        timer.end()
Example #16
0
def test_custom_logger():
    """Timing information should be passed on to a custom logger"""
    custom_logger = CustomLogger()

    with Timer(TIME_MESSAGE, logger=custom_logger):
        sum(n**2 for n in range(1000))

    assert RE_TIME_MESSAGE.match(custom_logger.messages)