def parse(self):
    """Parse data

    This is a basic implementation that carries out the whole pipeline of reading and parsing datafiles, including
    calculating secondary data.

    Returns:
        Parser: The parsed data
    """
    log.dev("where.parsers.parser is deprecated. Use where.parsers._parser or one of its subclasses instead.")

    if self.file_path is None:
        self.file_path = config.files.path(self.file_key, file_vars=self.vars, download_missing=True)

    parser_package, parser_name = self.__module__.rsplit(".", maxsplit=1)
    with Timer("Finish {} ({}) - {} in".format(parser_name, parser_package, self.file_key)):
        if self.data_available:
            self.read_data()
        if not self.data_available:  # May have been set to False by self.read_data()
            log.warn(
                f"No data found by {self.__class__.__name__} for {self.rundate.strftime(config.FMT_date)} "
                f"(was looking for {self.file_path})"
            )
            return self
        self.calculate_data()
        dependencies.add(*self.dependencies, label=self.file_key)

    return self

def test_timer_without_text(capsys):
    """Test that timer with None text does not print anything"""
    with Timer(None, logger=print):
        sum(n**2 for n in range(1000))
    stdout, stderr = capsys.readouterr()

    assert stdout == ""
    assert stderr == ""

def test_timer_as_context_manager(capsys):
    """Test that timed context prints timing information"""
    with Timer(TIME_MESSAGE, logger=print):
        sum(n**2 for n in range(1000))
    stdout, stderr = capsys.readouterr()

    assert RE_TIME_MESSAGE.match(stdout)
    assert stdout.count("\n") == 1
    assert stderr == ""

def test_format_of_time_elapsed(capsys):
    """Test that we can change the format of the time elapsed"""
    with Timer(TIME_MESSAGE, fmt=".8f", logger=print):
        sum(n**2 for n in range(1000))
    stdout, stderr = capsys.readouterr()

    assert re.match(TIME_MESSAGE + r" 0\.\d{8} seconds", stdout)
    assert stdout.count("\n") == 1
    assert stderr == ""

def read_dset(rundate):
    with Timer(f"Finish read of day {rundate} in", logger=log.time):
        try:
            log.info(f"Reading data for {rundate}")
            return dataset.Dataset.read(**dict(dset_vars, rundate=rundate))
        except OSError as err:
            log.warn(f"Unable to read data for {rundate}: {err}")
            return dataset.Dataset()

def test_text_with_format(capsys):
    """Test that we can explicitly mark the point where the time is inserted in the text"""
    time_message = "Used {} to run the code"
    with Timer(time_message, logger=print):
        sum(n**2 for n in range(1000))
    stdout, stderr = capsys.readouterr()

    assert re.match(time_message.format(r"0\.\d{4} seconds"), stdout)
    assert stdout.count("\n") == 1
    assert stderr == ""

def _concatenate_datasets(
    from_date: date, to_date: date, dset_vars: Dict[str, str], only_for_rundate: bool
) -> "dataset.Dataset":
    """Concatenate datasets

    Args:
        from_date:         Start date for reading Dataset.
        to_date:           End date for reading Dataset.
        dset_vars:         Common Dataset variables.
        only_for_rundate:  Concatenate only data for given rundate.

    Returns:
        Dataset with data merged over the whole period.
    """
    dset_merged = None

    def read_dset(rundate):
        with Timer(f"Finish read of day {rundate} in", logger=log.time):
            try:
                log.info(f"Reading data for {rundate}")
                return dataset.Dataset.read(**dict(dset_vars, rundate=rundate))
            except OSError as err:
                log.warn(f"Unable to read data for {rundate}: {err}")
                return dataset.Dataset()

    date_to_read = from_date
    while date_to_read <= to_date:
        dset = read_dset(date_to_read)
        if dset:  # Skip extension if dataset is empty
            if only_for_rundate:
                _keep_data_only_for_rundate(dset)
                if dset.num_obs == 0:
                    log.warn(f"No data for {date_to_read} in dataset")

            # Initialize merged dataset
            if dset_merged is None:
                dset_merged = dset

                # Merged dataset should be related to start date
                if date_to_read != from_date:
                    dset.vars["rundate"] = from_date.strftime("%Y-%m-%d")
                    dset.analysis["rundate"] = from_date
                    dset.analysis.update(config.date_vars(from_date))

                date_to_read += timedelta(days=1)
                continue

            with Timer(f"Finish extend for day {date_to_read} in", logger=log.time):
                dset_merged.extend(dset)

        date_to_read += timedelta(days=1)

    dset_merged.analysis.update(id=f"{dset_merged.analysis['id']}_concatenated")
    return dset_merged

def calculate_data(self):
    """Carry out simple manipulations on the data after they are read"""
    for calculator in self.setup_calculators():
        log.debug(f"Start calculator {calculator.__name__} in {self.__module__}")
        with Timer(f"Finish calculator {calculator.__name__} ({self.__module__}) in", logger=log.debug):
            calculator()

def calculate_data(self):
    """Do simple manipulations on the data after they are read

    Simple manipulations of data may be performed in calculators after the data are read. The calculators should be
    kept simple so that a parser returns as true a representation of the data file as possible. Advanced calculations
    may be done inside apriori classes or similar.

    To add a calculator, define it in its own method and override the `setup_calculators`-method to return a list of
    all calculators.
    """
    for calculator in self.setup_calculators():
        log.debug(f"Start calculator {calculator.__name__} in {self.__module__}")
        with Timer(f"Finish calculator {calculator.__name__} ({self.__module__}) in", logger=log.debug):
            calculator()

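A minimal sketch of a subclass using this extension point, assuming the surrounding `Parser` base class and a `self.data` dictionary; the class name, field name and conversion below are hypothetical:

class HeightParser(Parser):
    """Hypothetical parser illustrating the calculator mechanism"""

    def setup_calculators(self):
        # List every calculator that calculate_data() should run, in order
        return [self.height_to_meters]

    def height_to_meters(self):
        # Simple secondary calculation: convert heights read as millimeters
        # (an assumption for this sketch) to meters
        self.data["height"] = [h / 1000 for h in self.data["height"]]
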
def parse_file(
    parser_name: str,
    file_path: Union[str, pathlib.Path],
    encoding: Optional[str] = None,
    parser_logger: Optional[Callable[[str], None]] = print,
    timer_logger: Optional[Callable[[str], None]] = None,
    use_cache: bool = False,
    **parser_args: Any,
) -> Parser:
    """Use the given parser on a file and return parsed data

    Specify `parser_name` and the `file_path` to the file that should be parsed. The following parsers are available:

    {doc_parser_names}

    Data can be retrieved either as dictionaries, pandas DataFrames or Midgard Datasets by using one of the methods
    `as_dict`, `as_dataframe` or `as_dataset`.

    Example:
        >>> df = parse_file('rinex2_obs', 'ande3160.16o').as_dataframe()  # doctest: +SKIP

    Args:
        parser_name:    Name of parser.
        file_path:      Path to file that should be parsed.
        encoding:       Encoding of the file that is parsed.
        parser_logger:  Logging function that will be used by the parser.
        timer_logger:   Logging function that will be used to log timing information.
        use_cache:      Whether to use a cache to avoid parsing the same file several times.
        parser_args:    Input arguments to the parser.

    Returns:
        Parser: Parser with the parsed data
    """
    # TODO: Cache

    # Create the parser and parse the data
    parser = plugins.call(
        package_name=__name__,
        plugin_name=parser_name,
        file_path=file_path,
        encoding=encoding,
        logger=parser_logger,
        **parser_args,
    )
    with Timer(f"Finish {parser_name} ({__name__}) - {file_path} in", logger=timer_logger):
        return parser.parse()

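Since `timer_logger` defaults to `None`, parsing is only timed when a logging callable is passed in. A usage sketch, reusing the hypothetical file name from the docstring example:

# Parse a RINEX 2 observation file and print how long parsing took
df = parse_file("rinex2_obs", "ande3160.16o", timer_logger=print).as_dataframe()
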
def test_explicit_timer(capsys):
    """Test that timed section prints timing information"""
    t = Timer(TIME_MESSAGE, logger=print)
    t.start()
    sum(n**2 for n in range(1000))
    t.end()
    stdout, stderr = capsys.readouterr()

    assert RE_TIME_MESSAGE.match(stdout)
    assert stdout.count("\n") == 1
    assert stderr == ""

def _concatenate_datasets(
    from_date: date, to_date: date, dset_vars: Dict[str, str], only_for_rundate: bool
) -> "dataset.Dataset":
    """Concatenate datasets

    Args:
        from_date:         Start date for reading Dataset.
        to_date:           End date for reading Dataset.
        dset_vars:         Common Dataset variables.
        only_for_rundate:  Concatenate only data for given rundate.

    Returns:
        Dataset with data merged over the whole period.
    """

    def read_dset(rundate):
        with Timer(f"Finish read of day {rundate} in", logger=log.time):
            try:
                log.info(f"Reading data for {rundate}")
                return dataset.Dataset.read(**dict(dset_vars, rundate=rundate))
            except OSError as err:
                log.warn(f"Unable to read data for {rundate}: {err}")
                return dataset.Dataset()

    dset_merged = read_dset(from_date)
    date_to_read = from_date + timedelta(days=1)
    while date_to_read <= to_date:
        dset = read_dset(date_to_read)
        if only_for_rundate:
            _keep_data_only_for_rundate(dset)
            if dset.num_obs == 0:
                log.warn(f"No data for {date_to_read} in dataset")

        with Timer(f"Finish extend for day {date_to_read} in", logger=log.time):
            dset_merged.extend(dset)
        date_to_read += timedelta(days=1)

    dset_merged.analysis.update(id=f"{dset_merged.analysis['id']}_concatenated")
    return dset_merged

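A hypothetical call of the function above, assuming `dset_vars` already holds the keyword arguments that `dataset.Dataset.read` needs besides `rundate`:

from datetime import date

# Merge three consecutive days into a single dataset; dset_vars is assumed
# to be defined elsewhere with the variables common to all three days
dset = _concatenate_datasets(date(2016, 3, 1), date(2016, 3, 3), dset_vars, only_for_rundate=True)
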
def main():
    """Parse command line options and run the Where analysis

    Do simple parsing of command line arguments. Set up config-files and start the analysis. See the help docstring
    at the top of the file for more information about the workflow.
    """
    util.check_help_and_version(doc_module=__name__)

    # Start logging
    log.init(config.where.log.default_level.str)
    log.debug(f"Use {util.get_python_version()} on process {util.get_pid_and_server()}")

    # Read command line options
    pipeline = pipelines.get_from_options()
    config.read_pipeline(pipeline)
    if util.check_options("--doy"):
        rundate = util.parse_args("doy", doc_module=__name__)
    else:
        rundate = util.parse_args("date", doc_module=__name__)
    args, kwargs = util.options2args(sys.argv[1:])

    # Start an interactive session
    if util.check_options("-I", "--interactive"):
        # Local import because interactive imports many external packages
        from where.tools import interactive

        interactive.interactive(rundate, pipeline, **kwargs)
        return

    # Set up the configuration for a new analysis or update an existing one
    unused_options = setup.setup_config(rundate, pipeline, *args, **kwargs)
    pipeline_args, pipeline_kwargs = util.options2args(unused_options)

    # Run the analysis
    setup.add_timestamp(rundate, pipeline, "last run", **kwargs)
    with Timer(f"Finish pipeline {pipeline.upper()} in"):
        pipelines.run(rundate, pipeline, *pipeline_args, **pipeline_kwargs)

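For context, a sketch of how this entry point is typically invoked; the exact command form is an assumption, while the options mentioned are the ones checked above:

# Illustrative command lines for the entry point above (not taken from the
# snippet itself): a date plus a pipeline selector, e.g.
#
#     where 2016 3 1 --vlbi
#
# with --doy switching the date argument to day-of-year form and
# -I/--interactive starting an interactive session instead of the analysis.
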
def test_access_timer_object_in_context(capsys):
    """Test that we can access the timer object inside a context"""
    with Timer(TIME_MESSAGE, logger=print) as t:
        assert isinstance(t, Timer)
        assert t.text.startswith(TIME_MESSAGE)
    _, _ = capsys.readouterr()  # Do not print log message to standard out

def test_error_if_timer_not_running():
    """Test that the timer raises an error if it is stopped before being started"""
    t = Timer(TIME_MESSAGE, logger=print)
    with pytest.raises(exceptions.TimerNotRunning):
        t.end()

def test_custom_logger():
    """Test that we can use a custom logger"""
    logger = CustomLogger()
    with Timer(TIME_MESSAGE, logger=logger):
        sum(n**2 for n in range(1000))

    assert RE_TIME_MESSAGE.match(logger.messages)

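These tests reference fixtures that are not part of the snippets above. A plausible reconstruction; the exact message text is an assumption, the rest follows from how the tests use the names:

import re

# Message used by the tests, and a regex matching Timer's default output,
# which appends the elapsed time formatted with four decimals
TIME_MESSAGE = "Time used:"
RE_TIME_MESSAGE = re.compile(TIME_MESSAGE + r" 0\.\d{4} seconds")


class CustomLogger:
    """Callable logger that collects messages in a string"""

    def __init__(self):
        self.messages = ""

    def __call__(self, message):
        self.messages += message
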