def write(writer: str, **writer_args: Any) -> None:
    """Call one writer

    Args:
        writer:       Name of writer.
        writer_args:  Arguments passed on to writer.
    """
    plugins.call(package_name=__name__, plugin_name=writer, **writer_args)
def test_logger(capsys, plugin_package):
    """Test that using a logger prints to stdout"""
    plugin_name = "plugin_plain"
    plugins.call(plugin_package, plugin_name, plugin_logger=print)

    stdout, stderr = capsys.readouterr()
    assert len(stdout) > 0
    assert stderr == ""
def main():
    """Invoke where_tools

    To add a new tool, simply add a new .py-file with a registered plugin that will be called when the tool is
    called.
    """
    # Start logging
    log.init(log_level="info")

    # First read tool from command line, don't use util.parse_args() as that overrides -h for each tool
    try:
        tool = [a for a in sys.argv[1:] if not a.startswith("-")][0]
        sys.argv.remove(tool)
    except IndexError:
        util._print_help_from_doc(__name__)
        raise SystemExit

    # Check that tool is available and figure out signature
    try:
        sig = plugins.signature(__name__, tool)
        tool_module = plugins.get(__name__, tool).function.__module__
    except exceptions.UnknownPluginError as err:
        util._print_help_from_doc(__name__)
        err.args = (f"{err.args[0]}\n    Available tools are {', '.join(plugins.names(__name__))}",)
        raise

    # Parse parameters
    util.check_help_and_version(doc_module=tool_module)

    tool_args = dict()
    for key, param in sig.parameters.items():
        if param.annotation is None:
            raise SystemExit(f"{param} in {tool} tool is not annotated")

        if param.annotation == "datedoy":
            if util.check_options("--doy"):
                date = util.parse_args("doy", doc_module=__name__)
            else:
                date = util.parse_args("date", doc_module=__name__)
            tool_args[key] = date
        elif param.annotation == "pipeline":
            tool_args[key] = pipelines.get_from_options()
            config.read_pipeline(tool_args[key])
        elif param.annotation == "option":
            tool_args[key] = util.read_option_value(f"--{key}")
        else:
            tool_args[key] = util.parse_args(param.annotation, doc_module=tool_module)

    # Call tool
    plugins.call(__name__, tool, **tool_args)
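To make the dispatch above concrete, here is a minimal sketch of what a new tool file could look like. The tool name, file name, and parameters are all hypothetical; the only pieces taken from the source are the `plugins.register` decorator from `midgard.dev.plugins` and the string annotations ("datedoy", "pipeline", "option") that `main()` matches against.

# Hypothetical where/tools/concatenate.py -- a sketch, not an actual tool in the repository
from midgard.dev import plugins


@plugins.register
def concatenate(date: "datedoy", pipeline: "pipeline", ident: "option"):
    """Concatenate datasets across stations for one run date"""
    # main() fills `date` from the --doy/date arguments, `pipeline` from the pipeline
    # options, and `ident` from a --ident command line option before calling this function.
    print(f"Would concatenate {pipeline} datasets for {date} (ident={ident})")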
def make_map(dset):
    """Make and show a basic matplotlib plot relevant for the current pipeline"""
    try:
        plugins.call(package_name=__name__, plugin_name=dset.vars["pipeline"], part="make_map", dset=dset)
    except mg_exceptions.UnknownPluginError:
        log.warn(f"Pipeline {dset.vars['pipeline']} has not defined function make_map")
def run_stage(rundate, pipeline, dset, stage, prev_stage, **kwargs):
    # Skip stages where no dependencies have changed
    dep_path = config.files.path("depends", file_vars={**kwargs, "stage": stage})
    if not (dependencies.changed(dep_path) or util.check_options("-F", "--force")):
        log.info(f"Not necessary to run {stage} for {pipeline.upper()} {rundate.strftime(config.FMT_date)}")
        return

    if dset is None:
        try:
            # Read dataset from disk if it exists
            dset = dataset.Dataset.read(rundate=rundate, pipeline=pipeline, stage=prev_stage, label="last", **kwargs)
        except (OSError, ValueError):
            # Create empty dataset
            dset = dataset.Dataset(rundate=rundate, pipeline=pipeline, **kwargs)

    # Set up dependencies. Add dependencies to previous stage and config file
    dependencies.init(dep_path)
    if prev_stage is not None:
        dependencies.add(config.files.path("depends", file_vars={**kwargs, "stage": prev_stage}), label="depends")
    dependencies.add(*config.tech.sources, label="config")

    # Delete old datasets for this stage
    dset.delete_stage(stage, **kwargs)

    # Call the current stage. Skip rest of stages if current stage returns False (compare with is since by
    # default stages return None)
    plugins.call(
        package_name=__name__, plugin_name=pipeline, part=stage, stage=stage, dset=dset, plugin_logger=log.info
    )
    dependencies.write()

    return dset
def get(dset, **obs_args):
    """Construct a Dataset for the pipeline based on observations

    Args:
        dset:      A Dataset that will be filled with observations and necessary fields.
        obs_args:  Input arguments passed on to the pipeline's observation plugin.
    """
    rundate = dset.analysis["rundate"]
    pipeline = dset.vars["pipeline"]
    plugins.call(package_name=__name__, plugin_name=pipeline, dset=dset, rundate=rundate, **obs_args)
def write(default_dset):
    """Call all writers specified in the configuration

    The list of writers to use is taken from the config file of the given technique. Each writer is passed a
    :class:`~where.data.dataset.Dataset` with data for the model run and should write the relevant parts of the
    data to file. By default the last dataset for the default_stage is sent to the writer, but that is possible
    to override with the following notation:

        output = writer_1                 # Use default dataset
        output = writer_1:calculate       # Use last dataset of "calculate" stage
        output = writer_1:calculate/2     # Use dataset 2 of "calculate" stage

    Args:
        default_dset (Dataset):   Dataset used by default.
    """
    dsets = {f"{default_dset.vars['stage']}/{default_dset.vars['dataset_id']}": default_dset}
    prefix = config.analysis.get("analysis", default="").str
    output_list = config.tech.output.list
    writer_and_dset = [o.partition(":")[::2] for o in output_list]

    rundate = config.analysis.rundate.date
    tech = config.analysis.tech.str
    session = config.analysis.session.str
    for writer, dset_str in writer_and_dset:
        # Read the datasets
        if dset_str not in dsets:
            stage, _, dset_id = dset_str.partition("/")
            stage, _, dset_name = stage.partition(":")
            stage = stage if stage else default_dset.vars["stage"]
            dset_name = dset_name if dset_name else session
            dset_id = int(dset_id) if dset_id else "last"
            dsets[dset_str] = data.Dataset(
                rundate, tech=tech, stage=stage, dataset_name=dset_name, dataset_id=dset_id, session=session
            )

        # Call the writers
        plugins.call(package_name=mg_writers.__name__, plugin_name=writer, prefix=prefix, dset=dsets[dset_str])
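As a quick illustration of how the output notation in the docstring is handled, the `partition(":")[::2]` comprehension splits each entry into a writer name and an optional dataset specifier. The entries below reuse the hypothetical writer name from the docstring:

# Plain-Python illustration of the parsing step (writer names are hypothetical)
output_list = ["writer_1", "writer_1:calculate", "writer_1:calculate/2"]
writer_and_dset = [o.partition(":")[::2] for o in output_list]
print(writer_and_dset)  # [('writer_1', ''), ('writer_1', 'calculate'), ('writer_1', 'calculate/2')]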
def apply_observation_rejectors(config_key: str, dset: "Dataset", independent: bool) -> "Dataset":
    """Apply all configured observation rejectors

    Args:
        config_key:   The configuration key listing which rejectors to apply.
        dset:         Dataset containing analysis data.
        independent:  Flag to indicate whether the rejectors are applied independently or sequentially.

    Returns:
        Dataset with the rejected observations removed.
    """
    prefix = dset.vars["pipeline"]
    rejectors = config.tech[config_key].list
    word = "independently" if independent else "sequentially"
    num_obs_before = dset.num_obs

    log.info(f"Applying observation rejectors {word}")
    all_keep_idx = np.ones(num_obs_before, dtype=bool)
    for rejector in rejectors:
        rejector_keep_idx = plugins.call(package_name=__name__, plugin_name=rejector, prefix=prefix, dset=dset)
        if independent:
            all_keep_idx = np.logical_and(all_keep_idx, rejector_keep_idx)
        else:
            dset.subset(rejector_keep_idx)
        log.info(f"Found {sum(~rejector_keep_idx):5d} observations based on {rejector}")

    if independent:
        dset.subset(all_keep_idx)

    log.info(f"Removing {num_obs_before - dset.num_obs} of {num_obs_before} observations")
    return dset
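The difference between independent and sequential application comes down to how the boolean keep-masks are combined. A minimal sketch with made-up masks:

import numpy as np

# Keep-masks as two hypothetical rejectors might return them (True = keep the observation)
keep_a = np.array([True, True, False, True])
keep_b = np.array([True, False, True, True])

# Independent mode: both rejectors see the full dataset, the masks are AND-ed and applied once
all_keep_idx = np.logical_and(keep_a, keep_b)  # -> [ True False False  True]

# Sequential mode would instead subset the dataset after each rejector, so keep_b
# would only be evaluated on the observations that survived keep_a.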
def setup_parser(parser_name=None, file_key=None, **kwargs):
    """Set up the given parser

    Note that this only sets up the parser, no data will be read and parsed.

    It is possible to give a file key instead of a parser name. In that case the name of the parser will be read
    from the file list.

    TODO: This is the old style of running parsers, can be deleted when all parsers are new style.

    Args:
        parser_name (String):   Name of parser.
        file_key (String):      Used to look up parser in the Where file list.
        kwargs:                 Input arguments to the parser.

    Returns:
        Parser: An instance of the given parser.
    """
    parser_name = config.files.get(section=file_key, key="parser", value=parser_name).str
    if not parser_name:
        log.warn(f"No parser found for {file_key!r} in {', '.join(config.files.sources)}")

    parser = plugins.call(package_name=mg_parsers.__name__, plugin_name=parser_name, **kwargs)
    if file_key is not None:
        parser.file_key = file_key

    return parser
def call(config_key, dset, partial_vectors, obs_noise):
    """Call an estimator

    Args:
        config_key (String):      Config key specifying the name of the estimator.
        dset (Dataset):           Model run data.
        partial_vectors (Dict):   Names and values of the partial derivatives for each partial config key.
        obs_noise (Array):        Observation noise, numpy array with one float value for each observation.
    """
    estimator_name = config.tech[config_key].str
    if estimator_name:
        plugins.call(
            package_name=__name__,
            plugin_name=estimator_name,
            dset=dset,
            partial_vectors=partial_vectors,
            obs_noise=obs_noise,
        )
def write(default_dset):
    """Call all writers specified in the configuration

    The list of writers to use is taken from the config file of the given technique. Each writer is passed a
    :class:`~where.data.dataset.Dataset` with data for the model run and should write the relevant parts of the
    data to file. By default the last dataset for the default_stage is sent to the writer, but that is possible
    to override with the following notation:

        output = writer_1                 # Use default dataset
        output = writer_1:calculate       # Use last dataset of "calculate" stage
        output = writer_1:calculate/2     # Use dataset 2 of "calculate" stage

    Args:
        default_dset (Dataset):   Dataset used by default.
    """
    prefix = config.analysis.get("analysis", default="").str
    output_list = config.tech.output.list
    writer_and_dset = [o.partition(":")[::2] for o in output_list]
    dset_vars = config.analysis.config.as_dict()
    dset_vars["rundate"] = config.analysis.rundate.date

    for writer, dset_str in writer_and_dset:
        # Read the datasets
        if dset_str:
            stage, _, label = dset_str.partition("/")
            stage = stage if stage else default_dset.vars["stage"]
            label = label if label else "last"
            dset = dataset.Dataset.read(stage=stage, label=label, **dset_vars)
            plugins.call(package_name=mg_writers.__name__, plugin_name=writer, prefix=prefix, dset=dset)
        else:
            plugins.call(package_name=mg_writers.__name__, plugin_name=writer, prefix=prefix, dset=default_dset)

    publish_files()
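In this new-style variant the optional dataset specifier is further split on "/" into a stage and a label rather than a dataset id. A small illustration with a hypothetical specifier:

# Plain-Python illustration of the specifier parsing (values are hypothetical)
stage, _, label = "calculate/2".partition("/")
print(stage, label)        # calculate 2
stage, _, label = "calculate".partition("/")
print(stage, repr(label))  # calculate ''  -> the empty label falls back to "last"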
def apply_observation_rejector(rejector: str, dset: "Dataset", **kwargs: Dict[Any, Any]) -> None:
    """Apply defined observation rejector for a given session

    Args:
        rejector:   The observation rejector name.
        dset:       Dataset containing analysis data.
        kwargs:     Input arguments to the rejector.
    """
    log.info(f"Applying observation rejector {rejector!r}")
    keep_idx = plugins.call(package_name=__name__, plugin_name=rejector, dset=dset, **kwargs)
    log.info(f"Removing {sum(~keep_idx)} of {dset.num_obs} observations")
    dset.subset(keep_idx)
def file_vars():
    """Get a dictionary of file variables for the current pipeline

    The active analysis variables are also made available, but may be overridden by the pipeline.
    """
    file_vars = dict(config.analysis.config.as_dict(), **config.date_vars(config.analysis.rundate.date))
    pipeline_file_vars = plugins.call(package_name=__name__, plugin_name=config.analysis.tech.str, part="file_vars")
    file_vars.update(pipeline_file_vars)

    return file_vars
def parse_file(
    parser_name: str,
    file_path: Union[str, pathlib.Path],
    encoding: Optional[str] = None,
    parser_logger: Optional[Callable[[str], None]] = print,
    timer_logger: Optional[Callable[[str], None]] = None,
    use_cache: bool = False,
    **parser_args: Any,
) -> Parser:
    """Use the given parser on a file and return parsed data

    Specify `parser_name` and `file_path` to the file that should be parsed. The following parsers are available:

    {doc_parser_names}

    Data can be retrieved either as Dictionaries, Pandas DataFrames or Midgard Datasets by using one of the methods
    `as_dict`, `as_dataframe` or `as_dataset`.

    Example:
        >>> df = parse_file('rinex2_obs', 'ande3160.16o').as_dataframe()  # doctest: +SKIP

    Args:
        parser_name:    Name of parser.
        file_path:      Path to file that should be parsed.
        encoding:       Encoding in file that is parsed.
        parser_logger:  Logging function that will be used by parser.
        timer_logger:   Logging function that will be used to log timing information.
        use_cache:      Whether to use a cache to avoid parsing the same file several times.
        parser_args:    Input arguments to the parser.

    Returns:
        Parser: Parser with the parsed data.
    """
    # TODO: Cache

    # Create the parser and parse the data
    parser = plugins.call(
        package_name=__name__,
        plugin_name=parser_name,
        file_path=file_path,
        encoding=encoding,
        logger=parser_logger,
        **parser_args,
    )
    with Timer(f"Finish {parser_name} ({__name__}) - {file_path} in", logger=timer_logger):
        return parser.parse()
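As a complement to the doctest above, a hedged usage sketch: the parser name and file name are reused from the docstring example, and the data is retrieved as a dictionary instead of a DataFrame.

# Hypothetical usage; assumes a RINEX observation file like the one in the docstring example exists
data = parse_file("rinex2_obs", "ande3160.16o", timer_logger=print).as_dict()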
def apply_remover(remover: str, dset: "Dataset", **kwargs: Dict[Any, Any]) -> None:
    """Apply defined remover for a given session

    Args:
        remover:   The remover name.
        dset:      Dataset containing analysis data.
        kwargs:    Input arguments to the remover.
    """
    log.info(f"Apply remover {remover!r}")
    keep_idx = plugins.call(package_name=__name__, plugin_name=remover, dset=dset, **kwargs)
    log.info(f"Keeping {sum(keep_idx)} of {dset.num_obs} observations")
    dset.subset(keep_idx)

    if dset.num_obs == 0:
        log.fatal("No observations are available.")
def partial_config_keys(estimator_config_key):
    """Find which partials a given estimator requires

    Finds the partial config keys by calling the function registered as 'partial_config_keys' on the given
    estimator.

    Args:
        estimator_config_key (String):  Config key specifying the name of the estimator.

    Returns:
        Tuple: Strings with names of config keys listing which partial models to run.
    """
    estimator_name = config.tech[estimator_config_key].str
    return plugins.call(package_name=__name__, plugin_name=estimator_name, part="partial_config_keys")
def get_trf_factory(time, reference_frame):
    """Get a factory for a given reference frame

    The factory knows how to create TrfSite objects for a given reference frame, for instance `itrf:2014`.

    Args:
        time (Time):               Time epochs for which to calculate the reference frame.
        reference_frame (String):  Specification of which reference frame to use (see `get_trf`).

    Returns:
        TrfFactory: Factory that knows how to create TrfSite objects.
    """
    name, _, version = reference_frame.partition(":")
    kwargs = dict(version=version) if version else dict()
    return plugins.call(package_name=__name__, plugin_name=name, time=time, **kwargs)
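The reference frame specification is a plain string split on the first colon; using the `itrf:2014` example from the docstring:

name, _, version = "itrf:2014".partition(":")
print(name, version)  # itrf 2014
# A bare "itrf" would give version == "", so no version keyword is passed on to the factory.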
def get_satellite(satellite_name, **kwargs):
    """Get a satellite object by name

    Args:
        satellite_name (String):  Name used to look up satellite.
        kwargs (Dict):            Arguments that will be passed to the satellite object.

    Returns:
        A satellite object describing the satellite.
    """
    try:
        plugin, part = satellites()[satellite_name.lower()]
    except KeyError:
        log.fatal(f"Unknown satellite '{satellite_name}'. Defined satellites are {', '.join(names())}")

    return plugins.call(package_name=__name__, plugin_name=plugin, part=part, **kwargs)
def get_crf_factory(time, celestial_reference_frame):
    """Get a factory for a given celestial reference frame

    The factory knows how to create RadioSource objects for a given reference frame, for instance `icrf2`.

    Args:
        time (Time):                          Time epochs for which to calculate the reference frame.
        celestial_reference_frame (String):   Specification of which reference frame to use (see `get_crf`).

    Returns:
        CrfFactory: Factory that knows how to create RadioSource objects.
    """
    name, _, catalog = celestial_reference_frame.partition(":")
    kwargs = dict(catalog=catalog) if catalog else dict()
    return plugins.call(package_name=__name__, plugin_name=name, time=time, **kwargs)
def options():
    """List the command line options for starting the different pipelines

    Returns:
        Dict: Command line options pointing to pipelines.
    """
    options = dict()
    plugin_files = plugins.names(package_name=__name__)

    for pipeline in plugin_files:
        try:
            pipeline_options = plugins.call(package_name=__name__, plugin_name=pipeline, part="options")
        except mg_exceptions.UnknownPluginError:
            continue
        options.update({opt: pipeline for opt in pipeline_options})

    return options
def list_sessions(rundate, pipeline):
    """Get a list of sessions for a given rundate for a pipeline

    Args:
        rundate (Date):     The model run date.
        pipeline (String):  Name of pipeline.

    Returns:
        List: Strings with the names of the sessions.
    """
    try:
        return plugins.call(package_name=__name__, plugin_name=pipeline, part="list_sessions", rundate=rundate)
    except mg_exceptions.UnknownPluginError:
        return [""]  # If sessions are not defined for the pipeline, return a list with one unnamed session
def get_session(rundate, pipeline):
    """Read session from command line options

    The session is validated for the given pipeline. Uses the `validate_session`-plugin for validation.

    Args:
        rundate (Date):     The model run date.
        pipeline (String):  Name of pipeline.

    Returns:
        String: Name of session.
    """
    session = util.read_option_value("--session", default="")
    try:
        return plugins.call(
            package_name=__name__, plugin_name=pipeline, part="validate_session", rundate=rundate, session=session
        )
    except mg_exceptions.UnknownPluginError:
        return session  # Simply return session if it cannot be validated
def get_args(rundate, pipeline, input_args=None):
    """Get a list of command line arguments for a given rundate for a pipeline

    Args:
        rundate (Date):     The model run date.
        pipeline (String):  Name of pipeline.
        input_args (List):  List of arguments from the command line.

    Returns:
        List: List of command line arguments.
    """
    try:
        return plugins.call(
            package_name=__name__, plugin_name=pipeline, part="get_args", rundate=rundate, input_args=input_args
        )
    except mg_exceptions.UnknownPluginError:
        log.warn(f"Pipeline {pipeline} has not defined function get_args")
        return input_args
def get_orbit(apriori_orbit=None, **kwargs):
    """Get an apriori orbit

    The specification of the apriori orbit is matched with a filename in this orbit-directory. If it is not passed
    in as an argument, the apriori orbit to use is read from the configuration.

    Args:
        apriori_orbit (String):  Optional specification of which apriori orbit to use (see above).

    Returns:
        AprioriOrbit: Apriori orbit object.
    """
    apriori_orbit = config.tech.get("apriori_orbit", apriori_orbit).str
    if apriori_orbit not in ["broadcast", "precise", "slr"]:
        log.fatal(
            "Configuration value '{}' for option 'apriori_orbit' is unknown. It should be 'broadcast', 'precise' "
            "or 'slr'.",
            apriori_orbit,
        )

    return plugins.call(package_name=__name__, plugin_name=apriori_orbit, **kwargs)
def get(datasource_name, **kwargs):
    """Read data from the given data source

    Simple data sources that only return data directly from a parser do not need an explicit apriori-file. This is
    handled by looking in the parser-directory if a data source is not found in the apriori directory.

    The import of where.parsers is done locally to avoid circular imports.

    Args:
        datasource_name (String):  Name of apriori data source.
        kwargs:                    Input arguments to the data source.

    Returns:
        The data from the data source (data type depends on source).
    """
    try:
        return plugins.call(package_name=__name__, plugin_name=datasource_name, **kwargs)
    except exceptions.UnknownPluginError as apriori_err:
        from where import parsers

        try:
            data = parsers.parse_key(file_key=datasource_name, **kwargs).as_dict()
            log.dev(f"Called parsers.parse_key({datasource_name}) in apriori.get()")
            return data
        except AttributeError:
            try:
                data = parsers.parse(datasource_name, **kwargs)
                log.dev(f"Called parsers.parse({datasource_name}) in apriori.get()")
                return data
            except exceptions.UnknownPluginError:
                raise apriori_err from None
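A hedged usage sketch of the fallback chain above; the data source name and keyword argument are made up for illustration only.

# Hypothetical call: first an apriori plugin named "my_source" is tried, then
# parsers.parse_key(file_key="my_source"), and finally parsers.parse("my_source").
data = get("my_source", rundate=rundate)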
def test_call_existing_plugin(tmpfile):
    """Test that calling a parser-plugin works, and returns a Parser instance"""
    package_name = "midgard.parsers"
    plugin_name = plugins.names(package_name)[0]
    parser = plugins.call(package_name, plugin_name, file_path=tmpfile)
    assert isinstance(parser, Parser)
def test_call_non_existing_part(plugin_package):
    """Test that calling a non-existing part raises an error"""
    plugin_name = "plugin_parts"
    part_name = "non_existent"
    with pytest.raises(exceptions.UnknownPluginError):
        plugins.call(plugin_package, plugin_name, part=part_name)
def test_call_non_existing_plugin():
    """Test that calling a non-existing plugin raises an error"""
    with pytest.raises(exceptions.UnknownPluginError):
        plugins.call("midgard.dev", "non_existent")
def run(rundate, pipeline, session=""):
    """Run a Where pipeline for a given date and session

    Args:
        rundate:   Rundate of analysis.
        pipeline:  Pipeline used for analysis.
        session:   Session in analysis.
    """
    if not setup.has_config(rundate, pipeline, session):
        log.fatal(f"No configuration found for {pipeline.upper()} {session} {rundate.strftime(config.FMT_date)}")

    # Set up session config
    config.init(rundate=rundate, tech_name=pipeline, session=session)

    # Set up prefix for console logger and start file logger
    log_cfg = config.where.log
    prefix = f"{pipeline.upper()} {session} {rundate:%Y-%m-%d}"
    log.init(log_level=log_cfg.default_level.str, prefix=prefix)
    if log_cfg.log_to_file.bool:
        log.file_init(
            file_path=files.path("log"),
            log_level=log_cfg.default_level.str,
            prefix=prefix,
            rotation=log_cfg.number_of_log_backups.int,
        )

    # Read which stages to skip from technique configuration file
    skip_stages = config.tech.get("skip_stages", default="").list

    # Register filekey suffix
    filekey_suffix = config.tech.filekey_suffix.list
    if filekey_suffix:
        config.files.profiles = filekey_suffix

    # Find which stages we will run analysis for
    # TODO: Specify stage_list in config
    stage_list = [s for s in stages(pipeline) if s not in skip_stages]

    # Start file logging and reporting
    reports.report.init(sessions=[session])
    reports.report.start_session(session)
    reports.report.text("header", session.replace("_", " ").title())

    # Update analysis config and file variables
    config.set_analysis(rundate=rundate, tech=pipeline, analysis=pipeline, session=session)
    config.set_file_vars(file_vars())

    # Log the name of the session
    log.blank()  # Empty line for visual clarity
    log.info(f"Start session {session}")
    session_timer = timer(f"Finish session {session} in")
    session_timer.start()

    # Run stages, keep track of previous stage
    dset = None
    dep_fast = config.where.files.dependencies_fast.bool
    for prev_stage, stage in zip([None] + stage_list, stage_list):

        # Skip stages where no dependencies have changed
        dep_path = files.path("depends", file_vars=dict(stage=stage))
        if not (dependencies.changed(dep_path, fast_check=dep_fast) or util.check_options("-F", "--force")):
            log.info(f"Not necessary to run {stage} for {pipeline.upper()} {rundate.strftime(config.FMT_date)}")
            continue
        elif dset is None:
            # Create or read dataset
            empty = stage == stage_list[0]
            dset = dataset.Dataset(
                rundate, tech=pipeline, stage=prev_stage, dataset_name=session, dataset_id="last", empty=empty
            )

        # Report on the stage
        reports.report.start_section(stage)
        reports.report.text("header", stage.replace("_", " ").title())
        if prev_stage:
            log.blank()  # Empty line for visual clarity

        # Set up dependencies. Add dependencies to previous stage and config file
        dependencies.init(dep_path, fast_check=dep_fast)
        dependencies.add(files.path("depends", file_vars=dict(stage=prev_stage)), label="depends")
        dependencies.add(*config.tech.sources, label="config")

        # Delete old datasets for this stage
        dset.delete_from_file(stage=stage, dataset_id="all")

        # Call the current stage. Skip rest of stages if current stage returns False (compare with is since by
        # default stages return None)
        plugins.call(
            package_name=__name__, plugin_name=pipeline, part=stage, stage=stage, dset=dset, plugin_logger=log.info
        )
        dependencies.write()

        if dset.num_obs == 0:
            log.warn(f"No observations in dataset after {stage} stage. Exiting pipeline")
            break
    else:  # Only done if loop does not break (all stages finish normally)
        # Publish files for session
        files.publish_files()

    session_timer.end()

    # Store configuration to library
    setup.store_config_to_library(rundate, pipeline, session)

    # Write reports specified in config
    reports.write(rundate, pipeline)

    # Write requirements to file for reproducibility
    util.write_requirements()
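One detail of the stage loop worth spelling out: previous and current stages are paired by zipping the stage list against itself shifted by one. A small illustration with made-up stage names:

stage_list = ["read", "edit", "calculate", "write"]
pairs = list(zip([None] + stage_list, stage_list))
print(pairs)  # [(None, 'read'), ('read', 'edit'), ('edit', 'calculate'), ('calculate', 'write')]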
def test_load_non_existing_plugin_with_prefix():
    """Test that calling a non-existing plugin fails also when using prefix"""
    with pytest.raises(exceptions.UnknownPluginError):
        plugins.call("midgard.dev", "non_existent", prefix="prefix")