def parse(self):
    """Parse data

    This is a basic implementation that carries out the whole pipeline of reading and parsing datafiles including
    calculating secondary data.

    Returns:
        Parser: The parsed data
    """
    log.dev("where.parsers.parser is deprecated. Use where.parsers._parser or one of its subclasses instead.")

    if self.file_path is None:
        self.file_path = config.files.path(self.file_key, file_vars=self.vars, download_missing=True)

    parser_package, parser_name = self.__module__.rsplit(".", maxsplit=1)
    with Timer("Finish {} ({}) - {} in".format(parser_name, parser_package, self.file_key)):
        if self.data_available:
            self.read_data()

        if not self.data_available:  # May have been set to False by self.read_data()
            log.warn(
                f"No data found by {self.__class__.__name__} for {self.rundate.strftime(config.FMT_date)} "
                f"(was looking for {self.file_path})"
            )
            return self

        self.calculate_data()
        dependencies.add(*self.dependencies, label=self.file_key)

    return self

def call(
    package_name: str,
    plugin_name: str,
    part: Optional[str] = None,
    prefix: Optional[str] = None,
    plugin_logger: Optional[Callable[[str], None]] = None,
    **plugin_args: Any,
) -> Any:
    """Call one plug-in

    Args:
        package_name:   Name of package containing plug-ins.
        plugin_name:    Name of the plug-in, i.e. the module containing the plug-in.
        part:           Name of function to call within the plug-in (optional).
        prefix:         Prefix of the plug-in name, used if the plug-in name is not found (optional).
        plugin_logger:  Function used for logging (optional).
        plugin_args:    Named arguments passed on to the plug-in.

    Returns:
        Return value of the plug-in.
    """
    plugin = get(package_name, plugin_name, part, prefix)

    # Log message about calling plug-in
    if plugin_logger is not None:
        plugin_logger(f"Start plug-in {plugin.name!r} in {package_name!r}")

    # Add dependency to the plug-in
    dependencies.add(plugin.file_path, label=f"plugin:{package_name}")

    # Call plug-in
    return plugin.function(**plugin_args)

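# Usage sketch (the package and plug-in names below are illustrative assumptions, and `dset`
# is assumed to be an existing Dataset): call() resolves the plug-in, logs the call via
# plugin_logger if one is given, registers the plug-in file as a dependency, and forwards
# the remaining keyword arguments to the plug-in function.
output = call("where.writers", "sinex", plugin_logger=print, dset=dset)
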
def _read_data(self):
    """Read data needed by this Reference Frame for calculating positions of sites

    Delegates to _read_data_<self.version> to read the actual data.

    Returns:
        Dict:  Dictionary containing data about each site defined in this reference frame.
    """
    trf = Configuration("trf_custom")
    trf.profiles = config.analysis.get("tech", value=self.version, default="").list

    trf_path = files.path("trf-custom")
    trf_local_path = files.path("trf-custom_local")

    trf.update_from_file(trf_path)
    dependencies.add(trf_path, label="trf")
    if trf_local_path.exists():
        trf.update_from_file(trf_local_path)
        dependencies.add(trf_local_path, label="trf")

    data = dict()
    for section in trf.sections:
        info = {k: v for k, v in section.as_dict().items() if not k == "pos_itrs"}
        info["pos"] = np.array(section.pos_itrs.list, dtype=float)
        data[section.name] = info

    return data

def __init__(self, time, ephemerides):
    """Create an Ephemerides-instance that calculates ephemerides for the given time epochs

    It is possible to not specify the time epochs when creating the instance (set `time=None`). In this case `time`
    must be supplied as a parameter to each call calculating ephemerides.

    The SPK-file is read and parsed at the creation of this instance. In particular the names and ids of the
    available objects are read.

    Args:
        time (Time):           Time epochs for which to calculate ephemerides.
        ephemerides (String):  Name of ephemerides to use.
    """
    self.time = time
    self.ephemerides = ephemerides

    # Open the SPK-file corresponding to the ephemerides
    eph_filepath = config.files.path("ephemerides", file_vars=dict(ephemerides=ephemerides), download_missing=True)
    self._spk = SPK.open(eph_filepath)  # TODO: Close file in destructor
    dependencies.add(eph_filepath, label="ephemerides")

    # Parse segments in SPK file
    self._names, self._segments = self._parse_segments()

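# Usage sketch, assuming the surrounding class is named Ephemerides and that "de430" is an
# available ephemerides name in the file configuration. With time=None the epochs have to
# be supplied to each later calculation call instead.
eph = Ephemerides(time=None, ephemerides="de430")
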
def parse_key(file_key, file_vars=None, parser_name=None, use_cache=True, **parser_args):
    """Parse a file given in the Where file-list and return parsed data

    The `file_key` is looked up in the file list to figure out which file should be parsed. The name of the parser
    will also be looked up in the file configuration. The dictionary `file_vars` may be specified if variables are
    needed to figure out the correct file path from the configuration. The following file keys are available:

    {doc_file_keys}

    Data can be retrieved either as Dictionaries, Pandas DataFrames or Where Datasets by using one of the methods
    `as_dict`, `as_dataframe` or `as_dataset`.

    Example:
        > df = parsers.parse_key('center_of_mass', file_vars=dict(satellite='Lageos')).as_dataset()

    Args:
        file_key (String):      Used to look up parser_name and file_path in the Where file configuration.
        file_vars (Dict):       Additional file variables used when looking up file path in configuration.
        parser_name (String):   Name of parser to use. Default is to use parser named in the file list.
        use_cache (Boolean):    Whether to use a cache to avoid parsing the same file several times.
        parser_args:            Input arguments to the parser.

    Returns:
        Parser: Parser with the parsed data
    """
    # Read parser_name from config.files if it is not given
    parser_name = config.files.get(section=file_key, key="parser", value=parser_name).str
    if not parser_name:
        log.warn(f"No parser found for {file_key!r} in {', '.join(config.files.sources)}")

    # Figure out the file path
    file_vars = dict() if file_vars is None else file_vars
    download_missing = config.where.files.download_missing.bool
    file_path = config.files.path(file_key, file_vars=file_vars, download_missing=download_missing, use_aliases=True)
    dependencies.add(file_path, label=file_key)
    parser_args.setdefault("encoding", config.files.encoding(file_key))

    # Use the Midgard parser function to create parser and parse data
    return parse_file(parser_name, file_path, use_cache=use_cache, timer_logger=log.time, **parser_args)

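# Usage sketch based on the docstring example above: parser name and file path are read
# from the Where file configuration for the given file key, and the parsed data can be
# converted with as_dict(), as_dataframe() or as_dataset().
com_data = parse_key("center_of_mass", file_vars=dict(satellite="Lageos")).as_dict()
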
def run_stage(rundate, pipeline, dset, stage, prev_stage, **kwargs):
    # Skip stages where no dependencies have changed
    dep_path = config.files.path("depends", file_vars={**kwargs, "stage": stage})
    if not (dependencies.changed(dep_path) or util.check_options("-F", "--force")):
        log.info(f"Not necessary to run {stage} for {pipeline.upper()} {rundate.strftime(config.FMT_date)}")
        return

    if dset is None:
        try:
            # Read dataset from disk if it exists
            dset = dataset.Dataset.read(rundate=rundate, pipeline=pipeline, stage=prev_stage, label="last", **kwargs)
        except (OSError, ValueError):
            # Create empty dataset
            dset = dataset.Dataset(rundate=rundate, pipeline=pipeline, **kwargs)

    # Set up dependencies. Add dependencies to previous stage and config file
    dependencies.init(dep_path)
    if prev_stage is not None:
        dependencies.add(config.files.path("depends", file_vars={**kwargs, "stage": prev_stage}), label="depends")
    dependencies.add(*config.tech.sources, label="config")

    # Delete old datasets for this stage
    dset.delete_stage(stage, **kwargs)

    # Call the current stage. Skip rest of stages if current stage returns False (compare with is since by default
    # stages return None)
    plugins.call(package_name=__name__, plugin_name=pipeline, part=stage, stage=stage, dset=dset, plugin_logger=log.info)
    dependencies.write()

    return dset

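# Illustrative driver loop (not part of the source, and the stage names are assumptions):
# run_stage() is called once per stage. When a stage is skipped it returns None, and the
# next stage re-reads the previous stage's dataset from disk.
dset = None
prev_stage = None
for stage in ("read", "edit", "calculate", "estimate", "write"):
    dset = run_stage(rundate, pipeline, dset, stage, prev_stage)
    prev_stage = stage
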
def _read(self, dset_raw):
    """Read SP3 orbit file data and save it in a Dataset

    In addition to the given date, we read data for the day before and after. This is needed to carry out correct
    orbit interpolation at the start and end of a day.

    TODO: How well do the orbits fit from day to day? Is it necessary to align the orbits?

    Args:
        dset_raw (Dataset):   Dataset representing raw data from apriori orbit files
    """
    date_to_read = dset_raw.analysis["rundate"] - timedelta(days=self.day_offset)
    file_paths = list()

    # Loop over days to read
    while date_to_read <= dset_raw.analysis["rundate"] + timedelta(days=self.day_offset):
        if self.file_path is None:
            file_path = config.files.path(self.file_key, file_vars=config.date_vars(date_to_read))
        else:
            file_path = self.file_path

        log.debug(f"Parse precise orbit file {file_path}")

        # Generate temporary Dataset with orbit file data
        dset_temp = dataset.Dataset(rundate=date_to_read, pipeline=dset_raw.vars["pipeline"], stage="temporary")
        parser = parsers.parse(parser_name="orbit_sp3", file_path=file_path, rundate=date_to_read)
        parser.write_to_dataset(dset_temp)
        file_paths.append(str(parser.file_path))
        dependencies.add(str(parser.file_path), label=self.file_key)  # Used for output writing

        # Extend Dataset dset_raw with temporary Dataset
        date = date_to_read.strftime("%Y-%m-%d")
        if dset_raw.num_obs == 0:
            dset_raw.update_from(dset_temp)
        else:
            dset_raw.extend(dset_temp, meta_key=date)
        dset_raw.meta.add("file_path", file_paths, section="parser")

        date_to_read += timedelta(days=1)

    return dset_raw

def get_sp3c_or_sp3d(rundate, file_path=None, **kwargs):
    """Use either OrbitSp3cParser or OrbitSp3dParser for reading orbit files in SP3c or SP3d format

    First the version of the SP3 file is read. Based on this version number it is decided which parser should be
    used.

    Args:
        rundate (date):    The model run date.
        file_path (str):   Optional path to orbit-file to parse.
    """
    version = _get_sp3_file_version(file_path)
    dependencies.add(file_path, label="gnss_orbit_sp3")  # MURKS_hjegei: Better solution?

    if version in "ac":
        return orbit_sp3c.OrbitSp3cParser(file_path=file_path, **kwargs)
    elif version.startswith("d"):
        return orbit_sp3d.OrbitSp3dParser(file_path=file_path, **kwargs)
    else:
        log.fatal(f"Unknown SP3 format {version!r} is used in file {file_path}")

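# Usage sketch (the file name is illustrative): the parser class is chosen from the version
# character in the SP3 header and then used like any other Where parser.
parser = get_sp3c_or_sp3d(rundate, file_path="igs20000.sp3")
parser.parse()
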
def write_sinex(dset):
    """Write normal equations of session solution in SINEX format.

    Args:
        dset:  Dataset, data for a model run.
    """
    # Add dependency to sinex_blocks-module
    dependencies.add(sinex_blocks.__file__)

    if config.tech.analysis_status.status.str == "bad":
        log.info("Bad session. Not producing SINEX.")
        return

    with files.open("output_sinex", file_vars=dset.vars, mode="wt") as fid:
        sinex = sinex_blocks.SinexBlocks(dset, fid)
        sinex.header_line()
        for block in config.tech[WRITER].blocks.list:
            block_name, *args = block.split(":")
            sinex.write_block(block_name, *args)
        sinex.end_line()

def run(rundate, pipeline, session=""):
    """Run a Where pipeline for a given date and session

    Args:
        rundate:   Rundate of analysis.
        pipeline:  Pipeline used for analysis.
        session:   Session in analysis.
    """
    if not setup.has_config(rundate, pipeline, session):
        log.fatal(f"No configuration found for {pipeline.upper()} {session} {rundate.strftime(config.FMT_date)}")

    # Set up session config
    config.init(rundate=rundate, tech_name=pipeline, session=session)

    # Set up prefix for console logger and start file logger
    log_cfg = config.where.log
    prefix = f"{pipeline.upper()} {session} {rundate:%Y-%m-%d}"
    log.init(log_level=log_cfg.default_level.str, prefix=prefix)
    if log_cfg.log_to_file.bool:
        log.file_init(
            file_path=files.path("log"),
            log_level=log_cfg.default_level.str,
            prefix=prefix,
            rotation=log_cfg.number_of_log_backups.int,
        )

    # Read which stages to skip from technique configuration file
    skip_stages = config.tech.get("skip_stages", default="").list

    # Register filekey suffix
    filekey_suffix = config.tech.filekey_suffix.list
    if filekey_suffix:
        config.files.profiles = filekey_suffix

    # Find which stages we will run analysis for
    # TODO: Specify stage_list in config
    stage_list = [s for s in stages(pipeline) if s not in skip_stages]

    # Start file logging and reporting
    reports.report.init(sessions=[session])
    reports.report.start_session(session)
    reports.report.text("header", session.replace("_", " ").title())

    # Update analysis config and file variables
    config.set_analysis(rundate=rundate, tech=pipeline, analysis=pipeline, session=session)
    config.set_file_vars(file_vars())

    # Log the name of the session
    log.blank()  # Empty line for visual clarity
    log.info(f"Start session {session}")
    session_timer = timer(f"Finish session {session} in")
    session_timer.start()

    # Run stages, keep track of previous stage
    dset = None
    dep_fast = config.where.files.dependencies_fast.bool
    for prev_stage, stage in zip([None] + stage_list, stage_list):

        # Skip stages where no dependencies have changed
        dep_path = files.path("depends", file_vars=dict(stage=stage))
        if not (dependencies.changed(dep_path, fast_check=dep_fast) or util.check_options("-F", "--force")):
            log.info(f"Not necessary to run {stage} for {pipeline.upper()} {rundate.strftime(config.FMT_date)}")
            continue
        elif dset is None:
            # Create or read dataset
            empty = stage == stage_list[0]
            dset = dataset.Dataset(
                rundate, tech=pipeline, stage=prev_stage, dataset_name=session, dataset_id="last", empty=empty
            )

        # Report on the stage
        reports.report.start_section(stage)
        reports.report.text("header", stage.replace("_", " ").title())
        if prev_stage:
            log.blank()  # Empty line for visual clarity

        # Set up dependencies. Add dependencies to previous stage and config file
        dependencies.init(dep_path, fast_check=dep_fast)
        dependencies.add(files.path("depends", file_vars=dict(stage=prev_stage)), label="depends")
        dependencies.add(*config.tech.sources, label="config")

        # Delete old datasets for this stage
        dset.delete_from_file(stage=stage, dataset_id="all")

        # Call the current stage. Skip rest of stages if current stage returns False (compare with is since by
        # default stages return None)
        plugins.call(
            package_name=__name__, plugin_name=pipeline, part=stage, stage=stage, dset=dset, plugin_logger=log.info
        )
        dependencies.write()

        if dset.num_obs == 0:
            log.warn(f"No observations in dataset after {stage} stage. Exiting pipeline")
            break
    else:  # Only done if loop does not break (all stages finish normally)
        # Publish files for session
        files.publish_files()

    session_timer.end()

    # Store configuration to library
    setup.store_config_to_library(rundate, pipeline, session)

    # Write reports specified in config
    reports.write(rundate, pipeline)

    # Write requirements to file for reproducibility
    util.write_requirements()

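# Usage sketch: running the VLBI pipeline for one day. The session name "XA" is an
# illustrative assumption; valid sessions depend on the pipeline configuration.
from datetime import date

run(date(2018, 5, 1), "vlbi", session="XA")
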
def call_one(
    package_name, plugin_name, part=None, prefix=None, logger=log.time, use_timer=True, do_report=True, **kwargs
):
    """Call one plug-in

    If the plug-in is not part of the package, an UnknownPluginError is raised.

    If there are several functions registered in a plug-in and `part` is not specified, then the first function
    registered in the plug-in will be called.

    The file containing the source code of the plug-in is added to the list of dependencies.

    Args:
        package_name (String):  Name of package containing plug-ins.
        plugin_name (String):   Name of the plug-in, i.e. the module containing the plug-in.
        part (String):          Name of function to call within the plug-in (optional).
        prefix (String):        Prefix of the plug-in name, used if the plug-in name is unknown (optional).
        logger (Function):      Logger from the lib.log package specifying the level of logging to be used (optional).
        use_timer (Boolean):    Whether to time and log the call to the plug-in (optional).
        do_report (Boolean):    Whether to add the call to the plug-in to the report (optional).
        kwargs:                 Named arguments passed on to the plug-in.

    Returns:
        Return value of the plug-in.
    """
    # Get Plugin-object
    plugin_name = load_one(package_name, plugin_name, prefix=prefix)
    part = "__default__" if part is None else part
    try:
        plugin = _PLUGINS[package_name][plugin_name][part]
    except KeyError:
        raise exceptions.UnknownPluginError(
            "Plugin '{}' not found for '{}' in '{}'".format(part, plugin_name, package_name)
        ) from None

    # Add plug-in to report
    if do_report:
        from where.reports import report

        code_kwargs = kwargs.copy()
        if "dset" in code_kwargs:
            code_kwargs["dset"] = code_kwargs["dset"].repr
        report.add(
            package_name,
            __plugin__=plugin.name,
            __doc__=plugin.function.__doc__,
            __text__="TODO",
            __code__="kwargs = {}\n{} = plugins.call_one('{}', '{}', part='{}', **kwargs)"
            "".format(code_kwargs, plugin_name, package_name, plugin_name, part),
            **kwargs,
        )

    # Call plug-in
    dependencies.add(plugin.file_path, label="plugin")
    if logger:
        logger(f"Start {plugin.name} in {package_name}")
        time_logger = log.time if use_timer else None
    else:
        time_logger = None
    with timer(f"Finish {plugin.name} ({package_name}) in", logger=time_logger):
        return plugin.function(**kwargs)

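# Usage sketch with illustrative package and plug-in names (`dset` is assumed to exist):
# call_one() resolves the plug-in, registers its source file as a dependency, and times
# the call when use_timer is True.
result = call_one("where.models.delay", "vlbi_vacuum_delay", dset=dset, do_report=False)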