def get_gravitational_deformation(rundate):
    """Get excess delay due to gravitational deformation as a function of elevation

    Picks the newest available version of the a priori file (versions are
    encoded as dates in the `version` file variable), parses it and builds one
    cubic interpolator (delay as a function of elevation) per station whose
    validity interval covers the given rundate.

    Args:
        rundate (date):  Run date used to select stations with valid data.

    Returns:
        A dictionary of interpolator functions, indexed by station name.
    """
    versions = list(config.files.glob_variable("vlbi_gravitational_deformation", "version", r"[\w]+"))
    dates = [datetime.strptime(d, "%Y%b%d") for d in versions]

    # Use the file version with the most recent date, if any version is found
    if dates:
        max_idx = dates.index(max(dates))
        file_vars = dict(version=versions[max_idx])
    else:
        file_vars = dict()

    parser = parsers.parse_key(file_key="vlbi_gravitational_deformation", file_vars=file_vars)
    # Bug fix: corrected typo "gravitional" -> "gravitational" in log message
    log.debug(f"Using {parser.file_path} as a priori gravitational deformation")

    data = parser.as_dict() if parser.data_available else dict()

    interpolators = dict()
    for station, values in data.items():
        # Only use stations whose validity interval overlaps the whole rundate
        if (datetime.combine(rundate, time.max) > values["start"]
                and datetime.combine(rundate, time.min) < values["end"]):
            interpolators[station] = interpolate.interp1d(values["elevation"], values["delay"], kind="cubic")

    return interpolators
def _register_tables():
    """Import data tables in the current directory and register them

    Every public module in this directory is imported, and each ``Table``
    subclass with a non-None ``datatype`` gets an ``add_<datatype>`` and a
    ``_read_<datatype>`` method registered on ``Dataset``.
    """
    directory = os.path.dirname(__file__)
    packagename = os.path.splitext(__name__)[0]
    for filename in glob.glob(os.path.join(directory, "*.py")):
        # Bug fix: `filename` is a full path, so startswith() on it never
        # matched "_" or "dataset3" -- the tests must run on the base name
        basename = os.path.basename(filename)
        if basename.startswith("_") or basename.startswith("dataset3"):
            continue
        modulename = os.path.splitext(basename)[0]
        module = importlib.import_module(packagename + "." + modulename)
        for clsname in dir(module):
            cls = getattr(module, clsname)
            try:
                is_table = issubclass(cls, Table) and cls.datatype is not None
            except TypeError:
                # dir() also lists names that are not classes at all
                is_table = False
            if is_table:
                add_funcname = "add_" + cls.datatype
                read_funcname = "_read_" + cls.datatype
                log.debug(f"Registering data table {cls.datatype} from {modulename} to Dataset.{add_funcname}")
                setattr(Dataset, add_funcname, Dataset._add(cls))
                setattr(Dataset, read_funcname, Dataset._read(cls))
def gradient_model(dset):
    """Calculates asymmetric delay based on gradient model given in configuration file

    Args:
        dset (Dataset):  Model data.

    Returns:
        numpy.ndarray:   Troposphere asymmetric delay in [m] for each observation
    """
    model = config.tech.get("gradients", section=MODEL, default="apg").str
    log.debug("Troposphere gradient model: {}", model)

    # Gradient mapping function of Chen and Herring
    # Note: Be aware, that the apg.f function uses c = 0.0031 instead of c = 0.0032.
    elevation = dset.site_pos.elevation
    mg = 1 / (np.sin(elevation) * np.tan(elevation) + 0.0032)

    if model == "apg":
        gn, ge = apg_gradient_model(dset)
    elif model == "none":
        gn = ge = np.zeros(dset.num_obs)
    else:
        log.fatal(
            "Unknown troposphere gradient model {}. Available models are {}",
            model,
            ", ".join(GRADIENT_MODELS),
        )

    log.debug("Troposphere gradients North and East (average): {} {} [m]", np.mean(gn), np.mean(ge))
    return mg, gn, ge
def gnss_epoch_by_epoch_difference(dset: "Dataset") -> None:
    """Add epoch by epoch difference of observations to dataset

    For each observation field the difference between consecutive epochs is
    computed separately per GNSS and per satellite, and stored as a new float
    field ``diff_epo.<field>`` carrying the same unit as the observation field.

    Args:
        dset: A Dataset containing model data.
    """
    for field in dset.obs.fields:
        log.debug(f"Add epoch by epoch difference for observation field '{field}' to dataset.")

        # Initialize with NaN so observations never assigned below stay undefined
        diff = np.full(dset.num_obs, float('nan'))
        for sys in dset.unique("system"):
            idx_sys = dset.filter(system=sys)
            # Fancy indexing returns a copy, so sys_tmp can be mutated per satellite
            # before being written back into diff in one step
            sys_tmp = dset.obs[field][idx_sys]
            for sat in set(dset.satellite[idx_sys]):
                idx_sat = dset.satellite[idx_sys] == sat
                sys_tmp[idx_sat] = np.insert(
                    np.diff(dset.obs[field][idx_sys][idx_sat]),
                    0,
                    float('nan'),
                )  # first epoch is not differenced, therefore NaN has to be inserted as first element.
            diff[idx_sys] = sys_tmp
        dset.add_float(f"diff_epo.{field}", val=diff, unit=dset.unit(f"obs.{field}"))
def gnss_dop(dset: "Dataset") -> None:
    """Adds dilution of precision (DOP) to dataset

    GDOP, PDOP, TDOP, HDOP and VDOP are computed epoch by epoch from the
    satellite azimuth and elevation angles and added to the dataset as float
    fields (or updated in place if the fields already exist).

    Args:
        dset:   A Dataset containing model data.
    """
    dops = {dop: np.zeros(dset.num_obs) for dop in ("gdop", "pdop", "tdop", "hdop", "vdop")}

    # TODO: Check number of satellite observations !!!
    for time in dset.unique("time"):
        idx = dset.filter(time=time)
        dops["gdop"][idx], dops["pdop"][idx], dops["tdop"][idx], dops["hdop"][idx], dops["vdop"][idx] = compute_dops(
            dset.site_pos.azimuth[idx], dset.site_pos.elevation[idx]
        )

    for dop, val in dops.items():
        if dop in dset.fields:
            dset[dop][:] = val
            # Bug fix: old message claimed only gdop/pdop/hdop/vdop were handled
            # and was logged once per field anyway; log the actual field instead
            log.debug(f"{_SECTION}: Update {dop} field of Dataset.")
        else:
            dset.add_float(dop, val=val)
            log.debug(f"{_SECTION}: Add {dop} field to Dataset.")
def data_handling(dset):
    """Edits data based on SLR handling file

    Args:
        dset:     A Dataset containing model data.

    Returns:
        Array containing False for observations to throw away
    """
    handling = apriori.get("slr_handling_file", time=dset.time)
    discard = np.zeros(dset.num_obs, dtype=bool)

    for station in dset.unique("station"):
        flags = handling.get(station, {})

        # TODO: To be implemented
        if "V" in flags:
            log.dev(f"TODO: Station {station}, marked with a V, not sure what that means")

        # X is data to be deleted
        # N is a non reliable station, not to be used for operational analysis
        # Q is a station in quarantene
        for key in ("X", "N", "Q"):
            for interval in flags.get(key, []):
                start_x, end_x = interval[0]
                in_interval = (
                    dset.filter(station=station)
                    & (dset.time.datetime >= start_x)
                    & (dset.time.datetime <= end_x)
                )
                if np.any(in_interval):
                    log.debug(f"Removed data for station {station} in interval {start_x}-{end_x}, marked with {key}")
                    discard |= in_interval

    return ~discard
def zenith_hydrostatic_delay(pressure, latitude, height):
    """Calculates zenith hydrostatic delay based on configuration file definition

    Args:
        pressure:   Array with atmospheric pressure for each observation in [hPa]
        latitude:   Array with geodetic latitude for each observation in [rad]
        height:     Array with orthometric height for each observation in [m]

    Returns:
        numpy.ndarray: Array with zenith hydrostatic delay for each observation in [m]
    """
    chosen = config.tech.get("zenith_hydrostatic_delay", section=MODEL, default="saastamoinen").str
    log.debug("Troposphere zenith hydrostatic delay model: {}", chosen)

    if chosen == "saastamoinen":
        delay = saastamoinen_zenith_hydrostatic_delay(pressure, latitude, height)
    else:
        log.fatal(
            "Unknown zenith hydrostatic troposphere delay model {}. Available models are {}",
            chosen,
            ", ".join(ZENITH_HYDROSTATIC_MODELS),
        )

    log.debug("Troposphere zenith hydrostatic delay (average): {} [m]", np.mean(delay))
    return delay
def interpolate_meteorological_data(self):
    """Calculate temperature, humidity and pressure at observation epochs

    Meteorological data are calculated at observation epochs by interpolating
    in the data given on the observation file for each station. Epochs up to
    one hour outside the met data span are filled with the nearest met value;
    other missing meteorological data are currently not handled (left as zero).
    """
    obs_time = self.meta["time"]
    # NOTE(review): max_obs/min_obs are computed but never used below
    max_obs = max(obs_time)
    min_obs = min(obs_time)
    # Met fields are named "met_<station>"; strip the prefix to get the station
    for field, station in [(f, f[4:]) for f in self.data.keys() if f.startswith("met_")]:
        log.debug("Meteorological data available for station {}", station)
        met_time = self.data[field].pop("time")
        met_time_sorted = sorted(met_time)
        max_met_time = met_time_sorted[-1]
        min_met_time = met_time_sorted[0]
        # Indices of the extreme epochs in the (unsorted) original list,
        # used to pick boundary values for extrapolation
        max_idx = met_time.index(max_met_time)
        min_idx = met_time.index(min_met_time)
        for met_type in self.data[field].keys():
            temp_array = np.zeros(len(obs_time))
            for i in range(0, len(obs_time)):
                # Extrapolating one hour before and after available met data, other missing met data is set to zero
                if min_met_time <= obs_time[i] <= max_met_time:
                    # Inside the met data span: linear interpolation
                    temp_array[i] = interpolate.interp1d(met_time, self.data[field][met_type])(obs_time[i])
                elif min_met_time - 1 / 24 < obs_time[i] < min_met_time:
                    # Up to one hour (1/24 day) before: hold first value
                    temp_array[i] = self.data[field][met_type][min_idx]
                elif max_met_time < obs_time[i] < max_met_time + 1 / 24:
                    # Up to one hour after: hold last value
                    temp_array[i] = self.data[field][met_type][max_idx]
            self.data[field][met_type] = temp_array
def write_to_dataset(self, dset):
    """Store DORIS data in a dataset

    Args:
        dset: The Dataset where data are stored.
    """
    # num_obs must be set before any fields are added
    dset.num_obs = len(self.data["obs"]["time"])
    # pop() removes time/station so the loop below only adds the float fields
    dset.add_time("time", val=self.data["obs"].pop("time"), scale="utc", format="isot")
    dset.add_text("station", val=self.data["obs"].pop("station"))
    for field, value in self.data["obs"].items():
        dset.add_float(field, val=np.array(value))

    # Station data
    # NOTE(review): the union assumes every station entry has every field;
    # a station missing a field would raise KeyError below -- TODO confirm
    sta_fields = set().union(*[v.keys() for v in self.meta["station_info"].values()])
    for field in sta_fields:
        dset.add_text(field, val=[self.meta["station_info"][s][field] for s in dset.station])

    # Station positions: look up each station in the terrestrial reference frame
    site_pos = np.zeros((dset.num_obs, 3))
    trf = apriori.get("trf", time=dset.time)
    for site in dset.unique("station"):
        idx = dset.filter(station=site)
        site_pos[idx, :] = trf[site].pos.itrs[idx, :]
        log.debug("Using position {} for {}", np.mean(site_pos[idx, :], axis=0), site)
    dset.add_position("site_pos", time="time", itrs=site_pos)

    # Satellite: one repeated name for all observations
    dset.add_text("satellite", val=[self.vars["sat_name"]] * dset.num_obs)
def plot_dop(self, figure_name: str="plot_dop.{FIGURE_FORMAT}", ) -> pathlib.PosixPath:
    """Plot all five dilution-of-precision series in one figure

    Args:
        figure_name: File name of figure.

    Returns:
        Path of the written figure file.
    """
    figure_path = self.figure_dir / figure_name.replace("{FIGURE_FORMAT}", self.figure_format)
    log.debug(f"Plot {figure_path}.")

    # All DOP series share the same time axis
    epochs = self.dset.time.gps.datetime
    plot(
        x_arrays=[epochs] * 5,
        y_arrays=[self.dset.gdop, self.dset.pdop, self.dset.vdop, self.dset.hdop, self.dset.tdop],
        xlabel="Time [GPS]",
        ylabel="Dilution of precision",
        y_unit="",
        labels=["GDOP", "PDOP", "VDOP", "HDOP", "TDOP"],
        figure_path=figure_path,
        opt_args={
            "figsize": (7, 4),
            "legend": True,
            "plot_to": "file",
        },
    )
    return figure_path
def parse_args(*param_types, doc_module=None):
    """Parse command line arguments and general options

    Log versions of python, the script and the configuration.
    Finally parse arguments from the given parameter types.

    Args:
        param_types: Strings describing the expected parameter types.
                     Each string must be one of the keys in #_PARSERS.
        doc_module:  Module whose documentation is used for version info and help.

    Returns:
        List of command line arguments parsed according to param_types
        (a single value if only one type was given, None if none).
    """
    # Log version of python and the program, and the configuration file used
    if doc_module:
        log.info(f"Start {_get_program_version(doc_module)} at {datetime.now().strftime(config.FMT_datetime)}")
        log.debug(f"Receive command line arguments [{', '.join(sys.argv[1:])}]")

    title, sources = get_configuration(cfg=get_program_name())
    # TODO log something meaningful when session config already exists
    log.info(f"Use {title} configuration from {', '.join(sources)}")

    # Parse arguments, showing the help text on any failure
    try:
        arguments = [_PARSERS[type]() for type in param_types]
    except Exception:
        _print_help_from_doc(doc_module)
        raise

    # Return arguments (scalar if only one element, None if list is empty)
    if not arguments:
        return None
    return arguments if len(arguments) > 1 else arguments[0]
def init(fast_check=True, **dep_vars):
    """Start a clean list of dependencies

    The dep_vars describe which model run stage the dependency is valid for.
    These are cached, so after a first invocation (as is done in pipelines.run)
    repeated calls do not need to specify the dep_vars.

    Args:
        fast_check: Fast check uses timestamps, slow check uses md5 checksums.
        dep_vars:   Variables specifying the model_run_depends-file.
    """
    # Flush any dependencies registered so far before starting over
    write()

    # Cache the file variables for later calls
    _DEPENDENCY_FILE_VARS.clear()
    _DEPENDENCY_FILE_VARS["fast_check"] = fast_check
    _DEPENDENCY_FILE_VARS.update(dep_vars)

    # Remove a dependency file possibly left over from a previous run
    dep_path = files.path("model_run_depends", file_vars=_DEPENDENCY_FILE_VARS)
    try:
        dep_path.unlink()
        log.debug(f"Removing old dependency file {dep_path}")
    except FileNotFoundError:
        pass  # No dependency file -> nothing to delete

    # Ensure dependencies reach disk even if the program exits unexpectedly
    atexit.register(_write)
def read(self): """Read a dataset from file A dataset is stored on disk in two files, one JSON-file and one HDF5-file. Typically the HDF5-file is great for handling numeric data, while JSON is more flexible. The actual reading of the data is handled by the individual datatype table-classes. The dispatch to the correct class is done by functions defined in the :func:`Dataset._read`-method which is called by :mod:`where.data._data` when Dataset is first imported. """ # Open and read JSON-file json_path = files.path("dataset_json", file_vars=self.vars) with files.open_path(json_path, mode="rt", write_log=False) as f_json: json_all = json.load(f_json) if self.name not in json_all: raise FileNotFoundError("Dataset {} not found in file {}".format(self.name, json_path)) log.debug(f"Read dataset {self.vars['tech']}-{self.vars['stage']} from disk at {json_path.parent}") json_data = json_all[self.name] self._num_obs = json_data["_num_obs"] tables = json_data["_tables"] # Open HDF5-file with files.open_datafile("dataset_hdf5", file_vars=self.vars, mode="r", write_log=False) as f_hdf5: hdf5_data = f_hdf5[self.name] # Read data for each table by dispatching to read function based on datatype for table, dtype in tables.items(): read_func = getattr(self, "_read_" + dtype) read_func(table, json_data, hdf5_data) # Add meta and vars properties self.meta = json_data.get("_meta", dict()) self.vars = json_data.get("_vars", self.vars)
def changed(fast_check=True, **dep_vars):
    """Check if the dependencies of a model run have changed

    Returns True if any of the files in the dependency file have changed, or if
    the dependency file does not exist.

    Args:
        fast_check: Fast check uses timestamps, slow check uses md5 checksums.
        dep_vars:   Variables specifying the model_run_depends-file.

    Returns:
        Boolean: True if any file has changed or if the dependecy file does not exist.
    """
    # A missing dependency file counts as changed
    dependency_path = files.path("model_run_depends", file_vars=dep_vars)
    if not dependency_path.exists():
        log.debug(f"Dependency file {dependency_path} does not exist")
        return True

    # Compare the stored checksum of every dependency with its current one
    dependencies = Configuration.read_from_file("dependencies", dependency_path)
    for file_path in dependencies.section_names:
        previous = dependencies[file_path].checksum.str
        current = _file_info(file_path, fast_check=fast_check)["checksum"]
        if current != previous:
            log.debug(f"Dependency {file_path} changed from {previous} to {current}")
            return True

    return False
def pick_data(eop_data, time, window, sources):
    """Pick out subset of eop_data relevant for the given time epochs and interpolation window

    Args:
        eop_data (Dict):  Dictionary of EOP data indexed by MJD dates.
        time (Time):      Time epochs for which to calculate EOPs.
        window (Int):     Interpolation window [days].
        sources:          Ordered EOP sources to try; falls back to configuration.

    Returns:
        Dict: EOP data subset to the time period needed.

    Raises:
        exceptions.MissingDataError: If no source covers the whole time period.
    """
    # Pad the observation period with half the interpolation window on each side
    mjd = time.utc.mjd
    if time.size == 1:
        first, last = mjd, mjd
    else:
        first, last = mjd.min(), mjd.max()
    start_time = np.floor(first) - window // 2
    end_time = np.ceil(last) + window // 2

    sources = sources if sources else config.tech.eop_sources.list
    for source in sources:
        # A source missing any required day raises KeyError and is skipped
        try:
            subset = {d: eop_data[source][d].copy() for d in np.arange(start_time, end_time + 1)}
            eop_path = config.files.path(f"eop_{source}")
            log.debug(f"Using a priori EOP values from {eop_path} ")
            return subset
        except KeyError:
            pass

    # No data found if we reached this point
    paths = [str(config.files.path(f"eop_{k}")) for k in sources]
    raise exceptions.MissingDataError(
        "Not all days in the time period {:.0f} - {:.0f} MJD were found in EOP-files {}"
        "".format(start_time, end_time, ", ".join(paths))
    )
def _plot_position_kinematic(dset: "Dataset", figure_dir: "pathlib.PosixPath") -> None:
    """Plot site position plots for kinematic solution

    Writes two figures to `figure_dir`: a time series of latitude, longitude
    and height, and a horizontal (longitude vs. latitude) scatter plot with a
    square axis range centered on the mean position.

    Args:
       dset:        A dataset containing the data.
       figure_dir:  Figure directory
    """
    # Figure 1: latitude/longitude/height time series
    figure_path = figure_dir / f"plot_timeseries_llh.{FIGURE_FORMAT}"
    log.debug(f"Plot {figure_path}.")
    plot_scatter_subplots(
        x_array=dset.time.gps.datetime,
        y_arrays=[
            np.rad2deg(dset.site_pos.llh.lat),
            np.rad2deg(dset.site_pos.llh.lon),
            dset.site_pos.llh.height
        ],
        xlabel="Time [GPS]",
        ylabels=["Latitude", "Longitude", "Height"],
        colors=["steelblue", "darkorange", "limegreen"],
        y_units=["degree", "degree", "meter"],
        figure_path=figure_path,
        opt_args={
            "figsize": (6, 6.8),
            "plot_to": "file",
            "sharey": False,
            "title": "Site position",
            "statistic": ["min", "max"],
        },
    )

    # Figure 2: horizontal scatter of longitude vs. latitude
    figure_path = figure_dir / f"plot_horizontal_position.{FIGURE_FORMAT}"
    log.debug(f"Plot {figure_path}.")
    lon = np.rad2deg(dset.site_pos.llh.lon)
    lat = np.rad2deg(dset.site_pos.llh.lat)

    # Determine range: half the larger of the lat/lon spans, so x/y limits are square
    dx = np.abs(np.max(lon) - np.min(lon))
    dy = np.abs(np.max(lat) - np.min(lat))
    incr = np.max([dx, dy]) / 2  # increment

    plot_scatter_subplots(
        x_array=lon,
        y_arrays=[lat],
        xlabel="Longitude [degree]",
        ylabels=["Latitude"],
        y_units=["degree"],
        figure_path=figure_path,
        opt_args={
            "grid": True,
            "figsize": (6, 6),
            "plot_to": "file",
            "title": "Horizontal position",
            "xlim": [np.mean(lon) - incr, np.mean(lon) + incr],
            "ylim": [np.mean(lat) - incr, np.mean(lat) + incr],
        },
    )
def _check_last_epoch_sample_point(dset, precise, epoch_interval):
    """Keep last observation epoch depending on existing precise orbit sample points

    Precise orbit sample points are needed to carry out interpolation of precise orbits for the last observation
    epochs. If no precise orbit sample point is available after the last satellite observation epochs, then this
    epochs will be removed for this satellite.

    The time difference between the last observation epochs and the next precise orbit sample point is determined.
    'Last observation epoch' + 'sampling rate' is chosen as reference time for the selection of the nearest orbit
    sample point, which corresponds normally to 0:00 GPS time. If the time difference exceeds the following interval,
    then the observation epochs are rejected:

        -(precise orbit epoch interval + 1) < time difference < 0

    Args:
        dset (Dataset):            A Dataset containing model data.
        precise (PreciseOrbit):    Precise orbit object with precise orbit information.
        epoch_interval (float):    Epoch interval of precise orbit sample points

    Returns:
        tuple: Tuple with array containing False for last observations to throw away and indices indicating last
               observation epoch.
    """
    # NOTE(review): the original docstring also documented a `dset_idx` argument that is
    #               not part of the signature -- presumably stale documentation.
    sampling_rate = config.tech.sampling_rate.float

    # Get indices for last observation epochs
    # NOTE(review): the np.ones initialization is immediately overwritten by the
    #               comparison on the next statement -- it appears redundant.
    last_idx = -1
    last_epoch_idx = np.ones(dset.num_obs, dtype=bool)
    last_epoch_idx = (dset.time.gps.mjd >= dset.time.gps.mjd[last_idx]
                      - (epoch_interval - sampling_rate) * Unit.second2day)

    # Get set with satellite and time entries for getting corresponding precise orbit sample points
    # Note: Sample point reference time is 'last observation epoch' + 'sampling rate', which corresponds normally to
    #       0:00 GPS time.
    satellites = dset.satellite[last_epoch_idx]
    time = Time(val=dset.time.gps.datetime[last_idx], fmt="datetime", scale=dset.time.scale) + TimeDelta(
        sampling_rate, fmt="seconds", scale=dset.time.scale)
    precise_idx = precise._get_nearest_sample_point(satellites, time)

    # Keep observations epochs, where a precise orbit sample point exists after the last observation epoch
    diff_time = (dset.time.gps.mjd[last_epoch_idx] - precise.dset_edit.time.gps.mjd[precise_idx]) * Unit.day2second
    keep_idx = np.logical_and(diff_time > -(epoch_interval + 1), diff_time < 0)

    # Build one debug line per rejected entry (satellite, epoch and time difference)
    removed_entries = "DEBUG: ".join([
        f"{s} {t.strftime(' %Y-%m-%d %H:%M:%S (GPS)')}, dt = {dt:8.2f} s ({-(epoch_interval + 1)} < dt < 0)\n"
        for s, t, dt in zip(
            satellites[np.logical_not(keep_idx)],
            dset.time.gps.datetime[last_epoch_idx][np.logical_not(keep_idx)],
            diff_time[np.logical_not(keep_idx)],
        )
    ])
    log.debug(f"Following last epoch entries are removed: \n{removed_entries}")

    return keep_idx, last_epoch_idx
def zenith_wet_delay(dset, temperature, e, tm, lambd):
    """Calculates zenith wet delay based on configuration file definition

    Args:
        dset (Dataset):               A Dataset containing model data
        temperature (numpy.ndarray):  Temperature for each observation in [Celsius]
        e (numpy.ndarray):            Water vapor pressure for each observation in [hPa]
        tm (numpy.ndarray):           Mean temperature of the water vapor for each observation in [K]
        lambd (numpy.ndarray):        Water vapor decrease factor for each observation

    Returns:
        numpy.ndarray: Zenith wet delay values for each observation in [m].
    """
    model = config.tech.get("zenith_wet_delay", section=MODEL, default="").str
    mapping_function = config.tech[MODEL].mapping_function.str

    # Use default zenith wet delay models, if no model is defined in configuration file
    if not model:
        try:
            # The default zenith wet delay model is tied to the chosen mapping function
            model = MAPPING_ZENITH_WET_RELATION[mapping_function]
        except KeyError:
            log.fatal(
                "Unknown mapping function {}. Available mapping functions are {}",
                mapping_function,
                ", ".join(MAPPING_FUNCTIONS),
            )

    log.debug("Troposphere zenith wet delay model: {}", model)

    if model == "none":
        zwd = np.zeros(dset.num_obs)
    elif model == "askne":
        zwd = askne_zenith_wet_delay(e, tm, lambd)
        # TODO: log.fatal("Meteorological model '{}' does not provide input parameters for using Askne and "
        #                 "Nordius zenith wet delay model. Use 'gpt2w' model.", met_model)
    elif model == "davis":
        latitude, _, height = dset.site_pos.llh.T
        zwd = davis_zenith_wet_delay(latitude, height, temperature, e)
    elif model == "saastamoinen":
        latitude, _, height = dset.site_pos.llh.T
        zwd = saastamoinen_zenith_wet_delay(latitude, height, temperature, e)
        # TODO: log.fatal("Meteorological model '{}' does not provide input parameters for using Saastamoinen "
        #                 "zenith wet delay model. Use 'gpt2' or 'gpt2w' model.", met_model)
    elif model == "vmf1_gridded":
        zwd = vmf1_zenith_wet_delay(dset)
    else:
        log.fatal(
            "Unknown zenith wet troposphere delay model {}. Available models are {}",
            model,
            ", ".join(ZENITH_WET_DELAY_MODELS),
        )

    log.debug("Troposphere zenith wet delay (average): {} [m]", np.mean(zwd))
    return zwd
def delete_from_file(self, tech=None, stage=None, dataset_name=None, dataset_id=None): """Delete this or related datasets from file Specify arguments relative to this dataset to find datasets which will be deleted. """ # Use existing names as default tech = self.vars["tech"] if tech is None else tech stage = self.vars["stage"] if stage is None else stage dataset_name = self.dataset_name if dataset_name is None else dataset_name if dataset_id is None: dataset_id = self.dataset_id else: dataset_id = _data.parse_dataset_id(self.rundate, tech, stage, dataset_name, dataset_id) dataset_id = {dataset_id} if isinstance(dataset_id, (float, int)) else set(dataset_id) ids_to_delete = dataset_id & set( _data.list_dataset_ids(self.rundate, tech, dataset_name, stage, dataset_name)) if not ids_to_delete: return # Open JSON and HDF5 file and remove datasets file_vars = dict(self.vars, tech=tech, stage=stage) json_path = files.path("dataset_json", file_vars=file_vars) with files.open_path(json_path, mode="rt", write_log=False) as f_json: json_all = json.load(f_json) with files.open_datafile("dataset_hdf5", file_vars=file_vars, mode="a", write_log=False) as f_hdf5: for id_to_delete in ids_to_delete: name = "{name}/{id:04d}".format(name=dataset_name, id=id_to_delete) del json_all[name] del f_hdf5[name] log.debug( "Deleted {name} from dataset {tech}-{stage} at {directory}", name=name, tech=tech, stage=stage, directory=json_path.parent, ) with files.open_path(json_path, mode="wt", write_log=False) as f_json: json.dump(json_all, f_json) # Delete files if all datasets are deleted if not any(["/" in k for k in json_all.keys()]): json_path.unlink() files.path("dataset_hdf5", file_vars=file_vars).unlink()
def _ref_ellipsoid(self):
    """ID of reference ellipsoid specified in the configuration file

    The IDs correspond to the ones used by the SOFA library, WGS84: 1, GRS80: 2, WGS72: 3.
    """
    cfg_entry = config.tech.get("reference_ellipsoid")
    ellipsoid = cfg_entry.as_enum("reference_ellipsoid")
    log.debug("Using reference ellipsoid {} as specified in {}", ellipsoid.name, cfg_entry.source)
    return ellipsoid
def gnss_linear_combination(dset: "Dataset") -> None:
    """Add GNSS linar observation combinations to dataset

    The combinations to generate are read from the configuration; each is
    computed by the matching function and stored as a float field
    ``lin.<combination>`` in [m].

    Args:
        dset: A Dataset containing model data.
    """
    # Dispatch table from combination name to the function that computes it
    func = {
        "code_multipath": linear_combination_cmc,
        "geometry_free": linear_combination,
        "ionosphere_free": linear_combination,
        "melbourne_wuebbena": linear_combination_melbourne,
        "narrow_lane": linear_combination,
        "wide_lane": linear_combination,
    }

    for comb_name in config.tech[_SECTION].linear_combination.list:
        log.debug(f"Add {comb_name} combination to dataset.")

        # Code-multipath linear combination
        if comb_name == "code_multipath":
            try:
                cmc1, cmc2 = func[comb_name](dset)
            except ValueError:
                # ValueError signals that the needed observations are missing
                log.warn(
                    f"Code multipath linear combination is not added to dataset. Dual-frequency code and phase "
                    f"observations are needed.")
                continue
            # NOTE(review): cmc1 and cmc2 are added under the same field name
            #               f"lin.{comb_name}" -- the second add_float likely collides
            #               or overwrites the first; confirm intended field naming.
            dset.add_float(f"lin.{comb_name}", val=cmc1["val"], unit="meter")
            dset.add_float(f"lin.{comb_name}", val=cmc2["val"], unit="meter")

        elif comb_name == "melbourne_wuebbena":
            try:
                linear_comb = func[comb_name](dset)
            except ValueError:
                log.warn(
                    f"Melbourne-Wübbena linear combination is not added to dataset. Dual-frequency code and "
                    f"phase observations are needed.")
                continue
            dset.add_float(
                f"lin.{comb_name}",
                val=linear_comb["val"],
                unit="meter",
            )

        else:
            linear_comb = func[comb_name](comb_name, dset)
            # NOTE(review): every obs_code is added under the same field name
            #               f"lin.{comb_name}" -- confirm whether the obs code
            #               should be part of the field name.
            for obs_code in linear_comb.keys():
                dset.add_float(
                    f"lin.{comb_name}",
                    val=linear_comb[obs_code]["val"],
                    unit="meter",
                )
def calculate_data(self):
    """Run every calculator configured for this analysis

    Each calculator call is logged and timed at debug level.
    """
    for calc in self.setup_calculators():
        log.debug(f"Start calculator {calc.__name__} in {self.__module__}")
        timer = Timer(f"Finish calculator {calc.__name__} ({self.__module__}) in", logger=log.debug)
        with timer:
            calc()
def plot_satellite_availability( self, figure_name: str="plot_satellite_availability.{FIGURE_FORMAT}", ) -> pathlib.PosixPath: """Generate GNSS satellite observation availability overview based on RINEX observation file Args: figure_name: File name of figure. Returns: Figure path. """ # Generate x- and y-axis data per system x_arrays = [] y_arrays = [] labels = [] figure_path = self.figure_dir / figure_name.replace("{FIGURE_FORMAT}", self.figure_format) log.debug(f"Plot {figure_path}.") time, satellite, system = self._sort_by_satellite() for sys in sorted(self.dset.unique("system"), reverse=True): idx = system == sys x_arrays.append(time[idx]) y_arrays.append(satellite[idx]) labels.append(enums.gnss_id_to_name[sys].value) # Plot scatter plot num_sat = len(self.dset.unique("satellite")) plot( x_arrays=x_arrays, y_arrays=y_arrays, xlabel="Time [GPS]", ylabel="Satellite", y_unit="", #labels=labels, figure_path=figure_path, opt_args={ "colormap": "tab20", "figsize": (0.1 * num_sat, 0.2 * num_sat), "fontsize": 10, "legend": True, "legend_location": "bottom", "legend_ncol": len(self.dset.unique("system")), "plot_to": "file", "plot_type": "scatter", #"title": "Satellite availability", }, ) return figure_path
def _read(self, dset_raw):
    """Read SP3 orbit file data and save it in a Dataset

    In addition to the given date, we read data for the day before and after. This is needed to carry out correct
    orbit interpolation at the start and end of a day.

    TODO: How well fits the orbits from day to day? Is it necessary to align the orbits?

    Args:
        dset_raw (Dataset):   Dataset representing raw data from apriori orbit files

    Returns:
        Dataset: The same dset_raw, extended with the parsed orbit data.
    """
    # Start day_offset days before the rundate to cover the interpolation margin
    date_to_read = dset_raw.rundate - timedelta(days=self.day_offset)
    file_paths = list()

    # Loop over days to read
    while date_to_read <= dset_raw.rundate + timedelta(days=self.day_offset):
        # An explicitly given file path overrides the per-date file lookup
        if self.file_path is None:
            file_path = files.path(self.file_key, file_vars=config.date_vars(date_to_read))
        else:
            file_path = self.file_path

        log.debug(f"Parse precise orbit file {file_path}")

        # Generate temporary Dataset with orbit file data
        dset_temp = data.Dataset(
            rundate=date_to_read,
            tech=dset_raw.vars["tech"],
            stage="temporary",
            dataset_name="",
            dataset_id=0,
            empty=True,
        )
        parser = parsers.parse(parser_name="orbit_sp3", file_path=file_path, rundate=date_to_read)
        parser.write_to_dataset(dset_temp)
        file_paths.append(str(parser.file_path))

        # Extend Dataset dset_raw with temporary Dataset
        # (first day is copied; subsequent days are appended)
        date = date_to_read.strftime("%Y-%m-%d")
        dset_raw.copy_from(dset_temp, meta_key=date) if dset_raw.num_obs == 0 else dset_raw.extend(
            dset_temp, meta_key=date)
        dset_raw.add_to_meta("parser", "file_path", file_paths)

        date_to_read += timedelta(days=1)

    return dset_raw
def remove_empty_systems(self):
    """Remove GNSSs without observations from `self.meta['obstypes']`.

    The GNSSs are defined in RINEX header (SYS / # / OBS TYPES ). It can
    happen, that no observations are available for a given GNSS. GNSSs
    without observations are deleted from dictionary `self.meta['obstypes']`
    in this routine.
    """
    observed_systems = self.data["text"]["system"]
    # Iterate over a copy of the keys, since entries may be deleted
    for sys in list(self.meta["obstypes"]):
        if sys in observed_systems:
            continue
        log.debug(f"No observation given for GNSS {sys!r}. GNSS {sys!r} is removed from Dataset.")
        del self.meta["obstypes"][sys]
def delete_file(file_path):
    """Deletes a file

    Does not delete directories

    Args:
        file_path(Path):   Path to a file
    """
    try:
        file_path.unlink()
        log.debug(f"Deleted {file_path}")
    except OSError:
        # Warn (instead of failing) e.g. when the path is a directory
        log.warn("Unable to delete {}", file_path)
def write(self, write_level=None):
    """Write a dataset to file

    A dataset is stored on disk in two files, one JSON-file and one HDF5-file. Typically the HDF5-file is great for
    handling numeric data, while JSON is more flexible. The actual writing of the data is handled by the individual
    datatype table-classes. These classes are free to choose how they divide the data between the JSON- and
    HDF5-files, as long as they are able to recover all the data.

    Args:
        write_level: Optional write level; falls back to the configured value.
    """
    json_path = files.path("dataset_json", file_vars=self.vars)
    log.debug(f"Write dataset {self.vars['tech']}-{self.vars['stage']} to disk at {json_path.parent}")

    # Read write level from config
    write_level = config.tech.get("write_level", value=write_level).as_enum("write_level").name

    # Read existing data in JSON-file, or start fresh if there is none yet
    try:
        with files.open_path(json_path, mode="rt", write_log=False) as f_json:
            json_all = json.load(f_json)
    except FileNotFoundError:
        json_all = dict()
    json_all.setdefault(self.name, dict())
    json_data = json_all[self.name]

    # Figure out which tables have data at this write level
    tables = [t for t in self._data.values() if t.get_fields(write_level)]

    # Open HDF5-file; any existing group for this dataset is replaced
    with files.open_datafile("dataset_hdf5", file_vars=self.vars, mode="a", write_log=False) as f_hdf5:
        if self.name in f_hdf5:
            del f_hdf5[self.name]
        hdf5_data = f_hdf5.create_group(self.name)

        # Write data for each table (HDF5-data are automatically written to disk)
        for table in tables:
            table.write(json_data, hdf5_data, write_level)

    # Store metadata in JSON-data (needed by read() to dispatch to table readers)
    json_data["_version"] = where.__version__
    json_data["_num_obs"] = self.num_obs
    json_data["_tables"] = {tbl.name: tbl.datatype for tbl in tables}
    json_data["_units"] = {tbl.name: tbl._units for tbl in tables}
    json_data["_write_levels"] = {tbl.name: tbl._write_level_strings for tbl in tables}
    json_data["_meta"] = self.meta
    json_data["_vars"] = self.vars

    # Store last dataset_id written to
    json_all.setdefault(self.dataset_name, dict())["_last_dataset_id"] = self.dataset_id

    # Write JSON-data to file
    with files.open_path(json_path, mode="wt", write_log=False) as f_json:
        json.dump(json_all, f_json)
def _interpolate_meteorological_data(dset, data, rundate):
    """Calculate temperature, humidity and pressure at observation epochs

    Meteorological data are calculated at observation epochs by interpolating in the data given on the observation
    file for each station. Duplicate met epochs are removed, and the series is padded one month on each side so
    cubic interpolation can extrapolate at the edges.

    Missing meteorological data are currently not handled.

    Args:
        dset:     Dataset whose obs_time epochs the met data are interpolated to.
        data:     Dictionary of parsed data, with met fields named "met_<station>".
        rundate:  Run date used as the time origin for seconds-since-rundate.

    Returns:
        The same data dictionary with met series replaced by interpolated arrays.
    """
    rundate = datetime(rundate.year, rundate.month, rundate.day)
    for field, station in [(f, f[4:]) for f in data.keys() if f.startswith("met_")]:
        log.debug(f"Meteorological data available for station {station}")

        # Met epochs arrive as nested lists; flatten and convert to seconds since rundate
        met_time = data[field].pop("met_time")
        flat_list = [item for sublist in met_time for item in sublist]
        met_time_float = np.array([(flat_list[i] - rundate).total_seconds() for i in range(0, len(flat_list))])
        met_time_unique, met_index = np.unique(met_time_float, return_index=True)

        diff = len(met_time_float) - len(met_time_unique)
        if diff > 0:
            log.dev(f"Removed duplicate met data for station {station}")
            log.dev("Do this for the actual obs data also!")

        # A single met epoch cannot be interpolated; repeat its value for all observations
        if len(met_time_unique) == 1:
            for met_type in data[field].keys():
                data[field][met_type] = np.repeat(data[field][met_type][0], dset.num_obs)
            continue

        # Extrapolation one month before/after
        # (this is overkill, most of these values will be removed later when taking the diagonal)
        min_time = min(met_time_unique) - 31 * 86400
        max_time = max(met_time_unique) + 31 * 86400
        met_time_unique = np.hstack((np.array(min_time), met_time_unique, np.array(max_time)))

        for met_type in data[field].keys():
            # Flatten the met values, drop duplicates (same indices as the epochs),
            # and pad the edges with the first/last value to match the padded epochs
            met_data_array = data[field][met_type]
            flat_list = [item for sublist in met_data_array for item in sublist]
            met_data_array = np.array([flat_list[i] for i in met_index])
            met_data_array = np.hstack((met_data_array[0], met_data_array, met_data_array[-1]))
            data[field][met_type] = interpolation.interpolate(met_time_unique, met_data_array, dset.obs_time,
                                                              kind="cubic")

    return data
def _table_outlier_overview(dset: "Dataset"):
    """Generate Dataframe table with overview over number of outliers per satellite

    Args:
        dset:      A dataset containing the data.

    Returns:
        Dataframe with satellites as indices and following columns:

        | Name        | Description                                                                                  |
        |-------------|----------------------------------------------------------------------------------------------|
        | outlier     | Number of outliers for each satellite                                                        |

        Example:

            |    |outlier |
            |----|--------|
            | G01| 0      |
            | G02| 11     |
            | G03| 3      |
            | .. | ...    |
            | SUM| 42     |
    """
    columns = ["outlier"]
    df = pd.DataFrame(columns=columns)

    dset_outlier = _get_outliers_dataset(dset)
    if dset_outlier == enums.ExitStatus.error:
        # NOTE: This is the case for concatencated Datasets, where "calculate" stage data are not available.
        log.warn(f"No data for calculate stage available. Outliers can not be detected.")
        return df

    # Bug fix: condition was inverted -- the old code returned "No outlier detected"
    # exactly when outliers existed, and counted outliers on an empty dataset otherwise
    if not dset_outlier.num_obs:
        log.debug("No outlier detected.")
        return df

    # One row per satellite with its outlier count, plus a total row
    for satellite in sorted(dset.unique("satellite")):
        idx = dset_outlier.filter(satellite=satellite)
        row = [len(dset_outlier.satellite[idx])]
        df = df.append(pd.DataFrame([row], columns=columns, index=[satellite]))

    df = df.append(pd.DataFrame([[len(dset_outlier.satellite)]], columns=columns, index=["**SUM**"]))

    return df
def plot_number_of_satellites(
        self,
        figure_name: str="plot_gnss_number_of_satellites_epoch.{FIGURE_FORMAT}",
) -> pathlib.PosixPath:
    """Plot number of satellites based for each GNSS

    Args:
        figure_name: File name of figure.

    Returns:
        Figure path.
    """
    figure_path = self.figure_dir / figure_name.replace("{FIGURE_FORMAT}", self.figure_format)
    log.debug(f"Plot {figure_path}.")

    # Collect one (epoch, satellite count) series and label per GNSS
    times = []
    counts = []
    labels = []
    for sys in sorted(self.dset.unique("system")):
        idx = self.dset.filter(system=sys)
        epochs = self.dset.time.gps.datetime[idx]
        times.append(epochs)
        counts.append(gnss.get_number_of_satellites(self.dset.system[idx], self.dset.satellite[idx], epochs))
        labels.append(enums.gnss_id_to_name[sys].value)

    # Plot scatter plot
    plot(
        x_arrays=times,
        y_arrays=counts,
        xlabel="Time [GPS]",
        ylabel="# satellites",
        y_unit="",
        labels=labels,
        figure_path=figure_path,
        opt_args={
            "figsize": (7, 4),
            "marker": ",",
            "legend": True,
            "legend_location": "bottom",
            "legend_ncol": len(self.dset.unique("system")),
            "plot_to": "file",
            "plot_type": "plot"
        },
    )

    return figure_path