def solve_neq(dset):
    log.info("Solving normal equations")
    names = dset.meta["normal equation"]["names"]
    n = len(names)
    d = np.zeros((n, 6))
    fix_param_weight = np.zeros(n)
    H = np.zeros((6, n))
    stations = set()

    from where import apriori

    reference_frame = config.tech.reference_frames.list[0]
    trf = apriori.get("trf", time=dset.time.utc.mean, reference_frames=reference_frame)

    # thaller2008: eq 2.51 (skipping scale factor)
    for idx, column in enumerate(names):
        if "_site_pos-" not in column:
            continue
        station = column.split("-", maxsplit=1)[-1].rsplit("_", maxsplit=1)[0]
        site_id = dset.meta[station]["site_id"]
        if site_id in trf:
            x0, y0, z0 = trf[site_id].pos.trs
            if column.endswith("_x"):
                d[idx, :] = np.array([1, 0, 0, 0, z0, -y0])
            if column.endswith("_y"):
                d[idx, :] = np.array([0, 1, 0, -z0, 0, x0])
            if column.endswith("_z"):
                d[idx, :] = np.array([0, 0, 1, y0, -x0, 0])
            stations.add(station)

    if len(stations) >= 3:
        try:
            # thaller2008: eq 2.57
            H = np.linalg.inv(d.T @ d) @ d.T
            log.info(f"Applying NNT/NNR with {', '.join(stations)} from {reference_frame.upper()}")
        except np.linalg.LinAlgError:
            log.warn("Unable to invert matrix for NNR/NNT constraints")
    else:
        log.info(
            f"Too few stations to use NNR/NNT constraints from {reference_frame.upper()}. "
            f"Using absolute constraints for station positions."
        )
        # Too few stations to use NNT/NNR: constrain station positions absolutely instead
        for idx, column in enumerate(names):
            if "_site_pos-" not in column:
                continue
            station = column.split("-", maxsplit=1)[-1].rsplit("_", maxsplit=1)[0]
            fix_param_weight[idx] = 1 / (1e-6) ** 2  # 1/meters**2

    sigmas = [0.0001] * 3 + [1.5e-11] * 3

    # NNR to CRF
    if "celestial_reference_frames" in config.tech.master_section:
        celestial_reference_frame = config.tech.celestial_reference_frames.list[0]
        crf = apriori.get("crf", time=dset.time, celestial_reference_frames=celestial_reference_frame)
        H2 = np.zeros((3, n))
        for idx, column in enumerate(names):
            if "_src_dir-" not in column:
                continue
            source = column.split("-", maxsplit=1)[-1].split("_")[0]
            if source in crf:
                ra = crf[source].pos.right_ascension
                dec = crf[source].pos.declination
                if dset.num(source=source) < 5:
                    fix_param_weight[idx] = 1 / (1e-12) ** 2  # 1/radians**2
                    if column.endswith("_ra"):
                        log.info(
                            f"Too few observations for source {source}. "
                            f"Using absolute constraints for source positions."
                        )
                    continue
                if column.endswith("_ra"):
                    H2[0, idx] = -np.cos(ra) * np.sin(dec) * np.cos(dec)
                    H2[1, idx] = -np.sin(ra) * np.sin(dec) * np.cos(dec)
                    H2[2, idx] = np.cos(dec) ** 2
                if column.endswith("_dec"):
                    H2[0, idx] = np.sin(ra)
                    H2[1, idx] = -np.cos(ra)

        if H2.any():
            log.info(f"Applying NNR constraint to {celestial_reference_frame.upper()}")
            # Add NNR to CRF constraints
            H = np.concatenate((H, H2))
            sigmas = sigmas + [1e-6] * 3

    # thaller2008: eq 2.45
    P_h = np.diag(1 / np.array(sigmas) ** 2)

    # Free network constraints: thaller2008: eq 2.58
    N = np.array(dset.meta["normal equation"]["matrix"])
    N_h = N + H.T @ P_h @ H

    # Baselines with too few observations are constrained to their a priori value
    for idx, column in enumerate(names):
        if "_baseline-" not in column:
            continue
        baseline = column.split("-", maxsplit=1)[-1].rsplit("_", maxsplit=1)[0]
        if dset.num(baseline=baseline) < 5:
            fix_param_weight[idx] = 1 / (1e-6) ** 2  # 1/meters**2
            log.info(f"Too few observations for baseline {baseline}. Constrained to a priori value")
            continue

    # Absolute constraints (on parameters with too few observations): thaller2008: eq 2.49
    N_h += np.diag(fix_param_weight)

    # Solve the normal equations
    N_h_inv = np.linalg.inv(N_h)
    b = np.array(dset.meta["normal equation"]["vector"])
    x = N_h_inv @ b[:, None]

    # Covariance: thaller2008: eq 2.16
    variance_factor = dset.meta["statistics"]["variance factor"]
    Q_xx = variance_factor ** 2 * N_h_inv

    dset.meta.add("solution", x[:, 0].tolist(), section="normal equation")
    dset.meta.add("covariance", Q_xx.tolist(), section="normal equation")

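
# A minimal, self-contained sketch (not part of the pipeline) of the
# thaller2008 eq 2.51/2.57 construction used in solve_neq above: build the
# Helmert design matrix `d` for three toy stations and form the constraint
# matrix H = (d^T d)^{-1} d^T. Station coordinates are made up for
# illustration; three non-collinear stations make d full rank.
def _demo_nnt_nnr_constraint():
    import numpy as np

    positions = np.array([
        [6378137.0, 0.0, 0.0],
        [0.0, 6378137.0, 0.0],
        [0.0, 0.0, 6356752.0],
    ])
    d = np.zeros((9, 6))
    for i, (x0, y0, z0) in enumerate(positions):
        d[3 * i + 0] = [1, 0, 0, 0, z0, -y0]  # x-component: translation + rotation partials
        d[3 * i + 1] = [0, 1, 0, -z0, 0, x0]  # y-component
        d[3 * i + 2] = [0, 0, 1, y0, -x0, 0]  # z-component
    H = np.linalg.inv(d.T @ d) @ d.T          # one constraint row per datum parameter
    assert H.shape == (6, 9)
    return H
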
def get_args(rundate, input_args=None):
    """Convert where_runner arguments to where arguments for given date

    Args:
        rundate (date):   The model run date.

    Returns:
        List: Strings with session options (and remaining arguments) for each available session.
    """
    keyword = "--session"
    session_list = set()
    input_args = list(input_args) if input_args is not None else list()

    # Pull an explicit --session=... option out of the input arguments
    for idx in range(len(input_args)):
        key, _, value = input_args[idx].partition("=")
        if key == keyword:
            session_list = set(value.split(","))
            input_args.pop(idx)
            break

    args = " ".join(input_args)

    get_session_from_master = config.where.get(
        "get_session_from_master",
        section=pipeline,
        value=util.read_option_value("--get_session_from_master", default=None),  # TODO: add this to mg_config
        default=False,
    ).bool

    if get_session_from_master:
        skip_sessions = set(
            config.where.get(
                "skip_sessions",
                section="runner",
                value=util.read_option_value("--skip_sessions", default=None),
                default="",
            ).list
        )
        session_types = config.where.get(
            "session_types",
            section="runner",
            value=util.read_option_value("--session_types", default=None),
            default="",
        ).list
        master_schedule = apriori.get("vlbi_master_schedule", rundate=rundate)
        sessions = set(master_schedule.list_sessions(rundate, session_types=session_types))

        check_master_status = config.where.get(
            "check_master_status",
            section="runner",
            value=util.read_option_value("--check_master_status", default=None),
            default=False,
        ).bool

        not_ready_sessions = set()
        if check_master_status:
            for session in sessions:
                if not master_schedule.ready(rundate, session):
                    status = master_schedule.status(rundate, session)
                    log.warn(
                        f"{rundate} {session} is not ready for processing. "
                        f"Master file status: '{status}'. Skipping session."
                    )
                    not_ready_sessions.add(session)

        sessions = set(sessions) - skip_sessions - not_ready_sessions
        sessions = sessions & session_list if session_list else sessions
        return [keyword + "=" + s + " " + args for s in sessions]
    else:
        obs_format = config.tech.get("obs_format", section=pipeline).str  # TODO: This always falls back on config.where ..
        file_vars = config.create_file_vars(rundate, pipeline, session=None)
        del file_vars["session"]  # TODO: Do not add None variables to file_vars?
        sessions = config.files.glob_variable(
            f"vlbi_obs_{obs_format}", variable="session", pattern=r"\w{2}", file_vars=file_vars
        )
        sessions = sessions & session_list
        return [keyword + "=" + s + " " + args for s in sessions]

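
# A minimal sketch (not part of the runner) of the --session handling in
# get_args above: an explicit "--session=ab,cd" option is split into a
# session set and removed before the remaining options are passed on.
def _demo_session_option_parsing():
    input_args = ["--session=ab,cd", "--verbose"]
    session_list = set()
    for idx in range(len(input_args)):
        key, _, value = input_args[idx].partition("=")
        if key == "--session":
            session_list = set(value.split(","))
            input_args.pop(idx)
            break
    assert session_list == {"ab", "cd"} and input_args == ["--verbose"]
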
def _generate_dataframes(dset: Dict[str, "Dataset"]) -> Tuple[Dict[str, pd.core.frame.DataFrame], ...]:
    """Generate dataframe based on station datasets

    The dataframe for each station in dictionary "dfs" has following columns:

        site_vel_h:   horizontal site velocity
        site_vel_3d:  3D site velocity

    Example for "dfs" dictionary:

        'hons':               time.gps  site_vel_h  site_vel_3d
                0  2019-03-01 00:00:00    0.301738     0.057244
                1  2019-03-01 00:00:00    0.301738     0.057244

        'krss':               time.gps  site_vel_h  site_vel_3d
                0  2019-03-01 00:00:00    0.710014     0.186791
                1  2019-03-01 00:00:00    0.710014     0.186791

    Example for "dfs_day" dictionary:

        'site_vel_h':             nabf      vegs      hons      krss
                      time.gps
                      2019-03-01  1.368875  0.935687  1.136763  0.828754
                      2019-03-02  0.924839  0.728280  0.911677  0.854832

        'site_vel_3d':            nabf      vegs      hons      krss
                      time.gps
                      2019-03-01  1.715893  1.147265  1.600330  0.976541
                      2019-03-02  1.533437  1.307373  1.476295  1.136991

    Example for "dfs_month" dictionary:

        'site_vel_h':           nabf      vegs      hons      krss
                      Mar-2019  1.186240  0.861718  1.095827  1.021354
                      Apr-2019  0.891947  0.850343  0.977908  0.971099

        'site_vel_3d':          nabf      vegs      hons      krss
                      Mar-2019  1.854684  1.291406  1.450466  1.225467
                      Apr-2019  1.964404  1.706507  1.687994  1.500742

    Args:
        dset: Dictionary with station name as keys and the belonging Dataset as value

    Returns:
        Tuple with following entries:

        | Element    | Description                                                                            |
        |------------|----------------------------------------------------------------------------------------|
        | dfs        | Dictionary with station name as keys and the belonging dataframe as value with        |
        |            | following dataframe columns: site_vel_h, site_vel_3d                                  |
        | dfs_day    | Dictionary with fields as keys (e.g. site_vel_h, site_vel_3d) and the belonging       |
        |            | dataframe as value with DAILY samples of 95th percentile and stations as columns.     |
        | dfs_month  | Dictionary with fields as keys (e.g. site_vel_h, site_vel_3d) and the belonging       |
        |            | dataframe as value with MONTHLY samples of 95th percentile and stations as columns.   |
    """
    dsets = dset
    dfs = {}
    dfs_day = {"site_vel_h": pd.DataFrame(), "site_vel_3d": pd.DataFrame()}
    dfs_month = {"site_vel_h": pd.DataFrame(), "site_vel_3d": pd.DataFrame()}

    for station, dset in dsets.items():
        if dset.num_obs == 0:
            log.warn(f"Dataset '{station}' is empty.")
            continue

        # Determine dataframe with site_vel_h and site_vel_3d columns
        # TODO: How to ensure that GPS time scale is used? fields=["time.gps", ...] no longer works.
        df = dset.as_dataframe(fields=["time", "site_vel_h", "site_vel_3d"])

        if df.empty:
            continue
        else:
            # Save data in dictionaries
            dfs.update({station: df})

            # TODO: This does not work anymore for Pandas version 1.0:
            #       df_day = df.set_index("time").resample("D", how=lambda x: np.nanpercentile(x, q=95))
            df_day = df.set_index("time").resample("D").apply(lambda x: np.nanpercentile(x, q=95))
            for field in dfs_day.keys():
                if dfs_day[field].empty:
                    dfs_day[field][station] = df_day[field]
                else:
                    dfs_day[field] = pd.concat([dfs_day[field], df_day[field]], axis=1)
                    dfs_day[field] = dfs_day[field].rename(columns={field: station})

            # TODO: This does not work anymore for Pandas version 1.0:
            #       df_month = df.set_index("time").resample("M", how=lambda x: np.nanpercentile(x, q=95))
            df_month = df.set_index("time").resample("M").apply(lambda x: np.nanpercentile(x, q=95))
            df_month.index = df_month.index.strftime("%b-%Y")
            for field in dfs_month.keys():
                dfs_month[field][station] = df_month[field]

    return dfs, dfs_day, dfs_month

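
# A minimal sketch (not part of the writer) of the Pandas >= 1.0 pattern used
# above: daily and monthly 95th-percentile resampling via .resample().apply().
# Data values are made up for illustration.
def _demo_percentile_resampling():
    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(0)
    df = pd.DataFrame(
        {
            "time": pd.date_range("2019-03-01", periods=96, freq="H"),
            "site_vel_h": np.abs(rng.standard_normal(96)),
        }
    )
    df_day = df.set_index("time").resample("D").apply(lambda x: np.nanpercentile(x, q=95))
    df_month = df.set_index("time").resample("M").apply(lambda x: np.nanpercentile(x, q=95))
    df_month.index = df_month.index.strftime("%b-%Y")
    return df_day, df_month
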
def add_to_full_timeseries(dset):
    """Write some key variables to the full timeseries

    Args:
        dset:  Dataset, data for a model run.
    """
    log.info("Updating timeseries dataset")
    dset_session = dataset.Dataset()

    # Add data to dset_session
    idx_fields = config.tech[WRITER].index.list
    field_values = [["all"] + list(dset.unique(f)) for f in idx_fields]
    idx_values = dict(zip(idx_fields, zip(*itertools.product(*field_values))))
    # TODO: Remove combinations where filter leaves 0 observations

    num_obs = len(idx_values[idx_fields[0]])  # Length of any (in this case the first) field
    mean_epoch = dset.time.mean.utc
    rundate_str = dset.analysis["rundate"].strftime(config.FMT_date)
    session = dset.vars.get("session", "")
    status = dset.meta.get("analysis_status", "unchecked")
    session_type = dset.meta.get("input", dict()).get("session_type", "")

    dset_session.num_obs = num_obs
    dset_session.add_time("time", val=[mean_epoch] * num_obs, scale=mean_epoch.scale, fmt=mean_epoch.fmt)
    dset_session.add_text("rundate", val=[rundate_str] * num_obs)
    dset_session.add_text("session", val=[session] * num_obs)
    dset_session.add_text("status", val=[status] * num_obs)
    dset_session.add_text("session_type", val=[session_type] * num_obs)

    for field, value in idx_values.items():
        dset_session.add_text(field, val=value)

    default_dset_str = f"{dset.vars['stage']}/{dset.vars['label']}"
    dsets = {default_dset_str: dset}
    for method, cfg_entry in config.tech[WRITER].items():
        try:
            method_func = getattr(sys.modules[__name__], f"method_{method}")
        except AttributeError:
            log.warn(f"Method {method!r} is unknown")
            continue

        for field_cfg in cfg_entry.as_list(split_re=", *"):
            field_out = re.sub("[ -/:]", "_", field_cfg)
            func, _, field_dset = field_cfg.rpartition(":")
            field_in, _, dset_str = field_dset.partition("-")
            func = func if func else field_in
            dset_str = dset_str if dset_str else default_dset_str
            if dset_str not in dsets:
                stage, _, dset_id = dset_str.partition("/")
                dset_id = int(dset_id) if dset_id else "last"
                dsets[dset_str] = dataset.Dataset.read(
                    rundate=dset.analysis["rundate"],
                    pipeline=dset.vars["pipeline"],
                    stage=stage,
                    session=dset.vars["session"],
                    label=dset_id,
                    id=dset.analysis["id"],
                )
            val, adder, unit = method_func(dsets[dset_str], field_in, idx_values, func)
            if adder:
                add_func = getattr(dset_session, adder)
                add_func(field_out, val=val, unit=unit)

    # Hack to get solved neq data into the time series:
    # TODO: unhack this :P Add as a method_neq instead?
    if "normal equation" in dset.meta:
        _add_solved_neq_fields(dset, dset_session, idx_values)

    # Read timeseries dataset and extend it with session dataset
    dset_id = config.tech.timeseries.dataset_id.str.format(**dset.vars)
    try:
        # Read existing dataset
        dset_ts = dataset.Dataset.read(
            rundate=date(1970, 1, 1),
            pipeline=dset.vars["pipeline"],
            stage="timeseries",
            label=dset_id,
            session="",
            use_options=False,
            id=dset.analysis["id"],
        )
    except OSError:
        # Start a new timeseries dataset
        dset_ts = dataset.Dataset(
            rundate=date(1970, 1, 1),
            pipeline=dset.vars["pipeline"],
            stage="timeseries",
            label=dset_id,
            session="",
            use_options=False,
            id=dset.analysis["id"],
        )

    if dset_ts.num_obs > 0:
        # Filter timeseries dataset to remove any previous data for this rundate and session
        keep_idx = np.logical_not(dset_ts.filter(rundate=rundate_str, session=session))
        dset_ts.subset(keep_idx)

    # Extend timeseries dataset with dset_session and write to disk
    dset_ts.extend(dset_session)
    dset_ts.write()

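
# A minimal sketch (not part of the writer) of the idx_values construction in
# add_to_full_timeseries above: itertools.product expands all index-field
# combinations, and the zip/zip(*...) pair turns the list of combinations
# into one column per field. Field values are made up for illustration.
def _demo_idx_values():
    import itertools

    idx_fields = ["station", "source"]
    field_values = [["all", "ns"], ["all", "0059+581"]]
    idx_values = dict(zip(idx_fields, zip(*itertools.product(*field_values))))
    assert idx_values["station"] == ("all", "all", "ns", "ns")
    assert idx_values["source"] == ("all", "0059+581", "all", "0059+581")
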
def vlbi_grav_delay(dset):
    """Calculate the gravitational delay

    The implementation is described in IERS Conventions [1]_, section 11.1, in particular equation (11.9).

    Args:
        dset:  A Dataset containing model data.

    Returns:
        Numpy array: Gravitational delay in meters for each observation.
    """
    eph = apriori.get("ephemerides", time=dset.time)
    grav_delay = np.zeros(dset.num_obs)

    # List of celestial bodies. Major moons are also recommended, like Titan, Ganymede, ...
    bodies = [
        "mercury barycenter",
        "venus barycenter",
        "earth",
        "moon",
        "mars barycenter",
        "jupiter barycenter",
        "saturn barycenter",
        "uranus barycenter",
        "neptune barycenter",
        "pluto barycenter",
        "sun",
    ]

    bcrs_vel_earth = eph.vel_bcrs("earth")
    baseline_gcrs = dset.site_pos_2.gcrs_pos - dset.site_pos_1.gcrs_pos
    src_dot_baseline = (dset.src_dir.unit_vector[:, None, :] @ baseline_gcrs[:, :, None])[:, 0, 0]

    # Equation 11.6
    bcrs_site1 = eph.pos_bcrs("earth") + dset.site_pos_1.gcrs_pos
    bcrs_site2 = eph.pos_bcrs("earth") + dset.site_pos_2.gcrs_pos

    for body in bodies:
        try:
            GM_body = constant.get("GM_{}".format(body.split()[0]), source=eph.ephemerides)
        except KeyError:
            log.warn(
                "The GM value of {} is not defined for {}. Correction set to zero.",
                body.split()[0].title(),
                eph.ephemerides,
            )
            continue
        bcrs_body_t1 = eph.pos_bcrs(body)

        # Equation 11.3
        delta_t = TimeDelta(
            np.maximum(0, dset.src_dir.unit_vector[:, None, :] @ (bcrs_body_t1 - bcrs_site1)[:, :, None])[:, 0, 0]
            * unit.second2day
            / constant.c,
            format="jd",
            scale="tdb",
        )
        time_1J = dset.time.tdb - delta_t

        # Equation 11.4
        bcrs_body_t1J = eph.pos_bcrs(body, time=time_1J)
        vector_body_site1 = bcrs_site1 - bcrs_body_t1J

        # Equation 11.5
        vector_body_site2 = bcrs_site2 - bcrs_body_t1J - bcrs_vel_earth / constant.c * src_dot_baseline[:, None]

        # Needed for equation 11.1
        norm_body_site1 = np.linalg.norm(vector_body_site1, axis=1)
        src_dot_vector_body_site1 = (dset.src_dir.unit_vector[:, None, :] @ vector_body_site1[:, :, None])[:, 0, 0]
        nomJ = norm_body_site1 + src_dot_vector_body_site1
        denomJ = np.linalg.norm(vector_body_site2, axis=1) + (
            dset.src_dir.unit_vector[:, None, :] @ vector_body_site2[:, :, None]
        )[:, 0, 0]

        # Main correction (equation 11.1)
        grav_delay += 2 * GM_body / constant.c ** 2 * np.log(nomJ / denomJ)

        # Higher order correction (equation 11.14)
        baseline_dot_vector_body_site1 = (baseline_gcrs[:, None, :] @ vector_body_site1[:, :, None])[:, 0, 0]
        grav_delay += (
            4
            * GM_body ** 2
            / constant.c ** 4
            * (baseline_dot_vector_body_site1 / norm_body_site1 + src_dot_baseline)
            / (norm_body_site1 + src_dot_vector_body_site1) ** 2
        )

    # Denominator (equation 11.9)
    denominator = (
        1
        + ((bcrs_vel_earth + dset.site_pos_2.gcrs_vel)[:, None, :] @ dset.src_dir.unit_vector[:, :, None] / constant.c)[
            :, 0, 0
        ]
    )

    return grav_delay / denominator

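
# A minimal numeric sketch (not part of the model) of the main correction in
# equation 11.1 above, for the Sun only and with made-up site/source geometry.
# GM and c are hard-coded here instead of coming from `constant`.
def _demo_grav_delay_sun():
    import numpy as np

    GM_sun = 1.32712440018e20  # m^3/s^2
    c = 299792458.0            # m/s
    src_dir = np.array([1.0, 0.0, 0.0])              # unit vector towards source
    vector_sun_site1 = np.array([1.5e11, 1e9, 0.0])  # Sun -> site 1 (m), made up
    vector_sun_site2 = np.array([1.5e11, 1e9, 6e6])  # Sun -> site 2 (m), made up

    nomJ = np.linalg.norm(vector_sun_site1) + src_dir @ vector_sun_site1
    denomJ = np.linalg.norm(vector_sun_site2) + src_dir @ vector_sun_site2
    delay = 2 * GM_sun / c ** 2 * np.log(nomJ / denomJ)  # meters
    return delay
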
def file_vars():
    """File variables that will be available during the running of this technique

    In addition, date and analysis variables are available.

    Returns:
        Dict:  File variables special for this technique.
    """
    file_vars = dict()

    # Add obs_version for ngs
    if config.tech.get("obs_format").str == "ngs":
        versions = files.glob_variable("vlbi_obs_ngs", "obs_version", r"\d{3}")
        if versions:
            file_vars["obs_version"] = max(versions)
        elif config.where.files.download_missing.bool:
            # Look online for a candidate
            log.info("No NGS observation file found on disk: Looking for one online.")
            obs_versions = [f"{v:03d}" for v in reversed(range(4, 10))]
            for obs_version in obs_versions:
                url = files.url(
                    "vlbi_obs_ngs", file_vars=dict(obs_version=obs_version), is_zipped=True, use_aliases=False
                )
                log.info(f"Looking for {url} ...")
                if url.exists():
                    file_vars["obs_version"] = obs_version
                    break
        if not file_vars:
            log.fatal("No NGS observation file found")

    # Add obs_version for vgosdb
    if config.tech.get("obs_format").str == "vgosdb":
        versions = files.glob_variable("vlbi_obs_vgosdb", "obs_version", r"\d{3}")
        if versions:
            file_vars["obs_version"] = max(versions)
        elif config.where.files.download_missing.bool:
            # Look online for a candidate
            log.warn("No VGOSDB wrapper file found. Not attempting to download. TODO")
            # log.info("No NGS wrapper file found on disk: Looking for one online.")
            # obs_versions = [f"{v:03d}" for v in reversed(range(4, 10))]
            # for obs_version in obs_versions:
            #     url = files.url(
            #         "vlbi_obs_ngs", file_vars=dict(obs_version=obs_version), is_zipped=True, use_aliases=False
            #     )
            #     log.info(f"Looking for {url} ...")
            #     if url.exists():
            #         file_vars["obs_version"] = obs_version
            #         break
        if not file_vars:
            log.fatal("No VGOSDB observation file found")

    # Sinex file vars
    if "sinex" in config.tech.section_names:
        file_vars["solution"] = config.tech.sinex.solution.str
        file_vars["file_agency"] = config.tech.sinex.file_agency.str.lower()

    return file_vars

def _add_figures(dset: "Dataset", rpt: "Report", figure_dir: "pathlib.PosixPath") -> None:
    """Add figures to report

    Args:
        dset:        A dataset containing the data.
        rpt:         Report object.
        figure_dir:  Figure directory.
    """
    rpt.add_text("\n# GNSS signal-in-space (SIS) status\n\n")
    plt = GnssPlot(dset, figure_dir, figure_format=FIGURE_FORMAT)

    # Plot GNSS SIS status (except if only Galileo system is available)
    if not (len(dset.unique("system")) == 1 and dset.unique("system")[0] == "E"):
        caption = "GNSS signal-in-space (SIS) status for each navigation message."
        if "E" in dset.unique("system"):  # Add extra comment for Galileo
            signal, nav_type = plt.get_first_galileo_signal()
            caption += (
                f" Galileo SIS status is given for signal '{signal.upper()}' and "
                f"navigation message type '{nav_type}'."
            )
        rpt.add_figure(
            figure_path=plt.plot_gnss_signal_in_space_status_overview(),
            caption=caption,
            clearpage=True,
        )

    # Plots are only generated for Galileo
    for figure_path in plt.plot_gnss_signal_in_space_status():
        gnss = figure_path.stem.split("_")[5]
        if gnss == "galileo":
            caption = f"Galileo signal-in-space (SIS) status for signal {figure_path.stem.split('_')[-1].upper()}"
        else:
            caption = f"{gnss.upper()} signal-in-space (SIS) status"
        rpt.add_figure(
            figure_path=figure_path,
            caption=caption,
        )
    rpt.add_text("\n\\clearpage\n\n")

    # Plot TGD/BGD comparison
    bias_comp_def = {"bgd_e1_e5a_diff", "bgd_e1_e5b_diff", "tgd_diff", "tgd_b1_b2_diff", "tgd_b1_b3_diff"}

    # Add DCB comparison results to dataset if not existing
    if not set(dset.fields).intersection(bias_comp_def):
        gnss_compare_tgd(dset)

    if set(dset.fields).intersection(bias_comp_def):
        for figure_path in plt.plot_tgd_comparison():
            words = figure_path.stem.split("_")
            gnss = words[2] if words[1] == "field" else words[1]
            if "diff." in str(figure_path):
                caption = f"TGD/BGD comparison against DCBs for {enums.gnss_id_to_name[gnss].value}"
            elif "diff_mean." in str(figure_path):
                caption = f"TGD/BGD comparison against DCBs for {enums.gnss_id_to_name[gnss].value} (zero mean)"
            else:
                caption = f"TGD/BGD for {enums.gnss_id_to_name[gnss].value}"
            rpt.add_figure(
                figure_path=figure_path,
                caption=caption,
            )
    else:
        log.warn("No TGD/BGD comparison plots are generated.")

    rpt.add_text("\n\\clearpage\n\n")

def run(rundate, pipeline, *args, **kwargs):
    """Run a Where pipeline for a given date and session

    Args:
        rundate:   Rundate of analysis.
        pipeline:  Pipeline used for analysis.
        session:   Session in analysis.
    """
    if not setup.has_config(rundate, pipeline, *args, **kwargs):
        log.fatal(f"No configuration found for {pipeline.upper()} {rundate.strftime(config.FMT_date)}")

    # Set up config
    config.init(rundate, pipeline, **kwargs)

    # Register filekey suffix
    filekey_suffix = config.tech.filekey_suffix.list
    if filekey_suffix:
        config.files.profiles = filekey_suffix

    # Validate input arguments
    try:
        prefix = plugins.call(
            package_name=__name__, plugin_name=pipeline, part="validate_args", rundate=rundate, **kwargs
        )
    except mg_exceptions.UnknownPluginError:
        log.warn(f"Pipeline {pipeline} has not defined function 'validate_args'")
    except exceptions.InvalidArgsError as err:
        from where.tools import delete

        # Clean up {placeholder} directories created by config
        delete.delete_analysis(rundate, pipeline, **kwargs)
        log.fatal(err)

    # Set up console logger and start file logger
    try:
        prefix = plugins.call(
            package_name=__name__, plugin_name=pipeline, part="log_prefix", rundate=rundate, **kwargs
        )
    except mg_exceptions.UnknownPluginError:
        log.warn(f"Pipeline {pipeline} has not defined function 'log_prefix'")
        prefix = ""

    log_cfg = config.where.log
    log.init(log_level=log_cfg.default_level.str, prefix=prefix)
    if log_cfg.log_to_file.bool:
        log.file_init(
            file_path=config.files.path("log"),
            log_level=log_cfg.default_level.str,
            prefix=prefix,
            rotation=log_cfg.number_of_log_backups.int,
        )

    # Update analysis config and file variables
    config.set_analysis(rundate, pipeline=pipeline, **kwargs)
    config.set_file_vars(file_vars())

    log.blank()  # Empty line for visual clarity

    # Read which stages should be executed once for each iterable
    skip_stages = config.tech.skip_stages.list
    stage_iterate = config.tech.stage_iterate.list
    dset_list = []
    dset = None

    if stage_iterate:
        # Read which list should be iterated over and the placeholder name of each entry
        iterate_over, _, var_name = config.tech.stage_iterate_over.str.partition(":")
        var_name = var_name.strip()

        # Iterate
        for item in config.tech[iterate_over].list:
            kwargs[var_name] = item
            log.blank()
            log.info(f"***** Running {item} *****")
            for prev_stage, stage in zip([None] + stage_iterate, stage_iterate):
                if stage not in skip_stages:
                    dset = run_stage(rundate, pipeline, dset, stage, prev_stage, **kwargs)

            if dset is not None:
                dset_list.append(dset)
                dset = None
        kwargs[var_name] = "combined"

        if dset_list:
            dset_list[0].merge_with(*dset_list[1:], sort_by="time")
            dset = dset_list[0]
            if len(dset_list) > 1:
                log.info(f"Combining dataset for {len(dset_list)} {iterate_over}")
                dset.write_as(stage=stage_iterate[-1], label=2, **kwargs)

    # Read which stages should be executed once
    stage_once = config.tech.stage_once.list

    # Find which stages we will run analysis for
    if not stage_once and not stage_iterate:
        stage_list = [s for s in stages(pipeline)]
        prev_stage_start = None
    else:
        stage_list = [s for s in stage_once]
        prev_stage_start = stage_iterate[-1] if stage_iterate else None

    for prev_stage, stage in zip([prev_stage_start] + stage_list, stage_list):
        if stage not in skip_stages:
            dset = run_stage(rundate, pipeline, dset, stage, prev_stage, **kwargs)
            log.blank()
            if dset is not None and dset.num_obs == 0:
                log.warn(f"No observations in dataset after {stage} stage.")
                break

    # Store configuration to library
    setup.store_config_to_library(rundate, pipeline, **kwargs)

    # Write requirements to file for reproducibility
    util.write_requirements()

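
# A minimal sketch of how `run` might be invoked from Python (the date and
# session keyword below are made up for illustration; the real entry point
# builds these arguments from the command line).
def _demo_run():
    from datetime import date

    run(date(2019, 3, 1), "vlbi", session="XA")
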
def gnss_select_obs(dset: "Dataset") -> np.ndarray:
    """Select GNSS observations used in Where processing

    Args:
        dset (where.data.dataset.Dataset):  A Dataset containing model data.

    Returns:
        Array containing False for observations to throw away
    """
    remove_obstypes = set()
    keep_idx = np.full(dset.num_obs, True, dtype=bool)
    reject_nan = np.full(dset.num_obs, False, dtype=bool)
    reject_nan_all_sys = None
    obstypes_all = dset.obs.fields

    cfg_obs_code = config.tech[_SECTION].obs_code.list
    cfg_obstypes = config.tech[_SECTION].obs_types.list
    cfg_systems = config.tech.systems.list

    # Remove GNSSs which are not defined in the configuration file
    for sys in list(dset.meta["obstypes"]):
        if sys not in cfg_systems:
            del dset.meta["obstypes"][sys]

    for obs, sys in enumerate(dset.system):
        if sys not in cfg_systems:
            keep_idx[obs] = False

    if not np.any(keep_idx):
        log.fatal(f"No observations available for selected system(s): {' '.join(cfg_systems)}.")

    # Remove observation types which are not given in the configuration file. If no observation types are
    # defined in the configuration file, keep all observation types.
    if cfg_obstypes:
        for type_ in cfg_obstypes:
            if type_ not in dset.obs.fields:
                log.warn(f"Selected observation type {type_} is not included in GNSS observation data.")
        log.debug(
            f"Remove undefined observation types in configuration file: "
            f"{' '.join(set(obstypes_all) - set(cfg_obstypes))}."
        )
        remove_obstypes = set(obstypes_all) - set(cfg_obstypes)

    # Remove undefined observation codes related to given configuration
    keep_obs_code = list()
    for obs_code in sorted(cfg_obs_code):
        if obs_code not in OBS_CODE_DEF:
            log.fatal(f"Observation code '{obs_code}' is not valid in option 'obs_code='.")
        keep_obs_code.append(OBS_CODE_DEF[obs_code])

    log.debug(f"Remove undefined observation codes: {' '.join(set(OBS_CODE_DEF.values()) - set(keep_obs_code))}.")
    remove_obs_code = set(OBS_CODE_DEF.values()) - set(keep_obs_code)
    remove_obs_pattern = f"^{'|^'.join(remove_obs_code)}"

    for type_ in obstypes_all:
        search_obj = re.search(remove_obs_pattern, type_)
        if search_obj is not None:
            remove_obstypes.add(search_obj.string)

    # Select observations based on priority list
    # -> 1st step: remove already unused observation types from Dataset to determine the basis for the priority
    #    list selection
    #
    # Note: The order of the selected observations is important for selection of GNSS code observation type to
    #       determine satellite transmission time.
    _remove_obstype_from_dset(dset, remove_obstypes)
    selected_obstypes, add_remove_obstypes = _select_observations(obstypes_all, dset.meta["obstypes"])
    remove_obstypes.update(add_remove_obstypes)
    log.debug(f"Remove observation types after selection: {' '.join(add_remove_obstypes)}.")
    _remove_obstype_from_dset(dset, remove_obstypes)
    dset.meta["obstypes"] = selected_obstypes.copy()

    # Remove NaN values of selected observation types
    if config.tech[_SECTION].remove_nan.bool:

        # Note: An array 'reject_nan_all_sys' is created for all GNSS observation types. This array shows, if
        #       some elements are set to NaN for a GNSS observation type. At the end only NaN observations are
        #       removed, if these observations are NaN for all GNSS observation types
        #       (see np.bitwise_and.reduce(reject_nan_all_sys, 1)).
        for sys in dset.meta["obstypes"]:  # Loop over selected observation types
            for type_ in dset.meta["obstypes"][sys]:
                reject_nan[keep_idx] = np.isnan(dset.obs[type_][keep_idx])

                if reject_nan_all_sys is None:
                    # Copy to avoid aliasing, as 'reject_nan' is overwritten in the next iteration
                    reject_nan_all_sys = reject_nan.copy()
                    continue

                if reject_nan_all_sys.ndim == 1:
                    reject_nan_all_sys = np.hstack((reject_nan_all_sys[:, None], reject_nan[:, None]))
                else:
                    reject_nan_all_sys = np.hstack((reject_nan_all_sys, reject_nan[:, None]))

        if reject_nan_all_sys.ndim > 1:
            reject_nan_all_sys = np.bitwise_and.reduce(reject_nan_all_sys, 1)

        if np.any(reject_nan_all_sys):
            keep_idx[keep_idx] = np.logical_not(reject_nan_all_sys)[keep_idx]
            log.debug(f"Remove {np.sum(reject_nan_all_sys)} NaN values.")

    return keep_idx

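
# A minimal sketch (not part of the remover) of the NaN handling above: an
# observation is rejected only if it is NaN for *all* observation types,
# which np.bitwise_and.reduce over the stacked per-type columns expresses.
def _demo_reject_nan_all_types():
    import numpy as np

    obs = np.array([
        [1.0, np.nan, np.nan],
        [np.nan, np.nan, np.nan],
        [2.0, 3.0, 4.0],
    ])  # rows: observations, columns: observation types (made up)
    reject = np.bitwise_and.reduce(np.isnan(obs), 1)
    assert reject.tolist() == [False, True, False]
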
def _calculate(self, dset_out: "Dataset", dset_in: "Dataset", time: str = "time") -> None:
    """Calculate broadcast ephemeris and satellite clock correction for given observation epochs

    As a first step observations are removed from unavailable satellites, unhealthy satellites and for
    exceeding the validity length of navigation records. The input Dataset contains observation epochs for
    which the broadcast ephemeris and satellite clock correction should be determined.

    Args:
        dset_out: Output Dataset representing calculated broadcast ephemeris with following fields:

            ========================  ==============  =======  ========================================================
            Field                     Type            Unit     Description
            ========================  ==============  =======  ========================================================
            gnss_satellite_clock      numpy.ndarray   m        Satellite clock correction
            gnss_relativistic_clock   numpy.ndarray   m        Relativistic clock correction due to orbit eccentricity
            sat_posvel                PosVelTable     m        Satellite position and velocity
            satellite                 numpy.ndarray            Satellite numbers
            system                    numpy.ndarray            GNSS identifiers
            time                      TimeTable                Observation epochs
            used_iode                 numpy.ndarray            IODE of selected broadcast ephemeris block
            used_transmission_time    TimeTable                Transmission time of selected broadcast ephemeris block
            used_toe                  TimeTable                Time of ephemeris (TOE) of selected broadcast ephemeris
                                                               block
            ========================  ==============  =======  ========================================================

        dset_in:  Input Dataset containing model data for which broadcast ephemeris should be determined.
        time:     Define time fields to be used. It can be for example 'time' or 'sat_time'. 'time' is related
                  to observation time and 'sat_time' to satellite transmission time.
    """
    # Clean orbits by removing unavailable satellites, unhealthy satellites and checking validity length of
    # navigation records
    cleaners.apply_remover("gnss_clean_orbit", dset_in)

    not_implemented_sys = set(dset_in.system) - set("EG")
    if not_implemented_sys:
        log.warn(
            f"At the moment Where can provide broadcast ephemeris for GNSS 'E' and 'G', "
            f"but not for {', '.join(not_implemented_sys)}."
        )
        cleaners.apply_remover("gnss_ignore_system", dset_in, systems=not_implemented_sys)

    log.info(
        f"Calculating satellite position/velocity (broadcast) based on RINEX navigation file "
        f"{', '.join(self.dset_edit.meta['parser']['file_path'])}"
    )

    # Get correct navigation block for given observation times by determining the indices to the broadcast
    # ephemeris Dataset
    dset_brdc_idx = self._get_brdc_block_idx(dset_in, time=time)

    # Loop over all observations
    # TODO: Generation of vectorized solution, if possible?
    # BUG:  Use of GPSSEC does not work for GPS WEEK crossovers. MJD * Unit.day2second() would be a better
    #       solution. The problem is that use of GPSSEC compared to MJD * Unit.day2second() is not consistent!!!!
    sat_pos = np.zeros((dset_in.num_obs, 3))
    sat_vel = np.zeros((dset_in.num_obs, 3))
    for obs_idx, (time_gpsweek, time_gpssec, brdc_idx, sys) in enumerate(
        zip(dset_in[time].gps.gpsweek, dset_in[time].gps.gpssec, dset_brdc_idx, dset_in.system)
    ):
        # TODO: get_row() function needed for brdc -> brdc.get_row(kk)
        sat_pos[obs_idx], sat_vel[obs_idx] = self._get_satellite_position_velocity(
            time_gpsweek, time_gpssec, brdc_idx, sys
        )

        # +DEBUG
        # print("DEBUG: {} obs_idx: {:>5d} brdc_idx: {:>5d} toc: {:>5.0f} {:>6.0f} toe: {:>6.0f} trans_time: {:>6.0f}"
        #       " tk: {:>16.10f} iode: {:>3d} sqrt_a: {:>17.10f} sat_pos: {:>21.10f} {:>21.10f} {:>21.10f} "
        #       "sat_vel: {:>17.10f} {:>17.10f} {:>17.10f} sat_clk_bias: {:>17.10f}, sat_clk_drft: {:>17.10f} "
        #       "".format(self.dset_edit.satellite[brdc_idx], obs_idx, brdc_idx,
        #                 dset_in[time].gps.jd_frac[obs_idx] * 86400,
        #                 dset_in[time].gps.gpssec[obs_idx],
        #                 self.dset_edit.toe.gps.gpssec[brdc_idx],
        #                 self.dset_edit.transmission_time.gps.gpssec[brdc_idx],
        #                 dset_in[time].gps.jd_frac[obs_idx] - self.dset_edit.toe.gps.gpssec[brdc_idx],
        #                 int(self.dset_edit.iode[brdc_idx]),
        #                 self.dset_edit.sqrt_a[brdc_idx],
        #                 sat_pos[obs_idx][0], sat_pos[obs_idx][1], sat_pos[obs_idx][2],
        #                 sat_vel[obs_idx][0], sat_vel[obs_idx][1], sat_vel[obs_idx][2],
        #                 self.dset_edit.sat_clock_bias[brdc_idx],
        #                 self.dset_edit.sat_clock_drift[brdc_idx],))
        # -DEBUG

    # Copy fields from model data Dataset
    dset_out.num_obs = dset_in.num_obs
    dset_out.add_text("satellite", val=dset_in.satellite)
    dset_out.add_text("system", val=dset_in.system)
    dset_out.add_time("time", val=dset_in.time, scale=dset_in.time.scale)
    dset_out.vars["orbit"] = self.name

    # Add time fields
    dset_out.add_time(
        "used_transmission_time",
        val=self.dset_edit.transmission_time[dset_brdc_idx],
        scale=self.dset_edit.transmission_time.scale,
    )
    dset_out.add_time("used_toe", val=self.dset_edit.toe[dset_brdc_idx], scale=self.dset_edit.toe.scale)

    # Add float fields
    for field in ["bgd_e1_e5a", "bgd_e1_e5b", "tgd", "tgd_b1_b3", "tgd_b2_b3"]:
        if field in self.dset_edit.fields:
            dset_out.add_float(field, val=self.dset_edit[field][dset_brdc_idx])

    dset_out.add_float(
        "gnss_relativistic_clock", val=self.relativistic_clock_correction(sat_pos, sat_vel), unit="meter"
    )
    dset_out.add_float(
        "gnss_satellite_clock", val=self.satellite_clock_correction(dset_in, time=time), unit="meter"
    )
    dset_out.add_float("used_iode", val=self.dset_edit.iode[dset_brdc_idx])

    # Add satellite position and velocity to Dataset
    dset_out.add_posvel("sat_posvel", time="time", itrs=np.hstack((sat_pos, sat_vel)))

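
# A minimal sketch (not the class method used above) of the standard
# relativistic clock correction due to orbit eccentricity: in range units it
# is -2 (r . v) / c for satellite position r and velocity v. Values below are
# made up for illustration.
def _demo_relativistic_clock_correction():
    import numpy as np

    c = 299792458.0                                     # m/s
    sat_pos = np.array([[26_560_000.0, 1e5, 0.0]])      # m
    sat_vel = np.array([[0.0, 3_900.0, 10.0]])          # m/s
    correction = -2 * np.einsum("ij,ij->i", sat_pos, sat_vel) / c  # meters
    return correction
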
def _get_brdc_block_idx(self, dset: "Dataset", time: str = "time") -> List[int]:
    """Get GNSS broadcast ephemeris block indices for given observation epochs

    The indices relate the observation epoch to the correct set of broadcast ephemeris. First the time
    difference between the observation epoch and a selected time is calculated to determine the correct
    broadcast ephemeris block. The selected time can be either the navigation epoch (time of clock (TOC)),
    the time of ephemeris (TOE) or the transmission time. Afterwards the broadcast block with the smallest
    time difference is selected.

    The following options can be chosen for the configuration file option 'brdc_block_nearest_to':

        ============================  ================================================================================
        Option                        Description
        ============================  ================================================================================
        toc                           Broadcast block for given observation epoch is selected nearest to
                                      navigation epoch (time of clock (TOC)).
        toc:positive                  Same as 'toc' option, but the difference between observation epoch and
                                      TOC has to be positive.
        toe                           Broadcast block for given observation epoch is selected nearest to
                                      time of ephemeris (TOE).
        toe:positive                  Same as 'toe' option, but the difference between observation epoch and
                                      TOE has to be positive.
        transmission_time             Broadcast block for given observation epoch is selected nearest to
                                      transmission time.
        transmission_time:positive    Same as 'transmission_time' option, but the difference between
                                      observation epoch and transmission time has to be positive.
        ============================  ================================================================================

    Args:
        dset:  A Dataset containing model data.
        time:  Define time fields to be used. It can be for example 'time' or 'sat_time'. 'time' is related to
               observation time and 'sat_time' to satellite transmission time.

    Returns:
        Broadcast ephemeris block indices for given observation epochs.
    """
    brdc_block_nearest_to_options = [
        "toc",
        "toc:positive",
        "toe",
        "toe:positive",
        "transmission_time",
        "transmission_time:positive",
    ]
    brdc_idx = list()

    # Get configuration option
    brdc_block_nearest_to = config.tech.get("brdc_block_nearest_to", default="toe:positive").str.rsplit(":", 1)
    if ":".join(brdc_block_nearest_to) not in brdc_block_nearest_to_options:
        log.fatal(
            f"Unknown value {':'.join(brdc_block_nearest_to)!r} for configuration option 'brdc_block_nearest_to'. "
            f"The following values can be selected: {', '.join(brdc_block_nearest_to_options)}"
        )

    time_key = brdc_block_nearest_to[0]
    positive = True if "positive" in brdc_block_nearest_to else False
    log.debug(f"Broadcast block is selected nearest to '{'+' if positive else '+/-'}{time_key}' time.")

    # Check if broadcast orbits are available
    not_available_sat = sorted(set(dset.satellite) - set(self.dset_edit.satellite))
    if not_available_sat:
        log.warn(
            f"The following satellites are not given in apriori broadcast orbit file "
            f"{', '.join(self.dset_edit.meta['parser']['file_path'])}: {', '.join(not_available_sat)}"
        )
        cleaners.apply_remover("ignore_satellite", dset, satellites=not_available_sat)

    # Determine broadcast ephemeris block index for a given satellite and observation epoch
    for sat, epoch in zip(dset.satellite, dset[time]):
        idx = self.dset_edit.filter(satellite=sat)
        diff = epoch.gps.mjd - self.dset_edit[time_key].gps.mjd[idx]
        if positive:
            nearest_idx = np.array([99999 if v < 0 else v for v in diff]).argmin()
        else:
            nearest_idx = np.abs(diff).argmin()
        brdc_idx.append(idx.nonzero()[0][nearest_idx])

    return brdc_idx

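
# A minimal sketch (not part of the class) of the 'positive' nearest-block
# selection above: negative time differences are masked with a large value
# before argmin, so only blocks older than the observation epoch are chosen.
def _demo_nearest_positive():
    import numpy as np

    diff = np.array([-0.3, 0.2, 0.7, -0.1])  # obs epoch minus TOE, in days (made up)
    nearest_idx = np.array([99999 if v < 0 else v for v in diff]).argmin()
    assert nearest_idx == 1
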
def estimate_cpwl(dset, partial_vectors, obs_noise):
    """Estimate with continuous piecewise linear functions

    TODO: Describe phi and Q

    Args:
        dset (Dataset):          Model run data.
        partial_vectors (Dict):  Names and values of the partial derivatives for each partial config key.
        obs_noise (Array):       Observation noise, numpy array with one float value for each observation.
    """
    # Organize partial derivatives (state vectors) into a matrix
    n_constant = len(partial_vectors["estimate_constant"])
    n_stochastic = len(partial_vectors["estimate_stochastic"])
    n = n_constant + 2 * n_stochastic
    num_unknowns = n
    num_obs = dset.num_obs
    h = np.zeros((num_obs, n, 1))
    param_names = list()

    # Constant parameters are simply copied from the partial fields
    for idx, name in enumerate(partial_vectors["estimate_constant"]):
        h[:, idx, 0] = dset["partial_" + name][:]
        param_names.append(name)

    # Stochastic parameters are estimated as CPWL functions by adding a rate parameter
    for idx, name in enumerate(partial_vectors["estimate_stochastic"]):
        h[:, n_constant + idx * 2, 0] = dset["partial_" + name][:]
        # Trailing underscore in rate_ means the field is not added to dset
        param_names.extend([name, name + "_rate_"])

    # Read information about parameters from config files
    ref_time = np.ones(n) * dset.time.utc[0].mjd
    knot_interval = np.ones(n) * np.inf
    process_noise = np.zeros(n)
    apriori_stdev = np.empty(n)

    constant_params = {c.split("-")[0] for c in partial_vectors["estimate_constant"]}
    for param in constant_params:
        idx = np.array([c.startswith(param + "-") for c in param_names])
        apriori_stdev[idx] = config.tech[param].apriori_stdev.float

    stochastic_params = {c.split("-")[0] for c in partial_vectors["estimate_stochastic"]}
    for param in stochastic_params:
        # Set default knot_interval
        intervals = config.tech[param].knot_interval.list
        const_idx = np.array([c.startswith(param + "-") for c in param_names])
        rate_idx = np.array([c.startswith(param + "-") and c.endswith("rate_") for c in param_names])
        knot_interval[rate_idx] = float(intervals.pop(0)) * unit.seconds2day
        for interval in intervals:
            # (Potentially) overwrite with station specific knot_interval
            sta, _, seconds = interval.partition(":")
            rate_idx_sta = np.array(
                [c.startswith(param + "-") and c.endswith("rate_") and sta in c for c in param_names]
            )
            knot_interval[rate_idx_sta] = float(seconds) * unit.seconds2day
        process_noise[rate_idx] = config.tech[param].process_noise.float
        apriori_stdev[const_idx] = config.tech[param].apriori_stdev.float
        apriori_stdev[rate_idx] = config.tech[param].apriori_rate_stdev.float  # Rate parameters

    # Initialize variables
    z = dset.obs - dset.calc
    # phi = np.repeat(np.eye(n)[None, :, :], num_obs, axis=0)
    phi = list()
    delta_phi = np.eye(n, k=1)
    delta_phi[:, :n_constant] = 0
    delta_phi[:, n_constant::2] = 0
    Q = dict()
    for epoch in range(num_obs - 1):
        # TODO: Check that 24 is correct here (and use unit instead)
        delta_t = (dset.time.utc[epoch + 1].mjd - dset.time.utc[epoch].mjd) * 24
        # phi[epoch] += delta_phi * delta_t
        phi.append(scipy.sparse.csr_matrix(np.eye(n) + delta_phi * delta_t))
        idx = np.logical_and(process_noise, dset.time.utc[epoch + 1].mjd > ref_time + knot_interval)
        indices = np.where(idx)[0]
        Q[epoch] = {(i, i): process_noise[i] ** 2 for i in indices}
        ref_time[idx] += knot_interval[idx] * (
            (dset.time.utc[epoch + 1].mjd - ref_time[idx]) // knot_interval[idx]
        )
        num_unknowns += int(sum(idx))
    phi.append(scipy.sparse.csr_matrix(np.eye(n)))

    # Add pseudo-observations
    constraints = config.tech.get(key="estimate_constraint", default="").as_list(split_re=", *")
    if constraints:
        trf_constraints = [c for c in constraints if "crf" not in c]
        reference_frame = config.tech.reference_frames.list[0]
        trf = apriori.get("trf", time=dset.time.utc.mean, reference_frames=reference_frame)
        d = np.zeros((n, 6))
        stations = set()
        for idx, column in enumerate(param_names):
            if "_site_pos-" not in column:
                continue
            station = column.split("-", maxsplit=1)[-1].rsplit("_", maxsplit=1)[0]
            key = dset.meta[station]["site_id"]
            if key in trf:
                x0, y0, z0 = trf[key].pos.itrs  # TODO: Take units into account
                if column.endswith("_x"):
                    d[idx, :] = np.array([1, 0, 0, 0, z0, -y0])
                if column.endswith("_y"):
                    d[idx, :] = np.array([0, 1, 0, -z0, 0, x0])
                if column.endswith("_z"):
                    d[idx, :] = np.array([0, 0, 1, y0, -x0, 0])
                stations.add(station)

        # TODO: deal with slr_site_pos etc
        log.info(
            "Applying {} with {} from {}",
            "/".join(trf_constraints).upper(),
            ", ".join(stations),
            reference_frame.upper(),
        )
        if "nnt" in constraints and "nnr" in constraints and "vlbi_site_pos" in constant_params:
            obs_noise = np.hstack((obs_noise, np.array([.0001 ** 2] * 3 + [(1.5e-11) ** 2] * 3))).T
        elif "nnt" in constraints and "nnr" not in constraints and "vlbi_site_pos" in constant_params:
            d = d[:, 0:3]
            obs_noise = np.hstack((obs_noise, np.array([.0001 ** 2] * 3))).T
        elif "nnt" not in constraints and "nnr" in constraints and "vlbi_site_pos" in constant_params:
            d = d[:, 3:6]
            obs_noise = np.hstack((obs_noise, np.array([(1.5e-11) ** 2] * 3))).T
        elif "nnt" not in constraints and "nnr" not in constraints and "vlbi_site_pos" in constant_params:
            d = np.zeros((n, 0))
            log.warn("Unknown constraints {}. Not applying.", "/".join(constraints).upper())

        num_constraints = d.shape[1]
        try:
            h = np.vstack((h, (np.linalg.inv(d.T @ d) @ d.T)[:, :, None]))
        except np.linalg.LinAlgError:
            pass

        if "nnr_crf" in constraints and "vlbi_src_dir" in constant_params:
            celestial_reference_frame = config.tech.celestial_reference_frames.list[0]
            crf = apriori.get(
                "crf", celestial_reference_frames=celestial_reference_frame, session=dset.dataset_name
            )
            # NNR to CRF
            log.info("Applying NNR constraint to {}", celestial_reference_frame.upper())
            H2 = np.zeros((3, n))
            for idx, column in enumerate(param_names):
                if "_src_dir-" not in column:
                    continue
                source = column.split("-", maxsplit=1)[-1].split("_")[0]
                if source in crf:
                    ra = crf[source].pos.crs[0]
                    dec = crf[source].pos.crs[1]
                    if column.endswith("_ra"):
                        H2[0, idx] = -np.cos(ra) * np.sin(dec) * np.cos(dec)
                        H2[1, idx] = -np.sin(ra) * np.sin(dec) * np.cos(dec)
                        H2[2, idx] = np.cos(dec) ** 2
                    if column.endswith("_dec"):
                        H2[0, idx] = np.sin(ra)
                        H2[1, idx] = -np.cos(ra)
            obs_noise = np.hstack((obs_noise, np.array([(1e-6) ** 2] * 3)))
            num_constraints += 3
            h = np.vstack((h, H2[:, :, None]))

        z = np.hstack((z, np.zeros(num_constraints))).T
        # phi = np.vstack((phi, np.repeat(np.eye(n)[None, :, :], num_constraints, axis=0)))
        phi = phi + [scipy.sparse.csr_matrix(np.eye(n))] * num_constraints

    # Initialize and run the Kalman filter
    kalman = KalmanFilter(h, z=z, apriori_stdev=apriori_stdev, phi=phi, r=obs_noise, Q=Q, param_names=param_names)
    kalman.filter()

    # Update the dataset with results from the filter
    kalman.update_dataset(dset, param_names=param_names, normal_idx=slice(0, n_constant), num_unknowns=num_unknowns)
    kalman.cleanup()

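
# A minimal sketch (not part of the estimator) of the CPWL state-transition
# construction above: for a single stochastic parameter the state is
# [value, rate], and phi = I + delta_phi * delta_t propagates
# value_{k+1} = value_k + rate_k * delta_t. Numbers are made up.
def _demo_cpwl_transition():
    import numpy as np

    n_constant, n = 0, 2
    delta_phi = np.eye(n, k=1)
    delta_phi[:, :n_constant] = 0
    delta_phi[:, n_constant::2] = 0
    delta_t = 0.5  # hours
    phi = np.eye(n) + delta_phi * delta_t
    state = np.array([1.0, 0.2])  # [value, rate]
    assert np.allclose(phi @ state, [1.1, 0.2])
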
def compare(date: "datedoy", pipeline: "pipeline", items: "option", specifier: "option"):
    log.init(log_level="info")
    dsets = dict()

    # Additional options
    stage = util.read_option_value("--stage")
    writer_names = util.read_option_value("--writers").replace(",", " ").split()
    items_ = [s.strip() for s in items.split(",")]

    # Get optional options
    label = util.read_option_value("--label", default="None")
    # TODO: label = "last" if label == "last" else label
    station = util.read_option_value("--station", default="")
    id_ = util.read_option_value("--id", default="")

    # Update configuration of Where analysis
    config.where.update_from_options(_clean_sys_argv(pipeline))

    # Get dataset variables
    dset_vars = config.create_file_vars(rundate=date, pipeline=pipeline)

    # Read datasets for given specifier
    if specifier == "id":
        for id_ in items_:
            try:
                dset = dataset.Dataset().read(
                    rundate=date, pipeline=pipeline, stage=stage, label=label, id=id_, station=station
                )
            except OSError:
                log.warn(f"No data to read for Dataset id '{id_}'.")
                continue
            dset.vars.update(dset_vars)
            dset.vars["id"] = id_
            dsets.update({id_: dset})

    elif specifier == "station":
        for station in items_:
            try:
                dset = dataset.Dataset().read(
                    rundate=date, pipeline=pipeline, stage=stage, label=label, id=id_, station=station
                )
            except OSError:
                log.warn(f"No data to read for Dataset station '{station}'.")
                continue
            dset.vars.update(dset_vars)
            dset.vars["station"] = station
            dsets.update({station: dset})

    elif specifier == "stage":
        for stage in items_:
            try:
                dset = dataset.Dataset().read(
                    rundate=date, pipeline=pipeline, stage=stage, label=label, id=id_, station=station
                )
            except OSError:
                log.warn(f"No data to read for Dataset stage '{stage}'.")
                continue
            dset.vars.update(dset_vars)
            dset.vars["stage"] = stage
            dsets.update({stage: dset})
    else:
        log.fatal(f"Specifier {specifier} is not defined. It should be either 'id', 'station' or 'stage'.")

    if len(dsets) == 0:
        log.fatal(f"All given datasets are empty [{', '.join(items_)}].")
    elif len(dsets) == 1:
        log.warn(f"Nothing to compare. Only dataset '{list(dsets.keys())[0]}' is available.")

    # Loop over writers
    for writer in writer_names:
        write(writer, dset=dsets)

def thermal_deformation_station(dset, temp_funcs):
    """Calculate thermal deformation at one station

    The foundation is assumed to be made of concrete and the antenna is assumed to be made of steel.

    Args:
        dset:        A Dataset containing model data.
        temp_funcs:  Dictionary with temperature functions (one per station), evaluated at a given epoch.

    Returns:
        Numpy array with delay caused by thermal deformation in meters.
    """
    antenna_info = apriori.get("vlbi_antenna_info")

    # Time delay for antenna and foundation
    dt_a = 2 / 24  # unit.hours2days
    dt_f = 6 / 24  # unit.hours2days

    delays = np.zeros(dset.num_obs)
    sin_a = np.sin(dset.site_pos.azimuth)
    cos_a = np.cos(dset.site_pos.azimuth)
    sin_e = np.sin(dset.site_pos.elevation)
    cos_e = np.cos(dset.site_pos.elevation)
    cos_d = np.cos(dset.src_dir.declination)

    for ivsname in dset.unique("ivsname"):
        if ivsname not in antenna_info:
            log.warn("Missing thermal deformation for ivsname '{}'. Correction set to zero.", ivsname)
            continue

        idx = dset.filter(ivsname=ivsname)

        AO = antenna_info[ivsname]["axis_offset"]
        axis_type = antenna_info[ivsname]["mount"]
        focus_type = antenna_info[ivsname]["focus"]
        gamma_f = antenna_info[ivsname]["coefficient_foundation"]
        gamma_a = antenna_info[ivsname]["coefficient_fixed_axis"]
        T_0 = antenna_info[ivsname]["reference_temperature"]
        h_f = antenna_info[ivsname]["height_foundation"]
        h_p = antenna_info[ivsname]["fixed_axis"]
        h_v = antenna_info[ivsname]["distance_antenna_vertex"]
        h_s = antenna_info[ivsname]["height_focus"]
        T = temp_funcs[ivsname]
        t = dset.time.utc.mjd

        if focus_type == "FO_PRIM":
            F_a = 0.9
        elif focus_type == "FO_SECN":
            F_a = 1.8
        else:
            log.warn("Unknown antenna focus type '{}' for {}. Correction set to zero", focus_type, ivsname)
            continue

        if axis_type == "MO_AZEL":
            delays[idx] = gamma_f * (T(t - dt_f)[idx] - T_0) * (h_f * sin_e[idx]) + gamma_a * (
                T(t - dt_a)[idx] - T_0
            ) * (h_p * sin_e[idx] + AO * cos_e[idx] + h_v - F_a * h_s)
        elif axis_type == "MO_EQUA":
            delays[idx] = gamma_f * (T(t - dt_f)[idx] - T_0) * (h_f * sin_e[idx]) + gamma_a * (
                T(t - dt_a)[idx] - T_0
            ) * (h_p * sin_e[idx] + AO * cos_d[idx] + h_v - F_a * h_s)
        elif axis_type == "MO_XYNO":
            delays[idx] = gamma_f * (T(t - dt_f)[idx] - T_0) * (h_f * sin_e[idx]) + gamma_a * (
                T(t - dt_a)[idx] - T_0
            ) * (h_p * sin_e[idx] + AO * np.sqrt(1 - (cos_e[idx] * cos_a[idx]) ** 2) + h_v - F_a * h_s)
        elif axis_type == "MO_XYEA":
            delays[idx] = gamma_f * (T(t - dt_f)[idx] - T_0) * (h_f * sin_e[idx]) + gamma_a * (
                T(t - dt_a)[idx] - T_0
            ) * (h_p * sin_e[idx] + AO * np.sqrt(1 - (cos_e[idx] * sin_a[idx]) ** 2) + h_v - F_a * h_s)
        else:
            log.warn("Unknown antenna axis type '{}' for {}. Correction set to zero", axis_type, ivsname)
            continue

        if any(np.isnan(delays[idx])):
            # log.warn("Unable to interpolate temperatures for {}. Correction set to zero", ivsname)
            delays[idx] = 0

    return delays

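
# A minimal numeric sketch (not part of the model) of the MO_AZEL delay term
# above: expansion coefficients, heights and temperatures are made up.
def _demo_thermal_delay_azel():
    import numpy as np

    gamma_f, gamma_a = 1.0e-5, 1.2e-5  # 1/K, concrete foundation / steel antenna
    h_f, h_p, h_v, h_s, AO = 2.0, 10.0, 2.5, 8.0, 0.5  # meters
    F_a, T_0 = 0.9, 15.0                               # primary focus factor, reference temperature (deg C)
    T_f, T_a = 20.0, 22.0                              # lagged foundation/antenna temperatures (deg C)
    elevation = np.radians(45.0)
    sin_e, cos_e = np.sin(elevation), np.cos(elevation)

    delay = gamma_f * (T_f - T_0) * (h_f * sin_e) + gamma_a * (T_a - T_0) * (
        h_p * sin_e + AO * cos_e + h_v - F_a * h_s
    )
    return delay  # meters
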
def _generate_dataframes(dset: Dict[str, "Dataset"]) -> Tuple[Dict[str, pd.core.frame.DataFrame], ...]:
    """Generate dataframe based on station datasets

    The dataframe for each station in dictionary "dfs" has following columns:

        east:    East-coordinate in topocentric system
        north:   North-coordinate in topocentric system
        up:      Up-coordinate in topocentric system
        hpe:     horizontal position error
        vpe:     vertical position error
        pos_3d:  3D position error
        pdop:    position dilution of precision
        hdop:    horizontal dilution of precision
        vdop:    vertical dilution of precision

    Example for "dfs" dictionary:

        'hons':               time_gps       hpe       vpe      east     north        up
                0  2019-03-01 00:00:00  0.301738  0.057244  0.113758  0.279472  0.057244
                1  2019-03-01 00:00:00  0.301738  0.057244  0.113758  0.279472  0.057244

        'krss':               time_gps       hpe       vpe      east     north        up
                0  2019-03-01 00:00:00  0.710014  0.186791 -0.235267  0.669903  0.186791
                1  2019-03-01 00:00:00  0.710014  0.186791 -0.235267  0.669903  0.186791

    Example for "dfs_day" dictionary for "mean" key:

        'mean': {
            'hpe':                nabf      vegs      hons      krss
                   time_gps
                   2019-03-01  1.368875  0.935687  1.136763  0.828754
                   2019-03-02  0.924839  0.728280  0.911677  0.854832

            'vpe':                nabf      vegs      hons      krss
                   time_gps
                   2019-03-01  1.715893  1.147265  1.600330  0.976541
                   2019-03-02  1.533437  1.307373  1.476295  1.136991
        }

    Example for "dfs_month" dictionary for "mean" key:

        'mean': {
            'hpe':            nabf      vegs      hons      krss
                  Mar-2019  1.186240  0.861718  1.095827  1.021354
                  Apr-2019  0.891947  0.850343  0.977908  0.971099

            'vpe':            nabf      vegs      hons      krss
                  Mar-2019  1.854684  1.291406  1.450466  1.225467
                  Apr-2019  1.964404  1.706507  1.687994  1.500742
        }

    Args:
        dset: Dictionary with station name as keys and the belonging Dataset as value

    Returns:
        Tuple with following entries:

        | Element    | Description                                                                            |
        |------------|----------------------------------------------------------------------------------------|
        | dfs        | Dictionary with station name as keys and the belonging dataframe as value with        |
        |            | following dataframe columns: east, north, up, hpe, vpe, pos_3d                        |
        | dfs_day    | Dictionary with function type as keys ('mean', 'percentile', 'rms', 'std') and a      |
        |            | dictionary as values. The dictionary has fields as keys (e.g. hpe, vpe) and the       |
        |            | belonging dataframe as value with DAILY samples of the given function type and        |
        |            | stations as columns.                                                                   |
        | dfs_month  | Dictionary with function type as keys ('mean', 'percentile', 'rms', 'std') and a      |
        |            | dictionary as values. The dictionary has fields as keys (e.g. hpe, vpe) and the       |
        |            | belonging dataframe as value with MONTHLY samples of the given function type and      |
        |            | stations as columns.                                                                   |
    """
    dsets = dset
    dfs = {}
    fields = {
        "east": pd.DataFrame(),
        "north": pd.DataFrame(),
        "up": pd.DataFrame(),
        "hpe": pd.DataFrame(),
        "vpe": pd.DataFrame(),
        "pos_3d": pd.DataFrame(),
        "pdop": pd.DataFrame(),
        "hdop": pd.DataFrame(),
        "vdop": pd.DataFrame(),
    }
    dfs_day = {
        "mean": copy.deepcopy(fields),
        "percentile": copy.deepcopy(fields),
        "std": copy.deepcopy(fields),
        "rms": copy.deepcopy(fields),
    }
    dfs_month = {
        "mean": copy.deepcopy(fields),
        "percentile": copy.deepcopy(fields),
        "std": copy.deepcopy(fields),
        "rms": copy.deepcopy(fields),
    }

    for station, dset in dsets.items():
        if dset.num_obs == 0:
            log.warn(f"Dataset '{station}' is empty.")
            continue

        # Determine topocentric coordinates (east, north, up)
        ref_pos = position.Position(
            np.repeat(
                np.array([dset.meta["pos_x"], dset.meta["pos_y"], dset.meta["pos_z"]])[None, :],
                dset.num_obs,
                axis=0,
            ),
            system="trs",
        )
        if "enu" not in dset.fields:
            dset.add_position_delta(
                name="enu",
                val=(dset.site_pos.trs - ref_pos).val,
                system="trs",
                ref_pos=ref_pos,
            )

        # TODO: Maybe it is not necessary to introduce enu, hpe and vpe to dataset.
        #       Maybe better to introduce fields in estimate stage already.
        if "hpe" not in dset.fields:
            hpe = np.sqrt(dset.enu.enu.east ** 2 + dset.enu.enu.north ** 2)
            dset.add_float("hpe", val=hpe)
        if "vpe" not in dset.fields:
            vpe = np.absolute(dset.enu.enu.up)
            dset.add_float("vpe", val=vpe)
        if "pos_3d" not in dset.fields:
            pos_3d = np.sqrt(dset.enu.enu.east ** 2 + dset.enu.enu.north ** 2 + dset.enu.enu.up ** 2)
            dset.add_float("pos_3d", val=pos_3d)

        # Determine dataframe
        df = dset.as_dataframe(fields=["enu.enu", "time.gps", "hpe", "vpe", "pos_3d", "pdop", "vdop", "hdop"])
        df = df.rename(columns={"enu_enu_0": "east", "enu_enu_1": "north", "enu_enu_2": "up"})

        if df.empty:
            continue
        else:
            # Save data in dictionaries
            dfs.update({station: df})

            for type_ in dfs_day.keys():
                df_day = _apply(df, "D", type_)
                for field in fields.keys():
                    if dfs_day[type_][field].empty:
                        dfs_day[type_][field][station] = df_day[field]
                    else:
                        dfs_day[type_][field] = pd.concat([dfs_day[type_][field], df_day[field]], axis=1)
                        dfs_day[type_][field] = dfs_day[type_][field].rename(columns={field: station})

                df_month = _apply(df, "M", type_)
                df_month.index = df_month.index.strftime("%b-%Y")
                for field in fields.keys():
                    dfs_month[type_][field][station] = df_month[field]

    return dfs, dfs_day, dfs_month

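
# A minimal sketch of what the `_apply` helper used above might look like
# (the real helper is defined elsewhere in this module; this version is an
# assumption for illustration): resample the dataframe with the given sample
# rate ("D" or "M") and statistic ('mean', 'percentile', 'rms', 'std').
def _demo_apply(df, sample, func):
    import numpy as np

    df_sampled = df.set_index("time_gps").resample(sample)
    if func == "mean":
        return df_sampled.mean()
    if func == "percentile":
        return df_sampled.apply(lambda x: np.nanpercentile(x, q=95))
    if func == "rms":
        return df_sampled.apply(lambda x: np.sqrt(np.nanmean(np.square(x))))
    if func == "std":
        return df_sampled.std()
    raise ValueError(f"Unknown function '{func}'")
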
def _generate_dataframe(dsets: Dict[str, "Dataset"]) -> Tuple[pd.core.frame.DataFrame]:
    """Generate dataframes based on SISRE datasets

    The dataframe "df" has following columns:

        time_gps:      Time in GPS time scale given as datetime objects
        satellite:     Satellite identifiers
        system:        GNSS identifier
        <solution_1>:  First SISRE solution (e.g. E1)
        <solution_2>:  Second SISRE solution (e.g. E1/E5b)
        <solution_3>:  Third SISRE solution (e.g. E1/E5a)

    Example for "df" dictionary:

                     time_gps satellite system        E1    E1/E5b    E1/E5a
        0 2019-01-01 00:00:00       E01      E  0.173793  0.123220  0.171849
        1 2019-01-01 00:00:00       E02      E  0.048395  0.127028  0.108108
        2 2019-01-01 00:00:00       E03      E  0.089328  0.121884  0.079576
        3 2019-01-01 00:00:00       E04      E  0.110866  0.088446  0.092292
        4 2019-01-01 00:00:00       E05      E  0.348935  0.305333  0.258733

    "df_month_perc" is a dataframe with month as indices and SISRE 95% percentile values for each signal
    combination as columns.

    Example for "df_month_perc" dictionary:

                        E1    E1/E5b    E1/E5a
        Jan-2019  0.335688  0.297593  0.326859
        Feb-2019  0.380575  0.330701  0.352535
        Mar-2019  0.353586  0.314817  0.344597

    Example for "df_month_rms" dictionary: TODO

    Args:
        dsets: Dictionary with SISRE solution name as keys (e.g. cnes_inav_e1, cnes_inav_e1e5b,
               cnes_fnav_e1e5a) and the belonging Dataset as value

    Returns:
        Tuple with following entries:

        | Element            | Description                                                                     |
        |--------------------|---------------------------------------------------------------------------------|
        | df                 | Given DAILY SISRE solutions are merged into one dataframe                      |
        | df_month_perc      | Dataframe with MONTHLY samples of 95th percentile SISRE (based on Galileo SDD  |
        |                    | v1.0 version)                                                                   |
        | df_month_perc_rms  | Dataframe with MONTHLY samples of 95th percentile SISRE, which are based on    |
        |                    | epochwise RMS SISRE solutions (based on Galileo SDD v1.1 version)              |
        | df_month_rms       | Dataframe with MONTHLY samples of RMS SISRE                                    |
    """
    df = pd.DataFrame()
    signal_types = list()

    for name, dset in dsets.items():
        if dset.num_obs == 0:
            log.warn(f"Dataset '{name}' is empty.")
            continue

        signal_type = _get_signal_type(dset.meta)
        signal_types.append(signal_type)
        df_tmp = dset.as_dataframe(fields=["satellite", "system", "sisre", "time.gps"])  # , index="time.gps")
        df_tmp = df_tmp.rename(columns={"sisre": signal_type})

        if df.empty:
            df = df_tmp
            continue
        df = df.merge(df_tmp, on=["satellite", "system", "time_gps"], how="outer")

    if df.empty:
        log.fatal(f"All given datasets are empty [{', '.join(dsets.keys())}].")

    # Generate monthly samples of 95th percentile SISRE (after SDD v1.0 version)
    # Note: resample(how=...) was removed in Pandas 1.0, therefore .resample().apply() is used
    df_month_perc = df.set_index("time_gps").resample("M").apply(lambda x: np.nanpercentile(x, q=95))
    df_month_perc.index = df_month_perc.index.strftime("%b-%Y")

    # Generate monthly samples of 95th percentile SISRE based on epochwise SISRE RMS solutions
    # (after SDD v1.1 version)
    #
    # NOTE: The following solution assumes that the SISRE solution in dataframe 'df' is only given for one GNSS
    if len(set(df["system"])) > 1:
        log.fatal(
            f"Determination of 95th percentile SISRE based on epochwise SISRE RMS solutions can only be applied "
            f"for one given GNSS and not for {set(df['system'])} together."
        )

    epochs = sorted(set(df["time_gps"]))
    df_tmp = pd.DataFrame(index=epochs, columns=signal_types)

    # Loop over observation epochs
    for epoch in epochs:
        idx = df["time_gps"] == epoch
        row = dict()

        # Determine RMS for each signal type over all given SISRE satellite solutions in each epoch
        for signal_type in signal_types:
            row[signal_type] = np.sqrt(np.nanmean(np.square(df[signal_type][idx])))
        df_tmp.loc[epoch] = pd.Series(row)

    df_month_perc_rms = df_tmp.resample("M").apply(lambda x: np.nanpercentile(list(x), q=95))
    df_month_perc_rms.index = df_month_perc_rms.index.strftime("%b-%Y")

    # Generate monthly samples of RMS SISRE
    df_month_rms = df.set_index("time_gps").resample("M").apply(lambda x: np.sqrt(np.nanmean(np.square(x))))
    df_month_rms.index = df_month_rms.index.strftime("%b-%Y")

    return df, df_month_perc.transpose(), df_month_perc_rms.transpose(), df_month_rms.transpose()

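
# A minimal sketch (not part of the writer) of the SDD v1.1 style statistic
# above: first an epochwise RMS over satellites, then a monthly 95th
# percentile of those RMS values. Data are made up for illustration.
def _demo_percentile_of_epochwise_rms():
    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(0)
    epochs = pd.date_range("2019-01-01", periods=60, freq="D")
    sisre = pd.DataFrame(
        np.abs(rng.standard_normal((60, 3))), index=epochs, columns=["E01", "E02", "E03"]
    )
    rms_per_epoch = np.sqrt(np.nanmean(np.square(sisre.to_numpy()), axis=1))
    df_rms = pd.DataFrame({"rms": rms_per_epoch}, index=epochs)
    df_month = df_rms.resample("M").apply(lambda x: np.nanpercentile(x, q=95))
    df_month.index = df_month.index.strftime("%b-%Y")
    return df_month
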
def _organize_data(self):
    """Copy content from self.raw to self.data and convert all data to arrays of num_obs length"""
    meta = self.data.setdefault("meta", {})
    meta["session_code"] = self.raw["Session"].get("Session")

    # Epoch info
    self.data["time"] = self.raw["Observables"]["TimeUTC"]["time"]
    num_obs = len(self.data["time"])
    self.data["station_1"] = self.raw["Observables"]["Baseline"]["Baseline"].reshape(num_obs, -1)[:, 0]
    self.data["station_2"] = self.raw["Observables"]["Baseline"]["Baseline"].reshape(num_obs, -1)[:, 1]
    self.data["source"] = self.raw["Observables"]["Source"]["Source"]

    # Obs info
    try:
        self.data["observed_delay_ferr"] = self.raw["Observables"]["GroupDelay"]["X"]["GroupDelaySig"] * constant.c
    except KeyError:
        self.data["observed_delay_ferr"] = np.zeros(num_obs)
        log.error("Missing group delay formal error information")

    try:
        self.data["data_quality"] = self.raw["ObsEdit"]["Edit"]["DelayFlag"]
    except KeyError:
        self.data["data_quality"] = np.full(num_obs, np.nan)
        log.warn("Missing data quality information")

    try:
        self.data["observed_delay"] = self.raw["ObsEdit"]["GroupDelayFull"]["X"]["GroupDelayFull"] * constant.c
    except KeyError:
        self.data["observed_delay"] = np.full(num_obs, np.nan)
        log.error("Missing full group delay information")

    try:
        self.data["iono_delay"] = (
            self.raw["ObsDerived"]["Cal-SlantPathIonoGroup"]["X"]["Cal-SlantPathIonoGroup"].reshape(num_obs, -1)[:, 0]
            * constant.c
        )
    except KeyError:
        try:
            self.data["dtec"] = self.raw["Observables"]["DiffTec"]["diffTec"]  # Unit: TECU
            self.data["ref_freq"] = self.raw["Observables"]["RefFreq"]["X"]["RefFreq"] * Unit.MHz2Hz  # Unit: Hz
        except KeyError:
            self.data["iono_delay"] = np.full(num_obs, np.nan)
            log.warn("Missing ionosphere delay information")

    try:
        self.data["iono_delay_ferr"] = (
            self.raw["ObsDerived"]["Cal-SlantPathIonoGroup"]["X"]["Cal-SlantPathIonoGroupSigma"].reshape(num_obs, -1)[:, 0]
            * constant.c
        )
    except KeyError:
        try:
            self.data["dtec_ferr"] = self.raw["Observables"]["DiffTec"]["diffTecStdDev"]  # Unit: TECU
        except KeyError:
            self.data["iono_delay_ferr"] = np.full(num_obs, np.nan)
            # Guard against the dtec branch above, where "iono_delay" is never set
            if "iono_delay" in self.data and not np.isnan(self.data["iono_delay"]).all():
                log.warn("Missing ionosphere delay formal error information")

    try:
        self.data["iono_quality"] = self.raw["ObsDerived"]["Cal-SlantPathIonoGroup"]["X"][
            "Cal-SlantPathIonoGroupDataFlag"
        ]
    except KeyError:
        self.data["iono_quality"] = np.full(num_obs, np.nan)
        log.warn("Missing ionosphere quality information")

    # Station dependent info
    for field, params in self._STATION_FIELDS.items():
        self.data[field + "_1"] = np.zeros(num_obs)
        self.data[field + "_2"] = np.zeros(num_obs)
        for station in self.raw["Head"]["StationList"]:
            sta_idx_1 = self.data["station_1"] == station
            sta_idx_2 = self.data["station_2"] == station
            sta_key = station.replace(" ", "_")
            sta_time = self.raw[sta_key]["TimeUTC"]["sec_since_ref"]
            try:
                sta_data = self.raw[sta_key][params["filestub"]][params["variable"]]
                missing_idx = np.isclose(sta_data, params["nan_value"])
                sta_data[missing_idx] = np.nan
                if missing_idx.any():
                    log.warn(f"Missing {field} data for {station}")
            except KeyError:
                sta_data = np.full(len(sta_time), np.nan)
                log.warn(f"Missing all {field} data for {station}")

            if len(sta_data) == 1:
                # Use a constant function if there is only one data point
                func = lambda _: sta_data[0]
            else:
                func = interpolate.interp1d(
                    sta_time,
                    sta_data,
                    bounds_error=False,
                    fill_value=(sta_data[0], sta_data[-1]),
                    assume_sorted=True,
                )
            epochs_1 = self.raw["Observables"]["TimeUTC"]["sec_since_ref"][sta_idx_1]
            epochs_2 = self.raw["Observables"]["TimeUTC"]["sec_since_ref"][sta_idx_2]
            self.data[field + "_1"][sta_idx_1] = func(epochs_1) * params["factor"]
            self.data[field + "_2"][sta_idx_2] = func(epochs_2) * params["factor"]
def _write_to_dataset(parser, dset, rundate, session):
    data = parser.as_dict()
    # TODO: units on fields

    # Convert source names to official IERS names
    source_names = apriori.get("vlbi_source_names")
    iers_source_names = [source_names[src]["iers_name"] if src in source_names else src for src in data["source"]]
    data["source"] = iers_source_names

    # Replace spaces in station names with underscores to match official IVS name
    data["station_1"] = np.char.replace(data["station_1"], " ", "_")
    data["station_2"] = np.char.replace(data["station_2"], " ", "_")

    dset.num_obs = len(data["time"])
    dset.add_time("time", val=data.pop("time"), scale="utc", format="isot", write_level="operational")

    for field, values in data.items():
        values = np.array(values)
        if values.dtype.kind in {"U", "S"}:
            dset.add_text(field, val=values, write_level="operational")
        elif values.dtype.kind in {"f", "i"}:
            dset.add_float(field, val=values, write_level="operational")
        elif values.dtype.kind in {"O"}:
            continue
        else:
            log.warn("Unknown datatype {} for field {}", values.dtype, field)

    # Source directions
    crf = apriori.get("crf", session=session)
    ra = np.array([crf[s].pos.crs[0] if s in crf else 0 for s in data["source"]])
    dec = np.array([crf[s].pos.crs[1] if s in crf else 0 for s in data["source"]])
    dset.add_direction("src_dir", ra=ra, dec=dec, write_level="operational")

    # Station information
    log.info("Found stations: {}", ", ".join(dset.unique("station")))
    trf = apriori.get("trf", time=dset.time)
    station_codes = apriori.get("vlbi_station_codes")
    dset.add_text(
        "baseline",
        val=np.array([f"{s1}/{s2}" for s1, s2 in zip(data["station_1"], data["station_2"])]),
        write_level="operational",
    )
    for site in dset.unique("station"):
        if site in station_codes:
            cdp = station_codes[site]["cdp"]
            trf_site = trf[cdp]
        else:
            named_site = trf.named_site(site)
            trf_site = trf.closest(named_site.pos, max_distance=5)
            cdp = trf_site.key
            ignore_stations = config.tech.ignore_station.stations.list
            if site in ignore_stations:
                log.info("Undefined station name {}. Assuming station is {}.", site, trf_site.name)
            else:
                log.warn("Undefined station name {}. Assuming station is {}.", site, trf_site.name)

        data["pos_" + site] = trf_site.pos.itrs
        log.debug("Using position {} for {} from {}", np.mean(data["pos_" + site], axis=0), site, trf_site.source)

        ivsname = station_codes[cdp]["name"]
        data["sta_" + site] = dict(site_id=cdp, cdp=cdp, ivsname=ivsname)

    # Positions
    itrs_pos_1 = np.array([data["pos_" + s][i, :] for i, s in enumerate(data["station_1"])])
    itrs_vel_1 = np.zeros((dset.num_obs, 3))
    dset.add_posvel(
        "site_pos_1",
        time="time",
        other="src_dir",
        itrs=np.concatenate((itrs_pos_1, itrs_vel_1), axis=1),
        write_level="operational",
    )
    itrs_pos_2 = np.array([data["pos_" + s][i, :] for i, s in enumerate(data["station_2"])])
    itrs_vel_2 = np.zeros((dset.num_obs, 3))
    dset.add_posvel(
        "site_pos_2",
        time="time",
        other="src_dir",
        itrs=np.concatenate((itrs_pos_2, itrs_vel_2), axis=1),
        write_level="operational",
    )

    # Station data
    sta_fields = set().union(*[v.keys() for k, v in data.items() if k.startswith("sta_")])
    for field in sta_fields:
        dset.add_text(field + "_1", val=[data["sta_" + s][field] for s in data["station_1"]])  # write_level='analysis')
        dset.add_text(field + "_2", val=[data["sta_" + s][field] for s in data["station_2"]])  # write_level='analysis')

    # Station meta
    station_keys = sorted([k for k, v in data.items() if k.startswith("sta_")])
    pos_keys = sorted([k for k, v in data.items() if k.startswith("pos_")])

    for sta_key, pos_key in zip(station_keys, pos_keys):
        sta_name = sta_key.replace("sta_", "")
        cdp = data[sta_key]["cdp"]
        ivsname = station_codes[cdp]["name"]
        longitude, latitude, height, _ = sofa.iau_gc2gd(2, data[pos_key][0, :])  # TODO: Reference ellipsoid
        dset.add_to_meta(ivsname, "cdp", cdp)
        dset.add_to_meta(ivsname, "site_id", cdp)
        dset.add_to_meta(ivsname, "domes", station_codes[cdp]["domes"])
        dset.add_to_meta(ivsname, "marker", station_codes[cdp]["marker"])
        dset.add_to_meta(ivsname, "description", station_codes[cdp]["description"])
        dset.add_to_meta(ivsname, "longitude", longitude)
        dset.add_to_meta(ivsname, "latitude", latitude)
        dset.add_to_meta(ivsname, "height", height)
        if sta_name != ivsname:
            dset.add_to_meta(sta_name, "cdp", cdp)
            dset.add_to_meta(sta_name, "site_id", cdp)
            dset.add_to_meta(sta_name, "domes", station_codes[cdp]["domes"])
            dset.add_to_meta(sta_name, "marker", station_codes[cdp]["marker"])
            dset.add_to_meta(sta_name, "description", station_codes[cdp]["description"])
            dset.add_to_meta(sta_name, "longitude", longitude)
            dset.add_to_meta(sta_name, "latitude", latitude)
            dset.add_to_meta(sta_name, "height", height)

    dset.meta["tech"] = "vlbi"
    dset.add_to_meta("input", "file", parser.file_path.stem)
    dset.add_to_meta("input", "type", config.tech.obs_format.str.upper())

    if "meta" not in data:
        master = apriori.get("vlbi_master_schedule", rundate=rundate)
        master_data = master.get((rundate.timetuple().tm_yday, session), {})
        dset.add_to_meta("input", "session_code", master_data.get("session_code", ""))
    else:
        dset.add_to_meta("input", "session_code", data["meta"].get("session_code", ""))

    reg_hits = re.search(r"\d", dset.meta["input"]["session_code"])
    num_idx = reg_hits.start() if reg_hits else len(dset.meta["input"]["session_code"])
    dset.add_to_meta("input", "session_type", dset.meta["input"]["session_code"][:num_idx])

    # Final cleanup
    # If there are more than 300 sources in an NGS file the source names are gibberish
    bad_source_idx = ra == 0
    bad_sources = np.array(dset.source)[bad_source_idx]
    for s in np.unique(bad_sources):
        log.warn("Unknown source {}. Observations with this source are discarded", s)
    dset.subset(np.logical_not(bad_source_idx))
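# A standalone illustration of the dtype.kind dispatch used above to decide how each parsed field is
# stored: kind "U"/"S" becomes a text field, "f"/"i" a float field, and "O" (object) is skipped.
# Field names and values are made up.
import numpy as np

for name, values in {"station": ["NYALES20", "WETTZELL"], "delay": [1.2e-8, 3.4e-8], "flag": [0, 1]}.items():
    arr = np.array(values)
    print(name, arr.dtype.kind)  # station U, delay f, flag i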
def _write_to_dataset(parser, dset, rundate, session):
    data = parser.as_dict()
    units = data["meta"].get("units", {})

    # Session meta
    dset.meta.add("tech", "vlbi")
    dset.meta.add("file", parser.file_path.stem, section="input")
    dset.meta.add("type", config.tech.obs_format.str.upper(), section="input")

    if "meta" not in data:
        # Only read the master file if session_code is not available in data["meta"].
        # This avoids a dependency on the master file, which changes frequently.
        master = apriori.get("vlbi_master_schedule", rundate=rundate)
        master_data = master.get((rundate.timetuple().tm_yday, session), {})
        session_code = master_data.get("session_code", "")
    else:
        master = apriori.get("vlbi_master_schedule")
        session_code = data["meta"].get("session_code", "")

    dset.meta.add("session_code", session_code, section="input")
    dset.meta.add("session_type", master.session_type(session_code), section="input")
    log.info(f"Session code: {session_code}")

    # Convert source names to official IERS names
    source_names = apriori.get("vlbi_source_names")
    iers_source_names = [source_names[src]["iers_name"] if src in source_names else src for src in data["source"]]
    data["source"] = iers_source_names

    # Replace spaces in station names with underscores to match official IVS name
    data["station_1"] = np.char.replace(data["station_1"], " ", "_")
    data["station_2"] = np.char.replace(data["station_2"], " ", "_")

    dset.num_obs = len(data["time"])
    dset.add_time("time", val=data.pop("time"), scale="utc", fmt="isot", write_level="operational")

    for field, values in data.items():
        values = np.array(values)
        if values.dtype.kind in {"U", "S"}:
            multiplier = -1 if field.endswith("_1") else 1
            dset.add_text(field, val=values, multiplier=multiplier, write_level="operational")
        elif values.dtype.kind in {"f", "i"}:
            multiplier = -1 if field.endswith("_1") else 1
            unit = units.get(field, None)
            dset.add_float(field, val=values, multiplier=multiplier, write_level="operational", unit=unit)
        elif values.dtype.kind in {"O"}:
            continue
        else:
            log.warn(f"Unknown datatype {values.dtype} for field {field}")

    # Source directions
    crf = apriori.get("crf", time=dset.time)
    ra = np.array([crf[s].pos.right_ascension if s in crf else 0 for s in data["source"]])
    dec = np.array([crf[s].pos.declination if s in crf else 0 for s in data["source"]])
    dset.add_direction("src_dir", ra=ra, dec=dec, time=dset.time, write_level="operational")

    # Station information
    log.info(f"Found stations: {', '.join(dset.unique('station'))}")
    trf = apriori.get("trf", time=dset.time)
    station_codes = apriori.get("vlbi_station_codes")
    dset.add_text(
        "baseline",
        val=np.array([f"{s1}/{s2}" for s1, s2 in zip(data["station_1"], data["station_2"])]),
        write_level="operational",
    )
    for site in dset.unique("station"):
        if site in station_codes:
            cdp = station_codes[site]["cdp"]
            trf_site = trf[cdp]
        else:
            named_site = trf.named_site(site)
            trf_site = trf.closest(named_site.pos)
            cdp = trf_site.key
            ignore_stations = config.tech.ignore_station.stations.list
            logger = log.info if site in ignore_stations else log.warn
            logger(f"Undefined station name {site}. Assuming station is {trf_site.name}.")

        data["pos_" + site] = trf_site.pos.trs.val
        _site_pos = np.mean(data[f"pos_{site}"], axis=0)
        log.debug(f"Using position {_site_pos} for {site} from {trf_site.source}")

        ivsname = station_codes[cdp]["name"]
        data["sta_" + site] = dict(site_id=cdp, cdp=cdp, ivsname=ivsname)

    # Positions
    itrs_pos_1 = np.array([data["pos_" + s][i, :] for i, s in enumerate(data["station_1"])])
    itrs_vel_1 = np.zeros((dset.num_obs, 3))
    dset.add_posvel(
        "site_pos_1",
        val=np.concatenate((itrs_pos_1, itrs_vel_1), axis=1),
        ellipsoid=ellipsoid.get(config.tech.reference_ellipsoid.str.upper()),
        system="trs",
        time=dset.time,
        # other=dset.src_dir,
        write_level="operational",
    )
    itrs_pos_2 = np.array([data["pos_" + s][i, :] for i, s in enumerate(data["station_2"])])
    itrs_vel_2 = np.zeros((dset.num_obs, 3))
    dset.add_posvel(
        "site_pos_2",
        val=np.concatenate((itrs_pos_2, itrs_vel_2), axis=1),
        ellipsoid=ellipsoid.get(config.tech.reference_ellipsoid.str.upper()),
        system="trs",
        time=dset.time,
        # other=dset.src_dir,
        write_level="operational",
    )

    # Compute aberrated source directions
    def aberrated_src_dir(site_pos):
        """See IERS 2010 Conventions, equation 11.15"""
        site_vel_gcrs = site_pos.gcrs.vel.val
        eph = apriori.get("ephemerides", time=dset.time)
        vel = eph.vel_bcrs("earth") + site_vel_gcrs
        return (
            dset.src_dir.unit_vector
            + vel / constant.c
            - dset.src_dir.unit_vector
            * (dset.src_dir.unit_vector[:, None, :] @ vel[:, :, None])[:, :, 0]
            / constant.c
        )

    k_1 = aberrated_src_dir(dset.site_pos_1)
    dset.add_direction("abr_src_dir_1", val=k_1, system="gcrs", time=dset.time)
    dset.site_pos_1.other = dset.abr_src_dir_1

    k_2 = aberrated_src_dir(dset.site_pos_2)
    dset.add_direction("abr_src_dir_2", val=k_2, system="gcrs", time=dset.time)
    dset.site_pos_2.other = dset.abr_src_dir_2

    # Station data
    sta_fields = set().union(*[v.keys() for k, v in data.items() if k.startswith("sta_")])
    for field in sta_fields:
        dset.add_text(field + "_1", val=[data["sta_" + s][field] for s in data["station_1"]], multiplier=-1)
        dset.add_text(field + "_2", val=[data["sta_" + s][field] for s in data["station_2"]], multiplier=1)

    # Station meta
    station_keys = sorted([k for k, v in data.items() if k.startswith("sta_")])
    pos_keys = sorted([k for k, v in data.items() if k.startswith("pos_")])

    for sta_key, pos_key in zip(station_keys, pos_keys):
        sta_name = sta_key.replace("sta_", "")
        cdp = data[sta_key]["cdp"]
        ivsname = station_codes[cdp]["name"]
        longitude, latitude, height, _ = sofa.iau_gc2gd(2, data[pos_key][0, :])  # TODO: Reference ellipsoid
        # Write the same meta entries under both the IVS name and the original station name
        for section in {ivsname, sta_name}:
            dset.meta.add("cdp", cdp, section=section)
            dset.meta.add("site_id", cdp, section=section)
            dset.meta.add("domes", station_codes[cdp]["domes"], section=section)
            dset.meta.add("marker", station_codes[cdp]["marker"], section=section)
            dset.meta.add("description", station_codes[cdp]["description"], section=section)
            dset.meta.add("longitude", longitude, section=section)
            dset.meta.add("latitude", latitude, section=section)
            dset.meta.add("height", height, section=section)

    # Final cleanup
    # If there are more than 300 sources in an NGS file the source names are gibberish
    bad_source_idx = ra == 0
    bad_sources = np.array(dset.source)[bad_source_idx]
    for s in np.unique(bad_sources):
        log.warn(f"Unknown source {s}. Observations with this source are discarded")
    dset.subset(np.logical_not(bad_source_idx))
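# The bracketed matmul in aberrated_src_dir above is just a row-wise dot product. A small check of that
# identity with random unit vectors (purely illustrative, not data from the function above):
import numpy as np

rng = np.random.default_rng(1)
u = rng.normal(size=(4, 3))
u /= np.linalg.norm(u, axis=1, keepdims=True)  # unit vectors, one per observation
v = rng.normal(size=(4, 3))                    # velocities

dot_matmul = (u[:, None, :] @ v[:, :, None])[:, :, 0]  # shape (4, 1)
dot_sum = np.sum(u * v, axis=1, keepdims=True)         # same numbers
assert np.allclose(dot_matmul, dot_sum)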
def update_dataset(self, dset, param_names, normal_idx, num_unknowns):
    """Update the given dataset with results from the filtering

    Args:
        dset (Dataset):      The dataset.
        param_names (List):  Strings with names of parameters. Used to form field names.
        normal_idx (Slice):  Slice denoting which parameters should be used for the normal equations.
        num_unknowns (Int):  Number of unknowns.
    """
    # Update dataset with state and estimation fields and calculate new residuals
    self._add_fields(dset, param_names)
    dset.residual[:] = dset.est - (dset.obs - dset.calc)
    num_unknowns += dset.meta.get("num_clock_coeff", 0)

    # Calculate normal equations, and add statistics about estimation to dataset
    N, b = self._normal_equations(normal_idx, dset.num_obs - 1)
    g = self.x_hat[dset.num_obs - 1, normal_idx, :]
    deg_freedom = dset.num_obs - num_unknowns
    v = dset.residual[:, None]
    P = np.diag(1 / self.r[: dset.num_obs])
    sq_sum_residuals = (v.T @ P @ v).item()
    sq_sum_omc_terms = (2 * b.T @ g - g.T @ N @ g).item()
    variance_factor = sq_sum_residuals / deg_freedom if deg_freedom != 0 else np.inf
    log.info(f"Variance factor = {variance_factor:.4f}, degrees of freedom = {deg_freedom:d}")

    # Report and set analysis status if there are too few degrees of freedom
    if deg_freedom < 1:
        log.error(f"Degrees of freedom is {deg_freedom} < 1. Estimate fewer parameters")
        if dset.meta.get("analysis_status") == "unchecked":
            dset.meta["analysis_status"] = "too few degrees of freedom"
    else:
        if dset.meta.get("analysis_status") == "too few degrees of freedom":
            dset.meta["analysis_status"] = "unchecked"

    # Report and set analysis status if there are too few stations
    # TODO: if vlbi_site_pos in state_vector and num_stations < 3
    estimate_site_pos = np.char.startswith(np.array(param_names, dtype=str), "vlbi_site_pos").any()
    if len(dset.unique("station")) < 3 and estimate_site_pos:
        log.warn(f"Too few stations {len(dset.unique('station'))} < 3. Do not estimate station positions.")
        # if dset.meta.get("analysis_status") == "unchecked":
        #     dset.meta["analysis_status"] = "needs custom state vector"
    elif estimate_site_pos:
        # Enough stations again: reset a previously set status
        if dset.meta.get("analysis_status") == "needs custom state vector":
            dset.meta["analysis_status"] = "unchecked"

    # Update config
    cfg_vars = dset.vars.copy()
    cfg_vars.pop("rundate")
    with config.update_tech_config(dset.analysis["rundate"], cfg_vars.pop("pipeline"), **cfg_vars) as cfg:
        cfg.update("analysis_status", "status", dset.meta.get("analysis_status", ""), source=__file__)

    # Add information to dset.meta
    dset.meta.add("number of observations", dset.num_obs, section="statistics")
    dset.meta.add("number of unknowns", num_unknowns, section="statistics")
    dset.meta.add("square sum of residuals", sq_sum_residuals, section="statistics")
    dset.meta.add("degrees of freedom", deg_freedom, section="statistics")
    dset.meta.add("variance factor", variance_factor, section="statistics")
    dset.meta.add("weighted square sum of o-c", sq_sum_residuals + sq_sum_omc_terms, section="statistics")
    dset.meta.add("matrix", N.tolist(), section="normal equation")
    dset.meta.add("vector", b[:, 0].tolist(), section="normal equation")
    dset.meta.add("names", param_names[normal_idx], section="normal equation")
    dset.meta.add(
        "unit",
        [config.tech[f.split("-")[0]].unit.str for f in param_names[normal_idx]],
        section="normal equation",
    )  # TODO should this be here?
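# As a sanity check on the statistics block above, a toy computation of the variance factor from
# weighted residuals (all numbers invented):
import numpy as np

v = np.array([0.003, -0.001, 0.002, -0.004])[:, None]  # residuals (m)
P = np.diag(1 / np.array([1e-6, 2e-6, 1e-6, 3e-6]))    # weights = 1 / variance
num_obs, num_unknowns = len(v), 2

sq_sum_residuals = (v.T @ P @ v).item()
deg_freedom = num_obs - num_unknowns
variance_factor = sq_sum_residuals / deg_freedom if deg_freedom != 0 else np.inf
print(f"Variance factor = {variance_factor:.4f}, degrees of freedom = {deg_freedom:d}")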
log.info("Solving normal equations") names = dset.meta["normal equation"]["names"] n = len(names) d = np.zeros((n, 6)) fix_param_weight = np.zeros(n) H = np.zeros((6, n)) stations = set() from where import apriori reference_frame = config.tech.reference_frames.list[0] trf = apriori.get("trf", time=dset.time.utc.mean, reference_frames=reference_frame) # thaller2008: eq 2.51 (skipping scale factor) for idx, column in enumerate(names): if "_site_pos-" not in column: continue station = column.split("-", maxsplit=1)[-1].rsplit("_", maxsplit=1)[0] site_id = dset.meta[station]["site_id"] if site_id in trf: x0, y0, z0 = trf[site_id].pos.trs if column.endswith("_x"): d[idx, :] = np.array([1, 0, 0, 0, z0, -y0]) if column.endswith("_y"): d[idx, :] = np.array([0, 1, 0, -z0, 0, x0]) if column.endswith("_z"): d[idx, :] = np.array([0, 0, 1, y0, -x0, 0]) stations.add(station) if len(stations) >= 3: try: # thaller2008: eq 2.57 H = np.linalg.inv(d.T @ d) @ d.T log.info( f"Applying NNT/NNR with {', '.join(stations)} from {reference_frame.upper()}" ) except np.linalg.LinAlgError: log.warn(f"Unable to invert matrix for NNR/NNT constraints") else: log.info( f"Too few stations to use NNR/NNT contraints from {reference_frame.upper()}. Using absolute constraints for station positions." ) # Too few stations to use NNT/NNR? for idx, column in enumerate(names): if "_site_pos-" not in column: continue station = column.split("-", maxsplit=1)[-1].rsplit("_", maxsplit=1)[0] fix_param_weight[idx] = 1 / (1e-6)**2 # 1/meters**2 sigmas = [0.0001] * 3 + [1.5e-11] * 3 # NNR to CRF if "celestial_reference_frames" in config.tech.master_section: celestial_reference_frame = config.tech.celestial_reference_frames.list[ 0] crf = apriori.get( "crf", time=dset.time, celestial_reference_frames=celestial_reference_frame) H2 = np.zeros((3, n)) for idx, column in enumerate(names): if "_src_dir-" not in column: continue source = column.split("-", maxsplit=1)[-1].split("_")[0] if source in crf: ra = crf[source].pos.right_ascension dec = crf[source].pos.declination if dset.num(source=source) < 5: fix_param_weight[idx] = 1 / (1e-12)**2 # 1/radians**2 if column.endswith("_ra"): log.info( f"Too few observations for source {source}. Using absolute constraints for source positions." ) continue if column.endswith("_ra"): H2[0, idx] = -np.cos(ra) * np.sin(dec) * np.cos(dec) H2[1, idx] = -np.sin(ra) * np.sin(dec) * np.cos(dec) H2[2, idx] = np.cos(dec)**2 if column.endswith("_dec"): H2[0, idx] = np.sin(ra) H2[1, idx] = -np.cos(ra) if H2.any(): log.info( f"Applying NNR constraint to {celestial_reference_frame.upper()}" ) # add NNR to CRF constraints H = np.concatenate((H, H2)) sigmas = sigmas + [1e-6] * 3 # thaller2008: eq 2.45 P_h = np.diag(1 / np.array(sigmas)**2) # Free network constraints: thaller2008: eq 2.58 N_h = N + H.T @ P_h @ H # Baselines with too few obs? for idx, column in enumerate(names): if "_baseline-" not in column: continue baseline = column.split("-", maxsplit=1)[-1].rsplit("_", maxsplit=1)[0] if dset.num(baseline=baseline) < 5: fix_param_weight[idx] = 1 / (1e-6)**2 # 1/meters**2 log.info( f"Too few observations for baseline {baseline}. 
Constrained to a priori value" ) continue # Absolute constraints (on sources with too few observations): thaller2008: eq.2.49 N_h += np.diag(fix_param_weight) # solve neq N_h_inv = np.linalg.inv(N_h) x = N_h_inv @ b # Covariance: thaller2008: eq 2.16 Q_xx = variance_factor**2 * N_h_inv dset.meta.add("solution", x[:, 0].tolist(), section="normal equation") dset.meta.add("covariance", Q_xx.tolist(), section="normal equation")
def satellite_phase_center_offset(self, dset, sys_freq=None):
    """Determine satellite phase center offset correction vectors given in ITRS

    Satellite phase center offset (PCO) corrections are frequency dependent. The argument 'sys_freq' defines which
    frequencies should be used for a given GNSS. If two frequencies are given for a GNSS, then the PCO is
    determined as an ionosphere-free linear combination. If 'sys_freq' is not defined as input argument, then
    'sys_freq' is generated based on the given observation types in dataset 'dset'.

    Args:
        dset (Dataset):   Model data.
        sys_freq (dict):  Dictionary with frequency or frequency combination given for GNSS identifier:
                            sys_freq = { <sys_id>: <freq> }  (e.g. sys_freq = {'E': 'E1', 'G': 'L1_L2'} )

    Returns:
        numpy.ndarray: Satellite phase center offset correction vectors given in ITRS in meter
    """
    # GNSS          Freq number      GNSS freq
    #               L<num>/C<num>
    # ___________________________________________
    # C (BeiDou):   2                'B1'
    #               7                'B2'
    #               6                'B3'
    # G (GPS):      1                'L1'
    #               2                'L2'
    #               5                'L5'
    # R (GLONASS):  1                'G1'
    #               2                'G2'
    #               3                'G3'
    # E (Galileo):  1                'E1'
    #               8                'E5 (E5a+E5b)'
    #               5                'E5a'
    #               7                'E5b'
    #               6                'E6'
    # I (IRNSS):    5                'L5'
    #               9                'S'
    # J (QZSS):     1                'L1'
    #               2                'L2'
    #               5                'L5'
    #               6                'LEX'
    # S (SBAS):     1                'L1'
    #               5                'L5'
    obstype_to_gnss_freq = {
        "C": {"2": "B1", "7": "B2", "6": "B3"},
        "E": {"1": "E1", "8": "E5", "5": "E5a", "7": "E5b", "6": "E6"},
        "G": {"1": "L1", "2": "L2", "5": "L5"},
        "I": {"5": "L5", "9": "S"},
        "J": {"1": "L1", "2": "L2", "5": "L5", "6": "LEX"},
        "R": {"1": "G1", "2": "G2", "3": "G3"},
        "S": {"1": "L1", "5": "L5"},
    }

    correction = np.zeros((dset.num_obs, 3))
    used_date = None

    # Get GNSS frequency based on observation type
    if sys_freq is None:
        sys_freq = dict()
        obstypes = dset.meta["obstypes"]
        for sys in obstypes:
            freqs = {o[1] for o in obstypes[sys]}
            sys_freq[sys] = "_".join(f"{obstype_to_gnss_freq[sys][f]}" for f in sorted(freqs))

    # Loop over all satellites given in RINEX observation file and configuration file
    for sat in dset.unique("satellite"):

        # Skip satellites that are not given in the ANTEX file
        if sat not in self.data:
            log.warn(
                "Satellite {} is not given in ANTEX file {}. That means no satellite antenna phase center offset "
                "correction can be applied for satellite {}.",
                sat,
                self.file_path,
                sat,
            )
            continue

        # Get array indicating when observations are available for the given satellite (indicated by True)
        idx = dset.filter(satellite=sat)

        # Get used date
        used_date = self._used_date(sat, dset.rundate)
        if used_date is None:
            continue

        # Add PCO to Dataset meta
        system = sat[0]
        pco_sat = self._get_pco_sat(sat, sys_freq, used_date)
        dset.meta.setdefault("pco_sat", dict()).update({sat: pco_sat.tolist()[0]})

        # Transform PCO given in satellite body-fixed reference frame (for GPS and Galileo assumed to be aligned
        # to the yaw-steering reference frame) to ITRS
        pco_itrs = dset.sat_posvel.convert_yaw_to_itrs(pco_sat)
        # pco_itrs = dset.sat_posvel._yaw2itrs[idx][0] @ np.array(pco_sat)

        correction[idx] = pco_itrs[idx]

    return correction
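# A minimal sketch of the sys_freq generation above, run against made-up RINEX observation types
# (the second character of an observation type is its frequency number):
obstype_to_gnss_freq_example = {"E": {"1": "E1", "5": "E5a"}, "G": {"1": "L1", "2": "L2"}}
obstypes_example = {"G": ["C1C", "L1C", "C2W", "L2W"], "E": ["C1X", "L1X", "C5X", "L5X"]}

sys_freq_example = {}
for sys in obstypes_example:
    freqs = {o[1] for o in obstypes_example[sys]}  # e.g. {"1", "2"} for GPS
    sys_freq_example[sys] = "_".join(obstype_to_gnss_freq_example[sys][f] for f in sorted(freqs))

print(sys_freq_example)  # {'G': 'L1_L2', 'E': 'E1_E5a'}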
def _generate_dataframes(dsets: Dict[str, "Dataset"]) -> Tuple[Dict[str, pd.DataFrame], Dict, Dict]:
    """Generate dataframes based on station datasets

    The dataframe for each station in dictionary "dfs" has the following columns:

        site_vel_h:      Horizontal site velocity
        site_vel_east:   Site velocity east component of topocentric coordinates
        site_vel_north:  Site velocity north component of topocentric coordinates
        site_vel_up:     Site velocity up component of topocentric coordinates
        site_vel_3d:     3D site velocity

    Example for "dfs" dictionary:

        'hons':               time.gps  site_vel_h  site_vel_3d
                0  2019-03-01 00:00:00    0.301738     0.057244
                1  2019-03-01 00:00:00    0.301738     0.057244

        'krss':               time.gps  site_vel_h  site_vel_3d
                0  2019-03-01 00:00:00    0.710014     0.186791
                1  2019-03-01 00:00:00    0.710014     0.186791

    Example for "dfs_day" dictionary for "mean" key:

        'mean':{
            'site_vel_h':             nabf      vegs      hons      krss
                        time.gps
                        2019-03-01  1.368875  0.935687  1.136763  0.828754
                        2019-03-02  0.924839  0.728280  0.911677  0.854832

            'site_vel_3d':            nabf      vegs      hons      krss
                        time.gps
                        2019-03-01  1.715893  1.147265  1.600330  0.976541
                        2019-03-02  1.533437  1.307373  1.476295  1.136991
        }

    Example for "dfs_month" dictionary for "mean" key:

        'mean':{
            'site_vel_h':             nabf      vegs      hons      krss
                        Mar-2019  1.186240  0.861718  1.095827  1.021354
                        Apr-2019  0.891947  0.850343  0.977908  0.971099

            'site_vel_3d':            nabf      vegs      hons      krss
                        Mar-2019  1.854684  1.291406  1.450466  1.225467
                        Apr-2019  1.964404  1.706507  1.687994  1.500742
        }

    Args:
        dsets: Dictionary with station name as keys and the belonging Dataset as value

    Returns:
        Tuple with following entries:

        | Element   | Description                                                                                |
        |-----------|--------------------------------------------------------------------------------------------|
        | dfs       | Dictionary with station name as keys and the belonging dataframe as value with following    |
        |           | dataframe columns: site_vel_h, site_vel_3d                                                  |
        | dfs_day   | Dictionary with function type as keys ('mean', 'percentile', 'rms', 'std') and a dictionary |
        |           | as values. The dictionary has fields as keys (e.g. site_vel_h, site_vel_3d) and the         |
        |           | belonging dataframe as value with DAILY samples and stations as columns.                    |
        | dfs_month | Dictionary with function type as keys ('mean', 'percentile', 'rms', 'std') and a dictionary |
        |           | as values. The dictionary has fields as keys (e.g. site_vel_h, site_vel_3d) and the         |
        |           | belonging dataframe as value with MONTHLY samples and stations as columns.                  |
    """
    dfs = {}
    fields = {
        "site_vel_east": pd.DataFrame(),
        "site_vel_north": pd.DataFrame(),
        "site_vel_up": pd.DataFrame(),
        "site_vel_h": pd.DataFrame(),
        "site_vel_3d": pd.DataFrame(),
    }
    dfs_day = {
        "mean": copy.deepcopy(fields),
        "percentile": copy.deepcopy(fields),
        "std": copy.deepcopy(fields),
        "rms": copy.deepcopy(fields),
    }
    dfs_month = {
        "mean": copy.deepcopy(fields),
        "percentile": copy.deepcopy(fields),
        "std": copy.deepcopy(fields),
        "rms": copy.deepcopy(fields),
    }

    for station, dset in dsets.items():

        # Add necessary site velocity fields to dataset
        if "site_vel_3d" not in dset.fields:
            gnss_velocity_fields(dset)

        if dset.num_obs == 0:
            log.warn(f"Dataset '{station}' is empty.")
            continue

        # Determine dataframe with site velocity columns
        df = dset.as_dataframe(
            fields=["time.gps", "site_vel_east", "site_vel_north", "site_vel_up", "site_vel_h", "site_vel_3d"]
        )

        if df.empty:
            continue
        else:
            # Save data in dictionaries
            dfs.update({station: df})

            for type_ in dfs_day.keys():

                df_day = _apply(df, "D", type_)
                for field in fields.keys():
                    if dfs_day[type_][field].empty:
                        dfs_day[type_][field][station] = df_day[field]
                    else:
                        dfs_day[type_][field] = pd.concat([dfs_day[type_][field], df_day[field]], axis=1)
                    dfs_day[type_][field] = dfs_day[type_][field].rename(columns={field: station})

                df_month = _apply(df, "M", type_)
                df_month.index = df_month.index.strftime("%b-%Y")
                for field in fields.keys():
                    dfs_month[type_][field][station] = df_month[field]

    return dfs, dfs_day, dfs_month
def gnss_linear_combination(dset: "Dataset") -> None:
    """Add GNSS linear observation combinations to dataset

    Args:
        dset: A Dataset containing model data.
    """
    func = {
        "code_multipath": linear_combination_cmc,
        "code_phase": code_phase_difference,
        "geometry_free": linear_combination,
        "ionosphere_free": linear_combination,
        "melbourne_wuebbena": linear_combination_melbourne,
        "narrow_lane": linear_combination,
        "wide_lane": linear_combination,
    }

    for comb_name in config.tech[_SECTION].linear_combination.list:

        log.debug(f"Add {comb_name} combination to dataset.")

        # Code-multipath linear combination
        if comb_name == "code_multipath":
            try:
                cmc1, cmc2 = func[comb_name](dset)
            except ValueError:
                log.warn(
                    "Code multipath linear combination is not added to dataset. Dual-frequency code and phase "
                    "observations are needed."
                )
                continue
            dset.add_float(f"lin.{comb_name}_f1", val=cmc1["val"], unit="meter")
            dset.add_float(f"lin.{comb_name}_f2", val=cmc2["val"], unit="meter")
            dset.meta.setdefault("linear_combination", dict()).update({f"{comb_name}_f1": cmc1["sys_obs"]})
            dset.meta["linear_combination"][f"{comb_name}_f2"] = cmc2["sys_obs"]

        # Code-phase difference
        elif comb_name == "code_phase":
            try:
                code_phase_1, code_phase_2 = func[comb_name](dset)
            except ValueError:
                log.warn(
                    "Code-phase difference is not added to dataset. Dual-frequency code and phase observations "
                    "are needed."
                )
                continue
            dset.add_float(f"lin.{comb_name}_f1", val=code_phase_1["val"], unit="meter")
            dset.add_float(f"lin.{comb_name}_f2", val=code_phase_2["val"], unit="meter")
            dset.meta.setdefault("linear_combination", dict()).update({f"{comb_name}_f1": code_phase_1["sys_obs"]})
            dset.meta["linear_combination"][f"{comb_name}_f2"] = code_phase_2["sys_obs"]

        # Melbourne-Wübbena linear combination
        elif comb_name == "melbourne_wuebbena":
            try:
                linear_comb = func[comb_name](dset)
            except ValueError:
                log.warn(
                    "Melbourne-Wübbena linear combination is not added to dataset. Dual-frequency code and "
                    "phase observations are needed."
                )
                continue
            dset.add_float(f"lin.{comb_name}", val=linear_comb["val"], unit="meter")
            dset.meta.setdefault("linear_combination", dict()).update({comb_name: linear_comb["sys_obs"]})

        else:
            linear_comb = func[comb_name](comb_name, dset)
            for obs_code in linear_comb.keys():
                dset.add_float(f"lin.{comb_name}_{obs_code}", val=linear_comb[obs_code]["val"], unit="meter")
                dset.meta.setdefault("linear_combination", dict()).update(
                    {f"{comb_name}_{obs_code}": linear_comb[obs_code]["sys_obs"]}
                )
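# For orientation, the standard dual-frequency ionosphere-free combination that the 'ionosphere_free'
# entry above refers to. Frequencies are GPS L1/L2; the pseudoranges are made up, not output of the
# function above.
# Ionosphere-free combination: IF = (f1**2 * P1 - f2**2 * P2) / (f1**2 - f2**2)
f1, f2 = 1575.42e6, 1227.60e6          # GPS L1/L2 frequencies in Hz
P1, P2 = 22_000_003.2, 22_000_005.7    # made-up pseudoranges in meters

P_if = (f1**2 * P1 - f2**2 * P2) / (f1**2 - f2**2)
print(f"{P_if:.3f} m")  # first-order ionospheric delay eliminated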
def _plot_satellite_overview(dset: "Dataset", figure_dir: "pathlib.PosixPath") -> Union[None, Enum]:
    """Plot satellite observation overview

    Args:
        dset:        A dataset containing the data.
        figure_dir:  Figure directory

    Returns:
        Error exit status if necessary datasets could not be read
    """
    figure_path = figure_dir / f"plot_satellite_overview.{FIGURE_FORMAT}"

    # Limit x-axis range to rundate
    day_start, day_end = _get_day_limits(dset)

    # Get time and satellite data from read and orbit stage
    file_vars = {**dset.vars, **dset.analysis}
    file_vars["stage"] = "read"
    file_path = config.files.path("dataset", file_vars=file_vars)
    if file_path.exists():
        time_read, satellite_read = _sort_by_satellite(
            _get_dataset(dset, stage="read", systems=dset.meta["obstypes"].keys())
        )
        time_orbit, satellite_orbit = _sort_by_satellite(
            _get_dataset(dset, stage="orbit", systems=dset.meta["obstypes"].keys())
        )
        time_edit, satellite_edit = _sort_by_satellite(
            _get_dataset(dset, stage="edit", systems=dset.meta["obstypes"].keys())
        )
    else:
        # NOTE: This is the case for concatenated Datasets, where "read" and "edit" stage data are not available.
        log.warn(f"Read dataset does not exist: {file_path}. Plot {figure_path} cannot be plotted.")
        return enums.ExitStatus.error

    # Generate plot
    plot(
        x_arrays=[time_read, time_orbit, time_edit],
        y_arrays=[satellite_read, satellite_orbit, satellite_edit],
        xlabel="Time [GPS]",
        ylabel="Satellite",
        y_unit="",
        # labels=["Rejected in orbit stage", "Rejected in edit stage", "Kept observations"],
        colors=["red", "orange", "green"],
        figure_path=figure_path,
        opt_args={
            "colormap": "tab20",
            "figsize": (7, 6),
            "marker": "|",
            "plot_to": "file",
            "plot_type": "scatter",
            "title": "Overview over satellites",
            "xlim": [day_start, day_end],
        },
    )
def file_vars():
    """File variables that will be available during the running of this technique

    In addition, date and analysis variables are available.

    Returns:
        Dict: File variables special for this technique.
    """
    _file_vars = dict()

    # Add obs_version for ngs
    if config.tech.get("obs_format").str == "ngs":
        versions = config.files.glob_variable("vlbi_obs_ngs", "obs_version", r"\d{3}")
        if versions:
            _file_vars["obs_version"] = max(versions)
        elif config.where.files.download_missing.bool:
            # Look online for a candidate
            log.info("No NGS observation file found on disk: Looking for one online.")
            obs_versions = [f"{v:03d}" for v in reversed(range(4, 10))]
            for obs_version in obs_versions:
                url = config.files.url(
                    "vlbi_obs_ngs", file_vars=dict(obs_version=obs_version), is_zipped=True, use_aliases=False
                )
                log.info(f"Looking for {url} ...")
                if url.exists():
                    _file_vars["obs_version"] = obs_version
                    break
        if not _file_vars:
            log.fatal("No NGS observation file found")

    # Add obs_version for vgosdb
    if config.tech.get("obs_format").str == "vgosdb":
        versions = config.files.glob_variable("vlbi_obs_vgosdb", "obs_version", r"\d{3}")
        if versions:
            _file_vars["obs_version"] = max(versions)
        agencies = config.files.glob_variable("vlbi_obs_vgosdb", "agency", r"[\w]+", file_vars=_file_vars)
        if agencies:
            _file_vars["agency"] = "IVS" if "IVS" in agencies else agencies.pop()
            if len(agencies) > 1:
                log.warn(
                    f"Multiple agencies found ({', '.join(agencies)}) for file key vlbi_obs_vgosdb. "
                    f"Using {_file_vars['agency']}"
                )
        if "obs_version" not in _file_vars and "agency" not in _file_vars:
            log.fatal(f"No VGOSDB wrapper file found ({config.files.path('vlbi_obs_vgosdb')}).")

    # Sinex file vars
    if "sinex" in config.tech.section_names:
        _file_vars["solution"] = config.tech.sinex.solution.str
        _file_vars["file_agency"] = config.tech.sinex.file_agency.str.lower()

    return _file_vars
def write_dataframe_to_markdown(self, df: "Dataframe", format: str = "", statistic: bool = False) -> None:
    """Write Pandas DataFrame to Markdown table

    Args:
        df:         Pandas DataFrame
        format:     Define formatters for float columns (e.g. format = '6.3f')
        statistic:  Write statistical information rows at the end of the Markdown table
    """
    # Check if dataframe is empty
    if df.empty:
        log.warn("Nothing to write. Dataframe is empty.")
        return

    column_length = [len(c) for c in df.columns]

    # Write header
    if list(df.index):
        # Add DataFrame index to header
        num_space = max(len(str(i)) for i in df.index)
        head_line_1 = "\n| {} ".format(" " * num_space)
        head_line_2 = "|-{}-".format("-" * num_space)
    else:
        head_line_1 = "\n"
        head_line_2 = ""

    self.fid.write(head_line_1 + "| {} |\n".format(" | ".join(list(df.columns))))
    self.fid.write(head_line_2 + "|-{}:|\n".format("-|-".join([n * "-" for n in column_length])))

    # Write data
    for index, row in df.iterrows():
        line = "| {idx:s} |".format(idx=index) if index else ""  # Add DataFrame index column
        for _, v in row.items():
            if isinstance(v, float):
                line = line + " {:{fmt}} |".format(v, fmt=format)
            else:
                line = line + " {} |".format(v)
        self.fid.write(line + "\n")

    # Write statistical information rows at the end of table
    if statistic:
        line_max = "| **max**  |"
        line_min = "| **min**  |"
        line_mean = "| **mean** |"
        for _, col in df.items():
            line_max = line_max + " {:{fmt}} |".format(col.max(), fmt=format)
            line_min = line_min + " {:{fmt}} |".format(col.min(), fmt=format)
            line_mean = line_mean + " {:{fmt}} |".format(col.mean(), fmt=format)
        self.fid.write(line_max + "\n")
        self.fid.write(line_min + "\n")
        self.fid.write(line_mean + "\n")

    self.fid.write("\n")
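# A minimal usage sketch, calling the method unbound for illustration and faking the writer object
# (which is assumed to expose an open file handle as self.fid) with io.StringIO and
# types.SimpleNamespace. Data values are invented.
import io
import types
import pandas as pd

df_example = pd.DataFrame({"E1": [0.336, 0.381], "E1/E5a": [0.327, 0.353]}, index=["Jan-2019", "Feb-2019"])

writer = types.SimpleNamespace(fid=io.StringIO())
write_dataframe_to_markdown(writer, df_example, format="6.3f", statistic=True)
print(writer.fid.getvalue())  # Markdown table with data rows plus **max**/**min**/**mean** rows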
def clock_correction(dset):
    """Estimate clock polynomial"""
    # Take previous clock corrections into account
    try:
        output = dset.vlbi_clock_correction
    except AttributeError:
        output = np.zeros(dset.num_obs)

    # Read order of clock polynomial from config file
    terms = 1 + config.tech.get("order_of_polynomial", section=MODEL, default=2).int

    # Read clock breaks from session config, only split on commas (and commas followed by whitespace)
    clock_breaks = config.tech.get("clock_breaks", section=MODEL, default="").as_list(split_re=", *")
    stations, time_intervals = parse_clock_breaks(dset, clock_breaks)

    # Read reference clock from edit file and store in dataset
    ref_clock_str = config.tech.get("reference_clock", section=MODEL, default="").str
    ref_clock = parse_reference_clock(stations, ref_clock_str)
    dset.meta["ref_clock"] = ref_clock

    # Remove reference clock from list of clocks to be estimated
    idx = stations.index(ref_clock)
    del stations[idx]
    del time_intervals[idx]

    # Number of clock polynomial coefficients
    num_coefficients = len(stations) * terms
    param_names = [
        sta + " clk_a" + str(t) + " " + time_intervals[i][0].utc.iso + " " + time_intervals[i][1].utc.iso
        for i, sta in enumerate(stations)
        for t in range(terms)
    ]
    dset.meta["num_clock_coeff"] = num_coefficients

    # Set up matrices for estimation
    A = np.zeros((dset.num_obs, num_coefficients, 1))

    # Time coefficients, used when setting up A
    t = dset.time.utc.mjd - dset.time.utc[0].mjd
    poly = np.array([t ** n for n in range(terms)]).T

    # Set up the A matrix with time coefficients
    for idx, (station, (t_start, t_end)) in enumerate(zip(stations, time_intervals)):
        filter_time = np.logical_and(t_start.utc.mjd <= dset.time.utc.mjd, dset.time.utc.mjd < t_end.utc.mjd)
        filter_1 = np.logical_and(dset.filter(station_1=station), filter_time)
        A[filter_1, idx * terms : (idx + 1) * terms, 0] = poly[filter_1]
        filter_2 = np.logical_and(dset.filter(station_2=station), filter_time)
        A[filter_2, idx * terms : (idx + 1) * terms, 0] = -poly[filter_2]

    # Calculate normal matrix N and the moment vector U
    U = np.sum(A @ dset.residual[:, None, None], axis=0)
    N = np.sum(A @ A.transpose(0, 2, 1), axis=0)

    # Invert the normal matrix to find corrections; only the non-zero part of the matrix is inverted
    idx = np.logical_not(U == 0)[:, 0]
    X = np.zeros((num_coefficients, 1))
    det = np.linalg.det(N[idx, :][:, idx])
    threshold = 1e-12
    if np.abs(det) < threshold:  # TODO: what is a good threshold value?
        rank = np.linalg.matrix_rank(N[idx, :][:, idx])
        log.warn(f"Determinant of normal matrix in clock correction is close to zero ({det})")
        log.info(f"Normal matrix shape = {N.shape}, normal matrix rank = {rank}")
        _, R = np.linalg.qr(N[idx, :][:, idx])
        for i, row in enumerate(R):
            if np.max(np.abs(row)) < threshold * 10 ** 3:
                log.error(f"{param_names[i]} linearly dependent (max_row = {np.max(np.abs(row))})")
    try:
        X[idx] = np.linalg.inv(N[idx, :][:, idx]) @ U[idx]
    except np.linalg.LinAlgError:
        log.fatal("Singular matrix in vlbi_clock_correction")

    # Calculate final corrections
    output += (A.transpose(0, 2, 1) @ X)[:, 0, 0]
    return output
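# A quick illustration of the Vandermonde-style time polynomial built above, with terms = 3
# (i.e. a quadratic clock model) and made-up epochs:
import numpy as np

t = np.array([0.0, 0.25, 0.5, 1.0])  # days since first epoch
terms = 3                            # 1 + order_of_polynomial
poly = np.array([t ** n for n in range(terms)]).T
print(poly)
# [[1.     0.     0.    ]
#  [1.     0.25   0.0625]
#  [1.     0.5    0.25  ]
#  [1.     1.     1.    ]]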
def _organize_data(self):
    """Copy content from self.raw to self.data and convert all data to arrays of num_obs length"""
    meta = self.data.setdefault("meta", {})
    meta["session_code"] = self.raw["Session"].get("Session")

    # Epoch info
    self.data["time"] = self.raw["Observables"]["TimeUTC"]["time"]
    self.data["station_1"] = self.raw["Observables"]["Baseline"]["Baseline"][:, 0]
    self.data["station_2"] = self.raw["Observables"]["Baseline"]["Baseline"][:, 1]
    self.data["source"] = self.raw["Observables"]["Source"]["Source"]

    # Obs info
    self.data["observed_delay_ferr"] = self.raw["Observables"]["GroupDelay"]["X"]["GroupDelaySig"] * constant.c
    self.data["observed_delay"] = self.raw["ObsEdit"]["GroupDelayFull"]["X"]["GroupDelayFull"] * constant.c
    try:
        self.data["data_quality"] = self.raw["ObsEdit"]["Edit"]["DelayFlag"]
    except KeyError:
        self.data["data_quality"] = np.full(len(self.data["time"]), np.nan)
        log.warn("Missing data quality information")
    self.data["iono_delay"] = (
        self.raw["ObsDerived"]["Cal-SlantPathIonoGroup"]["X"]["Cal-SlantPathIonoGroup"][:, 0] * constant.c
    )
    self.data["iono_delay_ferr"] = (
        self.raw["ObsDerived"]["Cal-SlantPathIonoGroup"]["X"]["Cal-SlantPathIonoGroupSigma"][:, 0] * constant.c
    )
    try:
        self.data["iono_quality"] = self.raw["ObsDerived"]["Cal-SlantPathIonoGroup"]["X"][
            "Cal-SlantPathIonoGroupDataFlag"
        ]
    except KeyError:
        self.data["iono_quality"] = np.full(len(self.data["time"]), np.nan)
        log.warn("Missing ionosphere quality information")

    # Station dependent info
    for field, params in self._STATION_FIELDS.items():
        self.data[field + "_1"] = np.zeros(len(self.data["time"]))
        self.data[field + "_2"] = np.zeros(len(self.data["time"]))
        for station in self.raw["Head"]["StationList"]:
            sta_idx_1 = self.data["station_1"] == station
            sta_idx_2 = self.data["station_2"] == station
            sta_key = station.replace(" ", "_")
            sta_time = self.raw[sta_key]["TimeUTC"]["sec_since_ref"]
            try:
                sta_data = self.raw[sta_key][params["filestub"]][params["variable"]]
            except KeyError:
                sta_data = np.full(len(sta_time), np.nan)
                log.warn("Missing {} data for {}", field, station)
            func = interpolate.interp1d(
                sta_time, sta_data, bounds_error=False, fill_value=(sta_data[0], sta_data[-1]), assume_sorted=True
            )
            epochs_1 = self.raw["Observables"]["TimeUTC"]["sec_since_ref"][sta_idx_1]
            epochs_2 = self.raw["Observables"]["TimeUTC"]["sec_since_ref"][sta_idx_2]
            self.data[field + "_1"][sta_idx_1] = func(epochs_1) * params["factor"]
            self.data[field + "_2"][sta_idx_2] = func(epochs_2) * params["factor"]
def orbit(stage, dset):
    """Determine GNSS satellite orbit

    TODO: Is the workflow for determining the satellite transmission time correct? gLAB determines the satellite
          clock correction based on receiver time and not on satellite transmission time. Additionally gLAB does
          not apply relativistic corrections.

    Args:
        stage (str):     Name of current stage.
        dset (Dataset):  A dataset containing the data.
    """
    station = dset.vars["station"]
    orb_flag = config.tech.apriori_orbit.str

    # First estimate of satellite transmission time
    sat_time = dset.time - gnss.get_initial_flight_time(dset)

    # Second estimate using satellite clock and relativistic clock corrections
    orbit = apriori.get(
        "orbit", rundate=dset.analysis["rundate"], system=tuple(dset.unique("system")), station=station
    )
    orbit.dset_raw.write_as(stage=stage, session=station, dataset_name="raw")
    orbit.dset_edit.write_as(stage=stage, session=station, dataset_name="edit")

    # Determine initial satellite orbit solution with observation time as approximation
    orbit.calculate_orbit(dset, time="time")

    # TODO: Does it help to iterate here to improve the satellite transmission time?

    # Determine satellite transmission time based on initial satellite orbit solution
    dset.add_time(
        "sat_time",
        val=dset.time
        - gnss.get_initial_flight_time(
            dset, sat_clock_corr=orbit.dset.gnss_satellite_clock, rel_clock_corr=orbit.dset.gnss_relativistic_clock
        ),
        scale=dset.time.scale,
    )

    # Use satellite transmission time for determination of satellite orbits
    orbit.calculate_orbit(dset, time="sat_time")

    # Copy to regular dataset
    dset.add_posvel("sat_posvel", itrs=orbit.dset.sat_posvel.itrs, time="sat_time", other="site_pos")
    # TODO: Is it possible to set the "calc_models" table in the model part?
    dset.add_float("gnss_satellite_clock", val=-orbit.dset.gnss_satellite_clock, table="calc_models")
    dset.add_float("gnss_relativistic_clock", val=-orbit.dset.gnss_relativistic_clock, table="calc_models")
    dset.vars["orbit"] = orb_flag  # Needed e.g. for calling gnss_relativistic_clock model correctly.

    # Connect site position with satellite orbits, needed for determination of elevation and azimuth
    dset.site_pos.connect(dset.sat_posvel)

    # Correct satellite position/velocity due to Earth's rotation effect during signal flight time
    sat_pos, sat_vel = gnss.get_earth_rotation(dset)
    log.warn(
        "Correction of satellite position/velocity due to Earth's rotation effect during signal flight time is not"
        " applied."
    )
    # dset.sat_posvel.add_to_itrs(
    #     np.hstack((sat_pos, sat_vel))
    # )  # TODO: Check why the residuals get worse when this correction is applied.
    dset.add_posvel("gnss_earth_rotation", itrs=np.hstack((sat_pos, sat_vel)), time="sat_time")
    dset.write_as(stage=stage)
    dset.read()  # TODO: workaround because caching does not work correctly
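# Regarding the iteration TODO above, a conceptual sketch of fixed-point iteration on the transmission
# time. flight_time(t) stands in for the combination of geometric flight time and clock corrections and
# is purely illustrative; it is not a function in this codebase.
def transmission_time(t_obs: float, flight_time, tol: float = 1e-12, max_iter: int = 10) -> float:
    """Iterate t_emit = t_obs - flight_time(t_emit) until convergence (usually 2-3 passes)"""
    t_emit = t_obs
    for _ in range(max_iter):
        t_new = t_obs - flight_time(t_emit)
        if abs(t_new - t_emit) < tol:
            return t_new
        t_emit = t_new
    return t_emit

print(transmission_time(10.0, lambda t: 0.075 + 1e-6 * t))  # toy flight time of ~75 ms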
def _parse_position(self, line, cache):
    """Parse orbit position

    Bad or absent position values (indicated by 0.000000) and clock values (indicated by 999999.999999) are set
    to 'nan' (not a number).

    A satellite identifier (e.g. G01) consists of the satellite system identifier 'G' and a satellite number
    '01'. In the SP3-a format the satellite system identifier is not used. SP3-a was designed only for GPS
    satellites, therefore if format version 'a' is given, the satellite system identifier 'G' is prepended to the
    satellite number.

    The standard deviations of the satellite position and the clock correction have to be multiplied with the
    base floating point numbers defined in header line 15.

    Args:
        line (dict):   Dict containing the fields of a line.
        cache (dict):  Temporary dictionary with the fields 'key' and 'values'.
    """
    # Remove identical epochs given in two different SP3 files
    if cache["line_num"] == 2:
        if "time" in self.data:
            if cache["time"] in self.data["time"]:
                log.warn("Identical epoch {} given in the SP3 files.", cache["time"])
                return

    date = "{year}-{month:02d}-{day:02d}".format(
        year=int(self.vars["yyyy"]), month=int(self.vars["m"]), day=int(self.vars["d"])
    )  # TODO: What would be a better solution for getting the current date?

    # SP3-a (GPS-only) format files are missing the satellite system identifier 'G' before the satellite number
    if self.meta[date]["version"] == "a":
        line["sat"] = "G" + line["sat"].zfill(2)

    # Set bad or absent position values (indicated by 0.000000) to 'nan'
    for k in ["pos_x", "pos_y", "pos_z"]:
        if float(line[k]) == 0.0:
            line[k] = float("nan")

    # Set bad or absent clock bias values (indicated by 999999.999999) to 'nan'
    if float(line["clk_bias"]) == 999999.999999:
        line["clk_bias"] = float("nan")

    # Set sigmas not given in the SP3 file to 'nan'
    for k in ["sig_pos_x", "sig_pos_y", "sig_pos_z", "sig_clk_bias"]:
        if line[k] == "":
            line[k] = float("nan")

    self.data.setdefault("time", list()).append(cache["time"])
    self.data.setdefault("satellite", list()).append(line["sat"])
    self.data.setdefault("sat_pos", list()).append(
        np.array([float(line["pos_x"]), float(line["pos_y"]), float(line["pos_z"])]) * unit.kilometer2meter
    )
    self.data.setdefault("sat_clock_bias", list()).append(
        float(line["clk_bias"]) * unit.microsecond2second * constant.c
    )
    self.data.setdefault("sat_pos_sigma", list()).append(
        np.array([float(line["sig_pos_x"]), float(line["sig_pos_y"]), float(line["sig_pos_z"])])
        * self.meta[date]["base_posvel"]
        * unit.millimeter2meter
    )
    self.data.setdefault("sat_clock_bias_sigma", list()).append(
        float(line["sig_clk_bias"]) * self.meta[date]["base_clkrate"] * unit.picosecond2second * constant.c
    )

    # Get GNSS identifier
    sys = line["sat"][0]
    self.data.setdefault("system", list()).append(sys)
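# The clock conversion above turns SP3 clock biases in microseconds into a range equivalent in meters
# via the speed of light. A standalone check with an invented clock bias:
c = 299792458.0               # speed of light in m/s
clk_bias_us = 12.345678       # SP3 clock bias in microseconds
clk_bias_m = clk_bias_us * 1e-6 * c
print(f"{clk_bias_m:.3f} m")  # about 3701.1 m of range equivalent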