def setup_config(rundate, pipeline, session):
    """Set up configuration for a Where analysis"""
    # Activate the requested profiles on top of the pipeline profile
    profile_value = util.read_option_value("--profile", default="")
    config.where.profiles = profile_value.split() + [pipeline]

    # Should a new analysis be started?
    start_new = util.check_options("-N", "--new")

    # Delete an existing analysis if requested; stop here unless a new one is started
    if util.check_options("-D", "--delete"):
        delete.delete_analysis(rundate, pipeline, session)
        if not start_new:
            raise SystemExit

    # Create a configuration when starting fresh, otherwise warn that --profile has no effect
    if start_new or not has_config(rundate, pipeline, session):
        create_config(rundate, pipeline, session)
    elif util.check_options("--profile"):
        profile_opt = f"--profile={util.read_option_value('--profile', default='')}"
        log.warn(f"Configuration already exists, option '{profile_opt}' ignored")

    # Apply command line options to the configuration
    update_config(rundate, pipeline, session)

    # Optionally open the configuration in an editor
    if util.check_options("-E", "--edit"):
        edit_config(rundate, pipeline, session)

    # Optionally display the configuration and stop
    if util.check_options("-S", "--show-config"):
        show_config(rundate, pipeline, session)
        raise SystemExit
def main(date: "datedoy", tech: "pipeline", ids: "option"):
    """Difference two datasets given by comma-separated ids and write the result

    Reads two datasets (identified by the first two entries of *ids*), differences them by the
    fields given by ``--difference_by``, writes the difference dataset under the stage
    ``difference`` and runs the requested writers on it.

    Args:
        date:  The model run date.
        tech:  Pipeline the datasets belong to.
        ids:   Comma separated list with (at least) two dataset identifiers.

    Returns:
        1 when one of the datasets is empty, otherwise ``None``.
    """
    log.init(log_level="info")

    # Additional required options
    identifiers = [id_.strip() for id_ in ids.split(",")]
    if len(identifiers) < 2:
        # Fix: fail with a clear message instead of an IndexError when fewer than two ids are given
        log.fatal(f"Two dataset identifiers are needed, got [{', '.join(identifiers)}].")
    difference_by = util.read_option_value("--difference_by").replace(",", " ").split()
    stage = util.read_option_value("--stage")

    # Get optional options
    dataset_id = util.read_option_value("--dset_id", default="last")
    dataset_id = "last" if dataset_id == "last" else int(dataset_id)
    dataset_name = util.read_option_value("--dset_name", default="")
    writer_names = util.read_option_value("--writers", default="").replace(",", " ").split()
    session = util.read_option_value("--session", default="")  # NOTE(review): read but unused below — confirm

    # Get datasets
    dset = data.Dataset(
        rundate=date, tech=tech, stage=stage, dataset_name=dataset_name, dataset_id=dataset_id, id="-" + identifiers[0]
    )
    dset_other = data.Dataset(
        rundate=date, tech=tech, stage=stage, dataset_name=dataset_name, dataset_id=dataset_id, id="-" + identifiers[1]
    )
    if dset.num_obs == 0:
        log.warn(f"Nothing to differentiate. Dataset '{identifiers[0]}' is empty.")
        return 1
    if dset_other.num_obs == 0:
        log.warn(f"Nothing to differentiate. Dataset '{identifiers[1]}' is empty.")
        return 1

    # Differentiate dataset
    dset_diff = dset.difference_with(dset_other, difference_by=difference_by)
    dset_diff.write_as(stage="difference")

    # Loop over writers
    for writer in writer_names:
        write(writer, dset_diff)
def main(date: "datedoy", pipeline: "pipeline", items: "option", specifier: "option"):
    """Compare several datasets that differ in one dataset variable

    Reads one dataset per entry in *items*, where *specifier* names which dataset variable
    (``id``, ``station`` or ``stage``) varies between the datasets, and runs the requested
    writers on the collection.

    The original implementation repeated the read/check/update logic once per specifier; the
    three branches are folded into a single loop that overrides the varying keyword.

    Args:
        date:       The model run date.
        pipeline:   Pipeline the datasets belong to.
        items:      Comma separated list of values for the varying dataset variable.
        specifier:  Which dataset variable varies: 'id', 'station' or 'stage'.
    """
    log.init(log_level="info")
    dsets = dict()

    # Additional options
    stage = util.read_option_value("--stage")
    writer_names = util.read_option_value("--writers").replace(",", " ").split()
    items_ = [s.strip() for s in items.split(",")]

    # Get optional options
    label = util.read_option_value("--label", default="None")
    # TODO label = "last" if label == "last" else label
    station = util.read_option_value("--station", default="")
    id_ = util.read_option_value("--id", default="")

    # Get dataset variables
    dset_vars = dict(pipeline=pipeline, stage=stage, station=station, label=label, id=id_)
    dset_vars = config.create_file_vars(rundate=date, **dset_vars)

    if specifier not in ("id", "station", "stage"):
        log.fatal(f"Specifier {specifier} is not defined. It should be either 'id', 'station' or 'stage'.")

    # Read one dataset per item, overriding the dataset variable named by `specifier`
    read_vars = dict(stage=stage, station=station, id=id_)
    for item in items_:
        read_vars[specifier] = item
        dset = dataset.Dataset().read(rundate=date, pipeline=pipeline, label=label, **read_vars)
        if dset.num_obs == 0:
            log.warn(f"Dataset '{item}' is empty.")
            continue
        dset_vars[specifier] = item  # TODO: Better solution for handling of dataset variables?
        dset.vars.update(dset_vars)  # Necessary for example for getting correct file path in used writers.
        dsets.update({item: dset})

    if len(dsets) == 0:
        log.fatal(f"All given datasets are empty [{', '.join(dsets.keys())}].")
    elif len(dsets) == 1:
        log.warn(f"Nothing to compare. Only dataset '{list(dsets.keys())[0]}' is available.")

    # Loop over writers
    for writer in writer_names:
        write(writer, dset=dsets)
def main():
    """Parse command line options and loop over the Where analysis

    Do simple parsing of command line arguments. Set up config-files and potentially start the analysis. See the help
    docstring at the top of the file for more information about the workflow.
    """
    util.check_help_and_version(doc_module=__name__)
    log.init(log_level=config.where.log.default_level.str, prefix="Runner")

    # Initialize
    pipeline = pipelines.get_from_options()
    config.read_pipeline(pipeline)
    # Dates are read positionally from sys.argv: first parse gives from_date, second gives to_date.
    # With --doy the positional arguments are day-of-year instead of calendar dates.
    if util.check_options("--doy"):
        from_date = util.parse_args("doy", doc_module=__name__)
        to_date = util.parse_args("doy", doc_module=__name__)
        sys.argv.remove("--doy")
    else:
        from_date = util.parse_args("date", doc_module=__name__)
        to_date = util.parse_args("date", doc_module=__name__)

    # Handle list of sessions (--session accepts both comma and space separated values);
    # the option is stripped from sys.argv so it is not forwarded to the Where subprocesses
    session_list = set(util.read_option_value("--session", default="").replace(",", " ").split())
    sys.argv = [o for o in sys.argv if not o.startswith("--session=")]

    # Start logging
    file_vars = dict(**util.get_user_info())
    log.file_init(
        file_path=files.path("log_runner", file_vars=file_vars),
        log_level=config.where.log.default_level.str,
        prefix="Runner",
        rotation=config.where.log.number_of_log_backups.int,
    )
    atexit.register(log_statistics)  # report analysis counts when the runner exits

    # Should where_runner crash if Where crashes? Command line options override the
    # [runner] stop_on_error config value (None means: use the config value).
    stop_on_error_opts = None
    if util.check_options("--stop-on-error"):
        stop_on_error_opts = True
    elif util.check_options("--continue-on-error"):
        stop_on_error_opts = False
    stop_on_error = config.where.get("stop_on_error", section="runner", value=stop_on_error_opts).bool
    error_logger = log.fatal if stop_on_error else log.error

    # Loop over dates, running each available (or requested) session as a Where subprocess
    rundate = from_date
    while rundate <= to_date:
        available_sessions = set(pipelines.list_sessions(rundate, pipeline))
        # An empty session_list means: run all available sessions
        sessions = available_sessions & session_list if session_list else available_sessions

        where_args = remove_runner_args(sys.argv[1:])
        for session in sorted(sessions):
            cmd = f"{where.__executable__} {rundate:%Y %m %d} --session={session}".split() + where_args
            log.info(f"Running '{' '.join(cmd)}'")
            count("Number of analyses")
            try:
                subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            except subprocess.CalledProcessError as err:
                count("Failed analyses")
                # Only the last line of the subprocess' stderr is reported
                error_msg = err.stderr.decode().strip().split("\n")[-1]
                error_logger(f"Command '{' '.join(cmd)}' failed: {error_msg}")
            else:
                count("Successful analyses")
                copy_log_from_where(rundate, pipeline, session)
        rundate += timedelta(days=1)
def get_args(rundate, input_args=None):
    """Convert where_runner arguments to where arguments for given date

    Extracts a possible ``--session=...`` option from *input_args* and expands the argument
    list to one string per session to analyze.

    Args:
        rundate (date):    The model run date.
        input_args (list): Command line arguments to convert (default: no arguments).

    Returns:
        List: Strings with arguments for one session each, starting with ``--session=<name>``.
    """
    keyword = "--session"
    session_list = set()
    input_args = list(input_args) if input_args is not None else list()

    # Pull an explicit --session option out of the argument list, if given
    for idx in range(len(input_args)):
        key, _, value = input_args[idx].partition("=")
        if key == keyword:
            session_list = set(value.split(","))
            input_args.pop(idx)
            break
    args = " ".join(input_args)

    # NOTE(review): `pipeline` is not defined in this function — presumably a module level
    # variable; confirm.
    get_session_from_master = config.where.get(
        "get_session_from_master",
        section=pipeline,
        value=util.read_option_value("--get_session_from_master", default=None),  # TODO: add this to mg_config
        default=False,
    ).bool
    if get_session_from_master:
        skip_sessions = set(
            config.where.get(
                "skip_sessions",
                section="runner",
                value=util.read_option_value("--skip_sessions", default=None),
                default="",
            ).list
        )
        session_types = config.where.get(
            "session_types",
            section="runner",
            value=util.read_option_value("--session_types", default=None),
            default="",
        ).list
        master_schedule = apriori.get("vlbi_master_schedule", rundate=rundate)
        sessions = set(master_schedule.list_sessions(rundate, session_types=session_types))

        check_master_status = config.where.get(
            "check_master_status",
            section="runner",
            value=util.read_option_value("--check_master_status", default=None),
            default=False,
        ).bool
        not_ready_sessions = set()
        if check_master_status:
            for session in sessions:
                if not master_schedule.ready(rundate, session):
                    status = master_schedule.status(rundate, session)
                    log.warn(
                        f"{rundate} {session} is not ready for processing. Master file status: '{status}'. Skipping session."
                    )
                    not_ready_sessions.add(session)

        sessions = set(sessions) - skip_sessions - not_ready_sessions
        sessions = sessions & session_list if session_list else sessions
        return [keyword + "=" + s + " " + args for s in sessions]
    else:
        obs_format = config.tech.get(
            "obs_format", section=pipeline
        ).str  # TODO: This always falls back on config.where ..
        file_vars = config.create_file_vars(rundate, pipeline, session=None)
        del file_vars["session"]  # TODO: Do not add None variables to file_vars?
        sessions = config.files.glob_variable(
            f"vlbi_obs_{obs_format}", variable="session", pattern=r"\w{2}", file_vars=file_vars
        )
        # Fix: without this guard an empty session_list (no --session given) made the
        # intersection empty and no sessions were ever returned, unlike the branch above
        sessions = sessions & session_list if session_list else sessions
        return [keyword + "=" + s + " " + args for s in sessions]
def main(date: "datedoy", tech: "pipeline", items: "option", specifier: "option"):
    """Compare datasets, reading one dataset per item in the variable named by *specifier*"""
    log.init(log_level="info")

    # Additional options
    stage = util.read_option_value("--stage")
    writer_names = util.read_option_value("--writers").replace(",", " ").split()
    items_ = [part.strip() for part in items.split(",")]

    # Get optional options
    dataset_id = util.read_option_value("--dset_id", default="last")
    if dataset_id != "last":
        dataset_id = int(dataset_id)
    dataset_name = util.read_option_value("--dset_name", default="")
    session = util.read_option_value("--session", default="")
    id_value = util.read_option_value("--id", default="")
    id_ = "-" + id_value if id_value else ""

    # Read datasets for given specifier
    dsets = dict()
    if specifier == "id":
        for item in items_:
            dset = data.Dataset(
                rundate=date, tech=tech, stage=stage, dataset_name=dataset_name, dataset_id=dataset_id, id="-" + item
            )
            if dset.num_obs == 0:
                log.warn(f"Dataset '{item}' is empty.")
                continue
            dsets[item] = dset
    elif specifier == "session":
        for item in items_:
            # The session name is used as the dataset name
            dset = data.Dataset(
                rundate=date, tech=tech, stage=stage, dataset_name=item, dataset_id=dataset_id, id=id_
            )
            if dset.num_obs == 0:
                log.warn(f"Dataset '{item}' is empty.")
                continue
            dsets[item] = dset
    elif specifier == "stage":
        for item in items_:
            dset = data.Dataset(
                rundate=date, tech=tech, stage=item, dataset_name=dataset_name, dataset_id=dataset_id, id=id_
            )
            if dset.num_obs == 0:
                log.warn(f"Dataset '{item}' is empty.")
                continue
            dsets[item] = dset
    else:
        log.fatal(f"Specifier {specifier} is not defined. It should be either 'id', 'session' or 'stage'.")

    if len(dsets) == 0:
        log.fatal(f"All given datasets are empty [{', '.join(dsets.keys())}].")
    elif len(dsets) == 1:
        log.warn(f"Nothing to compare. Only dataset '{list(dsets.keys())[0]}' is available.")

    # Loop over writers
    for writer in writer_names:
        write(writer, dset=dsets)
def compare(date: "datedoy", pipeline: "pipeline", items: "option", specifier: "option"):
    """Compare several datasets that differ in one dataset variable

    Reads one dataset per entry in *items*, where *specifier* names which dataset variable
    (``id``, ``station`` or ``stage``) varies between the datasets, and runs the requested
    writers on the collection. Datasets that cannot be read are skipped with a warning.

    The original implementation repeated the try/read/update logic once per specifier; the
    three branches are folded into a single loop that overrides the varying keyword.

    Args:
        date:       The model run date.
        pipeline:   Pipeline the datasets belong to.
        items:      Comma separated list of values for the varying dataset variable.
        specifier:  Which dataset variable varies: 'id', 'station' or 'stage'.
    """
    log.init(log_level="info")
    dsets = dict()

    # Additional options
    stage = util.read_option_value("--stage")
    writer_names = util.read_option_value("--writers").replace(",", " ").split()
    items_ = [s.strip() for s in items.split(",")]

    # Get optional options
    label = util.read_option_value("--label", default="None")
    # TODO label = "last" if label == "last" else label
    station = util.read_option_value("--station", default="")
    id_ = util.read_option_value("--id", default="")

    # Update configuration of Where analysis
    config.where.update_from_options(_clean_sys_argv(pipeline))

    # Get dataset variables
    dset_vars = config.create_file_vars(rundate=date, pipeline=pipeline)

    if specifier not in ("id", "station", "stage"):
        log.fatal(f"Specifier {specifier} is not defined. It should be either 'id', 'station' or 'stage'.")

    # Read one dataset per item, overriding the dataset variable named by `specifier`
    read_vars = dict(stage=stage, station=station, id=id_)
    for item in items_:
        read_vars[specifier] = item
        try:
            dset = dataset.Dataset().read(rundate=date, pipeline=pipeline, label=label, **read_vars)
        except OSError:
            log.warn(f"No data to read for Dataset {specifier} '{item}'.")
            continue
        dset.vars.update(dset_vars)
        dset.vars[specifier] = item
        dsets.update({item: dset})

    if len(dsets) == 0:
        log.fatal(f"All given datasets are empty [{', '.join(dsets.keys())}].")
    elif len(dsets) == 1:
        log.warn(f"Nothing to compare. Only dataset '{list(dsets.keys())[0]}' is available.")

    # Loop over writers
    for writer in writer_names:
        write(writer, dset=dsets)
def set_profile(pipeline):
    """Activate configuration profiles

    The profiles given with --profile are applied first, followed by the pipeline profile,
    for both the main and the files configuration.
    """
    profile_value = util.read_option_value("--profile", default="")
    config.where.profiles = profile_value.split() + [pipeline]
    config.files.profiles = profile_value.split() + [pipeline]