Example #1
def setup_config(rundate, pipeline, session):
    """Set up configuration for a Where analysis

    """
    # Set the correct profile
    profile = util.read_option_value("--profile", default="")
    config.where.profiles = profile.split() + [pipeline]

    # Should a new analysis be started?
    start_new = util.check_options("-N", "--new")

    # Delete an analysis
    if util.check_options("-D", "--delete"):
        delete.delete_analysis(rundate, pipeline, session)
        if not start_new:
            raise SystemExit

    # Create configuration of a new analysis
    if start_new or not has_config(rundate, pipeline, session):
        create_config(rundate, pipeline, session)
    elif util.check_options("--profile"):  # Warn if the --profile option is ignored
        profile_opt = f"--profile={util.read_option_value('--profile', default='')}"
        log.warn(f"Configuration already exists, option '{profile_opt}' ignored")

    # Update configuration based on command line options
    update_config(rundate, pipeline, session)

    # Edit configuration manually
    if util.check_options("-E", "--edit"):
        edit_config(rundate, pipeline, session)

    # Show current configuration
    if util.check_options("-S", "--show-config"):
        show_config(rundate, pipeline, session)
        raise SystemExit
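The `util` helpers used above come from Where's library and are not shown here. A minimal standalone sketch of how `read_option_value` and `check_options` could behave (an assumption for illustration, not Where's actual implementation):

import sys

# Sketch of the command line helpers used above (illustrative only)
def read_option_value(key, default=""):
    """Return the value of the last `--key=value` option on the command line"""
    value = default
    for arg in sys.argv[1:]:
        opt, _, val = arg.partition("=")
        if opt == key:
            value = val
    return value

def check_options(*keys):
    """Check whether at least one of the given options is specified"""
    return any(arg.partition("=")[0] in keys for arg in sys.argv[1:])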
Example #2
def main(date: "datedoy", tech: "pipeline", ids: "option"):
    log.init(log_level="info")

    # Additional required options
    identifiers = [id_.strip() for id_ in ids.split(",")]
    difference_by = util.read_option_value("--difference_by").replace(",", " ").split()
    stage = util.read_option_value("--stage")

    # Get optional options
    dataset_id = util.read_option_value("--dset_id", default="last")
    dataset_id = "last" if dataset_id == "last" else int(dataset_id)
    dataset_name = util.read_option_value("--dset_name", default="")
    writer_names = util.read_option_value("--writers", default="").replace(",", " ").split()
    session = util.read_option_value("--session", default="")

    # Get datasets
    dset = data.Dataset(
        rundate=date, tech=tech, stage=stage, dataset_name=dataset_name, dataset_id=dataset_id, id="-" + identifiers[0]
    )

    dset_other = data.Dataset(
        rundate=date, tech=tech, stage=stage, dataset_name=dataset_name, dataset_id=dataset_id, id="-" + identifiers[1]
    )

    if dset.num_obs == 0:
        log.warn(f"Nothing to differentiate. Dataset '{identifiers[0]}' is empty.")
        return 1

    if dset_other.num_obs == 0:
        log.warn(f"Nothing to differentiate. Dataset '{identifiers[1]}' is empty.")
        return 1

    # Differentiate dataset
    dset_diff = dset.difference_with(dset_other, difference_by=difference_by)
    dset_diff.write_as(stage="difference")

    # Loop over writers
    for writer in writer_names:
        write(writer, dset_diff)
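Conceptually, `difference_with` is expected to subtract the common fields of the two datasets after matching observations on the `difference_by` fields. A standalone sketch of that idea using plain numpy arrays (an illustration only, not Where's actual Dataset.difference_with):

import numpy as np

def difference_fields(fields, fields_other, index, index_other):
    """Difference two field dicts on the intersection of their index values

    Assumes the index arrays are unique and sorted, so that the two masks
    select matching observations in the same order.
    """
    common = np.intersect1d(index, index_other)
    mask = np.isin(index, common)
    mask_other = np.isin(index_other, common)
    return {name: fields[name][mask] - fields_other[name][mask_other]
            for name in fields.keys() & fields_other.keys()}

# Example: difference_fields({"x": np.array([1.0, 2.0, 3.0])}, {"x": np.array([10.0, 30.0])},
#                            np.array([1, 2, 3]), np.array([1, 3]))
# -> {"x": array([ -9., -27.])}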
Example #3
def main(date: "datedoy", pipeline: "pipeline", items: "option",
         specifier: "option"):
    log.init(log_level="info")
    dsets = dict()

    # Additional options
    stage = util.read_option_value("--stage")
    writer_names = util.read_option_value("--writers").replace(",", " ").split()
    items_ = [s.strip() for s in items.split(",")]

    # Get optional options
    label = util.read_option_value("--label", default="None")
    # TODO label = "last" if label == "last" else label
    station = util.read_option_value("--station", default="")
    id_ = util.read_option_value("--id", default="")

    # Get dataset variables
    dset_vars = dict(pipeline=pipeline, stage=stage, station=station, label=label, id=id_)
    dset_vars = config.create_file_vars(rundate=date, **dset_vars)

    # Read datasets for given specifier
    if specifier == "id":
        for id_ in items_:
            dset = dataset.Dataset().read(
                rundate=date, pipeline=pipeline, stage=stage, label=label, id=id_, station=station
            )
            if dset.num_obs == 0:
                log.warn(f"Dataset '{id_}' is empty.")
                continue
            dset_vars["id"] = id_  # TODO: Better solution for handling of dataset variables?
            dset.vars.update(dset_vars)  # Necessary e.g. for getting the correct file path in the writers used
            dsets.update({id_: dset})

    elif specifier == "station":
        for station in items_:
            dset = dataset.Dataset().read(
                rundate=date, pipeline=pipeline, stage=stage, label=label, id=id_, station=station
            )
            if dset.num_obs == 0:
                log.warn(f"Dataset '{station}' is empty.")
                continue
            dset_vars["station"] = station  # TODO: Better solution for handling of dataset variables?
            dset.vars.update(dset_vars)  # Necessary e.g. for getting the correct file path in the writers used
            dsets.update({station: dset})

    elif specifier == "stage":
        for stage in items_:
            dset = dataset.Dataset().read(
                rundate=date, pipeline=pipeline, stage=stage, label=label, id=id_, station=station
            )
            if dset.num_obs == 0:
                log.warn(f"Dataset '{stage}' is empty.")
                continue
            dset_vars["stage"] = stage  # TODO: Better solution for handling of dataset variables?
            dset.vars.update(dset_vars)  # Necessary e.g. for getting the correct file path in the writers used
            dsets.update({stage: dset})
    else:
        log.fatal(f"Specifier {specifier} is not defined. It should be either 'id', 'station' or 'stage'.")

    if len(dsets) == 0:
        log.fatal(f"All given datasets are empty [{', '.join(items_)}].")
    elif len(dsets) == 1:
        log.warn(f"Nothing to compare. Only dataset '{list(dsets.keys())[0]}' is available.")

    # Loop over writers
    for writer in writer_names:
        write(writer, dset=dsets)
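The three specifier branches above differ only in which dataset variable is overridden per item. A possible refactoring that collapses them into one loop (a sketch relying on the same `dataset` and `log` imports as the example; the `dset_vars` bookkeeping is omitted for brevity):

def read_dsets(date, pipeline, specifier, items, stage, label, id_, station):
    """Read one dataset per item, overriding the variable named by specifier"""
    if specifier not in {"id", "station", "stage"}:
        log.fatal(f"Specifier {specifier} is not defined. It should be either 'id', 'station' or 'stage'.")
    base = dict(rundate=date, pipeline=pipeline, stage=stage, label=label, id=id_, station=station)
    dsets = dict()
    for item in items:
        # Replace the one keyword argument selected by the specifier
        dset = dataset.Dataset().read(**{**base, specifier: item})
        if dset.num_obs == 0:
            log.warn(f"Dataset '{item}' is empty.")
            continue
        dsets[item] = dset
    return dsets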
Example #4
def main():
    """Parse command line options and loop over the Where analysis

    Do simple parsing of command line arguments. Set up config-files and potentially start the analysis. See the help
    docstring at the top of the file for more information about the workflow.
    """
    util.check_help_and_version(doc_module=__name__)
    log.init(log_level=config.where.log.default_level.str, prefix="Runner")

    # Initialize
    pipeline = pipelines.get_from_options()
    config.read_pipeline(pipeline)
    if util.check_options("--doy"):
        from_date = util.parse_args("doy", doc_module=__name__)
        to_date = util.parse_args("doy", doc_module=__name__)
        sys.argv.remove("--doy")
    else:
        from_date = util.parse_args("date", doc_module=__name__)
        to_date = util.parse_args("date", doc_module=__name__)

    # Handle list of sessions
    session_list = set(util.read_option_value("--session", default="").replace(",", " ").split())
    sys.argv = [o for o in sys.argv if not o.startswith("--session=")]

    # Start logging
    file_vars = dict(**util.get_user_info())
    log.file_init(
        file_path=files.path("log_runner", file_vars=file_vars),
        log_level=config.where.log.default_level.str,
        prefix="Runner",
        rotation=config.where.log.number_of_log_backups.int,
    )
    atexit.register(log_statistics)

    # Should where_runner crash if Where crashes?
    stop_on_error_opts = None
    if util.check_options("--stop-on-error"):
        stop_on_error_opts = True
    elif util.check_options("--continue-on-error"):
        stop_on_error_opts = False
    stop_on_error = config.where.get("stop_on_error", section="runner", value=stop_on_error_opts).bool
    error_logger = log.fatal if stop_on_error else log.error

    # Loop over dates
    rundate = from_date
    while rundate <= to_date:
        available_sessions = set(pipelines.list_sessions(rundate, pipeline))
        sessions = available_sessions & session_list if session_list else available_sessions

        where_args = remove_runner_args(sys.argv[1:])
        for session in sorted(sessions):
            cmd = f"{where.__executable__} {rundate:%Y %m %d} --session={session}".split(
            ) + where_args
            log.info(f"Running '{' '.join(cmd)}'")
            count("Number of analyses")
            try:
                subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            except subprocess.CalledProcessError as err:
                count("Failed analyses")
                error_msg = err.stderr.decode().strip().split("\n")[-1]
                error_logger(f"Command '{' '.join(cmd)}' failed: {error_msg}")
            else:
                count("Successful analyses")
            copy_log_from_where(rundate, pipeline, session)

        rundate += timedelta(days=1)
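The `remove_runner_args` helper is not shown here. A hypothetical sketch of what it might do, namely stripping the options that only where_runner understands before the remaining arguments are handed on to each Where call (the option list below is an assumption, not the actual one):

# Hypothetical sketch of remove_runner_args (assumed behavior and option list)
RUNNER_ONLY_OPTIONS = {"--stop-on-error", "--continue-on-error"}

def remove_runner_args(args):
    """Drop command line options that are only meaningful to where_runner"""
    return [a for a in args if a.partition("=")[0] not in RUNNER_ONLY_OPTIONS]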
Example #5
def get_args(rundate, input_args=None):
    """Convert where_runner arguments to where arguments for given date

    Args:
        rundate (date):   The model run date.

    Returns:
        List:   Strings with names of available sessions.
    """
    keyword = "--session"
    session_list = set()
    input_args = list(input_args) if input_args is not None else list()
    for idx in range(len(input_args)):
        key, _, value = input_args[idx].partition("=")
        if key == keyword:
            session_list = set(value.split(","))
            input_args.pop(idx)
            break
    args = " ".join(input_args)

    get_session_from_master = config.where.get(
        "get_session_from_master",
        section=pipeline,  # Note: `pipeline` is assumed to be defined at module level
        value=util.read_option_value("--get_session_from_master", default=None),  # TODO: add this to mg_config
        default=False,
    ).bool

    if get_session_from_master:
        skip_sessions = set(
            config.where.get(
                "skip_sessions",
                section="runner",
                value=util.read_option_value("--skip_sessions", default=None),
                default="",
            ).list)

        session_types = config.where.get(
            "session_types",
            section="runner",
            value=util.read_option_value("--session_types", default=None),
            default="",
        ).list
        master_schedule = apriori.get("vlbi_master_schedule", rundate=rundate)
        sessions = set(master_schedule.list_sessions(rundate, session_types=session_types))

        check_master_status = config.where.get(
            "check_master_status",
            section="runner",
            value=util.read_option_value("--check_master_status", default=None),
            default=False,
        ).bool

        not_ready_sessions = set()
        if check_master_status:
            for session in sessions:
                if not master_schedule.ready(rundate, session):
                    status = master_schedule.status(rundate, session)
                    log.warn(f"{rundate} {session} is not ready for processing. Master file status: '{status}'. Skipping session.")
                    not_ready_sessions.add(session)

        sessions = set(sessions) - skip_sessions - not_ready_sessions
        sessions = sessions & session_list if session_list else sessions
        return [keyword + "=" + s + " " + args for s in sessions]
    else:
        obs_format = config.tech.get("obs_format", section=pipeline).str  # TODO: This always falls back on config.where ..
        file_vars = config.create_file_vars(rundate, pipeline, session=None)
        del file_vars["session"]  # TODO: Do not add None variables to file_vars?
        sessions = config.files.glob_variable(
            f"vlbi_obs_{obs_format}", variable="session", pattern=r"\w{2}", file_vars=file_vars
        )
        sessions = sessions & session_list if session_list else sessions
        return [f"{keyword}={s} {args}" for s in sessions]
Example #6
def main(date: "datedoy", tech: "pipeline", items: "option",
         specifier: "option"):
    log.init(log_level="info")
    dsets = dict()

    # Additional options
    stage = util.read_option_value("--stage")
    writer_names = util.read_option_value("--writers").replace(",", " ").split()
    items_ = [s.strip() for s in items.split(",")]

    # Get optional options
    dataset_id = util.read_option_value("--dset_id", default="last")
    dataset_id = "last" if dataset_id == "last" else int(dataset_id)
    dataset_name = util.read_option_value("--dset_name", default="")
    session = util.read_option_value("--session", default="")
    id_ = "-" + util.read_option_value(
        "--id", default="") if util.read_option_value("--id",
                                                      default="") else ""

    # Read datasets for given specifier
    if specifier == "id":
        for id_ in items_:
            dset = data.Dataset(rundate=date,
                                tech=tech,
                                stage=stage,
                                dataset_name=dataset_name,
                                dataset_id=dataset_id,
                                id="-" + id_)
            if dset.num_obs == 0:
                log.warn(f"Dataset '{id_}' is empty.")
                continue
            dsets.update({id_: dset})

    elif specifier == "session":
        for session in items_:
            dset = data.Dataset(rundate=date,
                                tech=tech,
                                stage=stage,
                                dataset_name=session,
                                dataset_id=dataset_id,
                                id=id_)
            if dset.num_obs == 0:
                log.warn(f"Dataset '{session}' is empty.")
                continue
            dsets.update({session: dset})

    elif specifier == "stage":
        for stage in items_:
            dset = data.Dataset(rundate=date,
                                tech=tech,
                                stage=stage,
                                dataset_name=dataset_name,
                                dataset_id=dataset_id,
                                id=id_)
            if dset.num_obs == 0:
                log.warn(f"Dataset '{stage}' is empty.")
                continue
            dsets.update({stage: dset})
    else:
        log.fatal(f"Specifier {specifier} is not defined. It should be either 'id', 'session' or 'stage'.")

    if len(dsets) == 0:
        log.fatal(f"All given datasets are empty [{', '.join(items_)}].")
    elif len(dsets) == 1:
        log.warn(f"Nothing to compare. Only dataset '{list(dsets.keys())[0]}' is available.")

    # Loop over writers
    for writer in writer_names:
        write(writer, dset=dsets)
Example #7
def compare(date: "datedoy", pipeline: "pipeline", items: "option",
            specifier: "option"):
    log.init(log_level="info")
    dsets = dict()

    # Additional options
    stage = util.read_option_value("--stage")
    writer_names = util.read_option_value("--writers").replace(",", " ").split()
    items_ = [s.strip() for s in items.split(",")]

    # Get optional options
    label = util.read_option_value("--label", default="None")
    # TODO label = "last" if label == "last" else label
    station = util.read_option_value("--station", default="")
    id_ = util.read_option_value("--id", default="")

    # Update configuration of Where analysis
    config.where.update_from_options(_clean_sys_argv(pipeline))

    # Get dataset variables
    dset_vars = config.create_file_vars(rundate=date, pipeline=pipeline)

    # Read datasets for given specifier
    if specifier == "id":
        for id_ in items_:
            try:
                dset = dataset.Dataset().read(rundate=date,
                                              pipeline=pipeline,
                                              stage=stage,
                                              label=label,
                                              id=id_,
                                              station=station)
            except OSError:
                log.warn(f"No data to read for Dataset id '{id_}'.")
                continue

            dset.vars.update(dset_vars)
            dset.vars["id"] = id_
            dsets.update({id_: dset})

    elif specifier == "station":
        for station in items_:
            try:
                dset = dataset.Dataset().read(rundate=date,
                                              pipeline=pipeline,
                                              stage=stage,
                                              label=label,
                                              id=id_,
                                              station=station)
            except OSError:
                log.warn(f"No data to read for Dataset station '{station}'.")
                continue

            dset.vars.update(dset_vars)
            dset.vars["station"] = station
            dsets.update({station: dset})

    elif specifier == "stage":
        for stage in items_:
            try:
                dset = dataset.Dataset().read(rundate=date,
                                              pipeline=pipeline,
                                              stage=stage,
                                              label=label,
                                              id=id_,
                                              station=station)
            except OSError:
                log.warn(f"No data to read for Dataset stage '{stage}'.")
                continue
            dset.vars.update(dset_vars)
            dset.vars["stage"] = stage
            dsets.update({stage: dset})
    else:
        log.fatal(f"Specifier {specifier} is not defined. It should be either 'id', 'station' or 'stage'.")

    if len(dsets) == 0:
        log.fatal(f"All given datasets are empty [{', '.join(items_)}].")
    elif len(dsets) == 1:
        log.warn(f"Nothing to compare. Only dataset '{list(dsets.keys())[0]}' is available.")

    # Loop over writers
    for writer in writer_names:
        write(writer, dset=dsets)
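The `_clean_sys_argv` helper is referenced but not shown. A hypothetical sketch, assuming it keeps only the `--key=value` options that should update the pipeline configuration and drops this tool's own options (the reserved list and the behavior are assumptions, not Where's actual helper):

import sys

# Hypothetical sketch of _clean_sys_argv (assumed behavior)
def _clean_sys_argv(pipeline):
    """Keep only `--key=value` options not reserved by the compare tool itself"""
    reserved = {"--stage", "--writers", "--label", "--station", "--id"}
    return [o for o in sys.argv[1:]
            if o.startswith("--") and "=" in o and o.partition("=")[0] not in reserved]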
Example #8
def set_profile(pipeline):
    # Set the correct profile
    profile = util.read_option_value("--profile", default="")
    config.where.profiles = profile.split() + [pipeline]
    config.files.profiles = profile.split() + [pipeline]