Esempio n. 1
0
def setup_config(rundate, pipeline, *args, **kwargs):
    """Set up configuration for a Where analysis

    Reads command line options to create, delete, update, edit or show the
    configuration for the given rundate and pipeline.

    Args:
        rundate:   Rundate of analysis.
        pipeline:  Pipeline used for analysis.
        args:      Further positional arguments forwarded to config helpers.
        kwargs:    Further keyword arguments forwarded to config helpers.

    Returns:
        List of command line options not consumed by the configuration update.

    Raises:
        SystemExit: After deleting an analysis (unless a new one is started)
            or after showing the configuration.
    """
    set_profile(pipeline)

    # Should a new analysis be started?
    start_new = util.check_options("-N", "--new")

    # Delete an analysis
    if util.check_options("-D", "--delete"):
        from where.tools import delete

        delete.delete_analysis(rundate, pipeline, **kwargs)
        if not start_new:
            raise SystemExit

    # Create configuration of a new analysis
    if start_new or not has_config(rundate, pipeline, *args, **kwargs):
        create_config(rundate, pipeline, *args, **kwargs)
    elif util.check_options(
            "--profile"):  # Warning if --profile option is ignored
        profile_opt = f"--profile={util.read_option_value('--profile', default='')}"
        log.warn(
            f"Configuration already exists, option '{profile_opt}' ignored")

    # Update configuration based on command line options
    unused_options = update_config(rundate, pipeline, *args, **kwargs)

    # Edit configuration manually
    if util.check_options("-E", "--edit"):
        edit_config(rundate, pipeline, *args, **kwargs)
        # Bug fix: ("-E, --edit") was a single string, so `opt not in` did a
        # substring check instead of comparing against the two option names.
        unused_options = [
            opt for opt in unused_options if opt not in ("-E", "--edit")
        ]

    # Show current configuration
    if util.check_options("-S", "--show-config"):
        show_config(rundate, pipeline, *args, **kwargs)
        raise SystemExit

    return unused_options
Esempio n. 2
0
def setup_config(rundate, pipeline, session):
    """Set up configuration for a Where analysis

    """
    # Activate the requested configuration profiles for this pipeline
    profile_value = util.read_option_value("--profile", default="")
    config.where.profiles = profile_value.split() + [pipeline]

    # Did the user ask for a fresh analysis?
    new_requested = util.check_options("-N", "--new")

    # Handle deletion of an existing analysis
    if util.check_options("-D", "--delete"):
        from where.tools import delete

        delete.delete_analysis(rundate, pipeline, session)
        if not new_requested:
            raise SystemExit

    # Build a configuration when starting fresh or when none exists yet
    if new_requested or not has_config(rundate, pipeline, session):
        create_config(rundate, pipeline, session)
    elif util.check_options("--profile"):
        # A configuration already exists, so --profile has no effect
        ignored = f"--profile={util.read_option_value('--profile', default='')}"
        log.warn(
            f"Configuration already exists, option '{ignored}' ignored")

    # Apply command line overrides to the configuration
    update_config(rundate, pipeline, session)

    # Open the configuration in an editor on request
    if util.check_options("-E", "--edit"):
        edit_config(rundate, pipeline, session)

    # Print the configuration and stop on request
    if util.check_options("-S", "--show-config"):
        show_config(rundate, pipeline, session)
        raise SystemExit
Esempio n. 3
0
def run(rundate, pipeline, *args, **kwargs):
    """Run a Where pipeline for a given date and session

    Sets up configuration and logging, then executes the pipeline's stages:
    first the stages listed in ``stage_iterate`` (once per item of the
    configured iterable), then the stages listed in ``stage_once`` (or all
    stages when neither list is configured).

    Args:
        rundate:   Rundate of analysis.
        pipeline:  Pipeline used for analysis.
        args:      Further positional arguments (only used for the initial
                   ``has_config`` check here).
        kwargs:    Further keyword arguments identifying the analysis
                   (e.g. session); forwarded to config and stage runners.
    """

    # Abort early if no configuration exists for this analysis
    if not setup.has_config(rundate, pipeline, *args, **kwargs):
        log.fatal(
            f"No configuration found for {pipeline.upper()} {rundate.strftime(config.FMT_date)}"
        )

    # Set up config
    config.init(rundate, pipeline, **kwargs)

    # Register filekey suffix
    filekey_suffix = config.tech.filekey_suffix.list
    if filekey_suffix:
        config.files.profiles = filekey_suffix

    # Validate input arguments via the pipeline's optional 'validate_args' hook
    # NOTE(review): this `prefix` assignment is unconditionally overwritten by
    # the 'log_prefix' call below, so the return value here is unused.
    try:
        prefix = plugins.call(package_name=__name__,
                              plugin_name=pipeline,
                              part="validate_args",
                              rundate=rundate,
                              **kwargs)
    except mg_exceptions.UnknownPluginError:
        log.warn(
            f"Pipeline {pipeline} has not defined function 'validate_args'")
    except exceptions.InvalidArgsError as err:

        from where.tools import delete

        # Clean up {placeholder} directories created by config
        delete.delete_analysis(rundate, pipeline, **kwargs)
        log.fatal(err)

    # Set up console logger and start file logger; the pipeline may provide a
    # log prefix via its optional 'log_prefix' hook
    try:
        prefix = plugins.call(package_name=__name__,
                              plugin_name=pipeline,
                              part="log_prefix",
                              rundate=rundate,
                              **kwargs)
    except mg_exceptions.UnknownPluginError:
        log.warn(f"Pipeline {pipeline} has not defined function 'log_prefix'")
        prefix = ""

    log_cfg = config.where.log
    log.init(log_level=log_cfg.default_level.str, prefix=prefix)
    if log_cfg.log_to_file.bool:
        log.file_init(
            file_path=config.files.path("log"),
            log_level=log_cfg.default_level.str,
            prefix=prefix,
            rotation=log_cfg.number_of_log_backups.int,
        )

    # Update analysis config and file variables
    config.set_analysis(rundate, pipeline=pipeline, **kwargs)
    config.set_file_vars(file_vars())

    log.blank()  # Empty line for visual clarity

    # Read which stages that should be executed once for each iterable
    skip_stages = config.tech.skip_stages.list
    stage_iterate = config.tech.stage_iterate.list
    dset_list = []  # Datasets collected per iterated item, merged afterwards
    dset = None

    if stage_iterate:
        # Read which list should be iterated over and the placeholder name of
        # each entry; format is "<config key>:<variable name>"
        iterate_over, _, var_name = config.tech.stage_iterate_over.str.partition(
            ":")
        var_name = var_name.strip()

        # Iterate: run the stage_iterate stages once per item, chaining each
        # stage's dataset into the next via prev_stage
        for item in config.tech[iterate_over].list:
            kwargs[var_name] = item
            log.blank()
            log.info(f"***** Running {item} *****")

            for prev_stage, stage in zip([None] + stage_iterate,
                                         stage_iterate):
                if stage not in skip_stages:
                    dset = run_stage(rundate, pipeline, dset, stage,
                                     prev_stage, **kwargs)

            # Keep the item's final dataset and reset for the next item
            if dset is not None:
                dset_list.append(dset)
                dset = None
        kwargs[var_name] = "combined"

    if dset_list:
        # Merge the per-item datasets into one combined dataset
        # NOTE(review): `iterate_over` and `var_name` are only defined inside
        # the `if stage_iterate:` branch above; dset_list can only be
        # non-empty when that branch ran, so this is safe but fragile.
        dset_list[0].merge_with(*dset_list[1:], sort_by="time")
        dset = dset_list[0]
        if len(dset_list) > 1:
            log.info(f"Combining dataset for {len(dset_list)} {iterate_over}")
            dset.write_as(stage=stage_iterate[-1], label=2, **kwargs)

    # Read which stages that should be executed once
    stage_once = config.tech.stage_once.list
    # Find which stages we will run analysis for
    if not stage_once and not stage_iterate:
        # No stage configuration at all: run every stage of the pipeline
        stage_list = [s for s in stages(pipeline)]
        prev_stage_start = None
    else:
        # Continue from the last iterated stage, if any, into the once-stages
        stage_list = [s for s in stage_once]
        prev_stage_start = stage_iterate[-1] if stage_iterate else None

    for prev_stage, stage in zip([prev_stage_start] + stage_list, stage_list):
        if stage not in skip_stages:
            dset = run_stage(rundate, pipeline, dset, stage, prev_stage,
                             **kwargs)
            log.blank()

        # Stop early when a stage leaves no observations to process
        if dset is not None and dset.num_obs == 0:
            log.warn(f"No observations in dataset after {stage} stage.")
            break

    # Store configuration to library
    setup.store_config_to_library(rundate, pipeline, **kwargs)

    # Write requirements to file for reproducibility
    util.write_requirements()