예제 #1
0
파일: __init__.py 프로젝트: mfkiwl/where
def run_stage(rundate, pipeline, dset, stage, prev_stage, **kwargs):
    # Skip stages where no dependencies have changed
    dep_path = config.files.path("depends",
                                 file_vars={
                                     **kwargs, "stage": stage
                                 })
    if not (dependencies.changed(dep_path)
            or util.check_options("-F", "--force")):
        log.info(
            f"Not necessary to run {stage} for {pipeline.upper()} {rundate.strftime(config.FMT_date)}"
        )
        return

    if dset is None:
        try:
            # Read dataset from disk if it exists
            dset = dataset.Dataset.read(rundate=rundate,
                                        pipeline=pipeline,
                                        stage=prev_stage,
                                        label="last",
                                        **kwargs)
        except (OSError, ValueError):
            # Create emtpy dataset
            dset = dataset.Dataset(rundate=rundate,
                                   pipeline=pipeline,
                                   **kwargs)

    # Set up dependencies. Add dependencies to previous stage and config file
    dependencies.init(dep_path)
    if prev_stage is not None:
        dependencies.add(config.files.path("depends",
                                           file_vars={
                                               **kwargs, "stage": prev_stage
                                           }),
                         label="depends")
    dependencies.add(*config.tech.sources, label="config")
    # Delete old datasets for this stage
    dset.delete_stage(stage, **kwargs)

    # Call the current stage. Skip rest of stages if current stage returns False (compare with is since by
    # default stages return None)
    plugins.call(package_name=__name__,
                 plugin_name=pipeline,
                 part=stage,
                 stage=stage,
                 dset=dset,
                 plugin_logger=log.info)
    dependencies.write()

    return dset
예제 #2
0
def run(rundate, pipeline, session=""):
    """Run a Where pipeline for a given date and session

    Args:
        rundate:   Rundate of analysis.
        pipeline:  Pipeline used for analysis.
        session:   Session in analysis.
    """
    if not setup.has_config(rundate, pipeline, session):
        log.fatal(
            f"No configuration found for {pipeline.upper()} {session} {rundate.strftime(config.FMT_date)}"
        )

    # Set up session config
    config.init(rundate=rundate, tech_name=pipeline, session=session)

    # Set up prefix for console logger and start file logger
    log_cfg = config.where.log
    prefix = f"{pipeline.upper()} {session} {rundate:%Y-%m-%d}"
    log.init(log_level=log_cfg.default_level.str, prefix=prefix)
    if log_cfg.log_to_file.bool:
        log.file_init(
            file_path=files.path("log"),
            log_level=log_cfg.default_level.str,
            prefix=prefix,
            rotation=log_cfg.number_of_log_backups.int,
        )

    # Read which stages to skip from technique configuration file.
    skip_stages = config.tech.get("skip_stages", default="").list

    # Register filekey suffix
    filekey_suffix = config.tech.filekey_suffix.list
    if filekey_suffix:
        config.files.profiles = filekey_suffix

    # Find which stages we will run analysis for
    # TODO: Specify stage_list in config
    stage_list = [s for s in stages(pipeline) if s not in skip_stages]

    # Start file logging and reporting
    reports.report.init(sessions=[session])
    reports.report.start_session(session)
    reports.report.text("header", session.replace("_", " ").title())

    # Update analysis config and file variables
    config.set_analysis(rundate=rundate,
                        tech=pipeline,
                        analysis=pipeline,
                        session=session)
    config.set_file_vars(file_vars())

    # Log the name of the session
    log.blank()  # Empty line for visual clarity
    log.info(f"Start session {session}")
    session_timer = timer(f"Finish session {session} in")
    session_timer.start()

    # Run stages, keep track of previous stage
    dset = None
    dep_fast = config.where.files.dependencies_fast.bool
    for prev_stage, stage in zip([None] + stage_list, stage_list):

        # Skip stages where no dependencies have changed
        dep_path = files.path("depends", file_vars=dict(stage=stage))
        if not (dependencies.changed(dep_path, fast_check=dep_fast)
                or util.check_options("-F", "--force")):
            log.info(
                f"Not necessary to run {stage} for {pipeline.upper()} {rundate.strftime(config.FMT_date)}"
            )
            continue
        elif dset is None:
            # Create or read dataset
            empty = stage == stage_list[0]
            dset = dataset.Dataset(rundate,
                                   tech=pipeline,
                                   stage=prev_stage,
                                   dataset_name=session,
                                   dataset_id="last",
                                   empty=empty)

        # Report on the stage
        reports.report.start_section(stage)
        reports.report.text("header", stage.replace("_", " ").title())
        if prev_stage:
            log.blank()  # Empty line for visual clarity

        # Set up dependencies. Add dependencies to previous stage and config file
        dependencies.init(dep_path, fast_check=dep_fast)
        dependencies.add(files.path("depends",
                                    file_vars=dict(stage=prev_stage)),
                         label="depends")
        dependencies.add(*config.tech.sources, label="config")

        # Delete old datasets for this stage
        dset.delete_from_file(stage=stage, dataset_id="all")

        # Call the current stage. Skip rest of stages if current stage returns False (compare with is since by
        # default stages return None)
        plugins.call(package_name=__name__,
                     plugin_name=pipeline,
                     part=stage,
                     stage=stage,
                     dset=dset,
                     plugin_logger=log.info)
        dependencies.write()
        if dset.num_obs == 0:
            log.warn(
                f"No observations in dataset after {stage} stage. Exiting pipeline"
            )
            break
    else:  # Only done if loop does not break (all stages finish normally)
        # Publish files for session
        files.publish_files()

    session_timer.end()

    # Store configuration to library
    setup.store_config_to_library(rundate, pipeline, session)

    # Write reports specified in config
    reports.write(rundate, pipeline)

    # Write requirements to file for reproducibility
    util.write_requirements()