def run_stage(rundate, pipeline, dset, stage, prev_stage, **kwargs):

    # Skip stages where no dependencies have changed
    dep_path = config.files.path("depends", file_vars={**kwargs, "stage": stage})
    if not (dependencies.changed(dep_path) or util.check_options("-F", "--force")):
        log.info(f"Not necessary to run {stage} for {pipeline.upper()} {rundate.strftime(config.FMT_date)}")
        return

    if dset is None:
        try:
            # Read dataset from disk if it exists
            dset = dataset.Dataset.read(rundate=rundate, pipeline=pipeline, stage=prev_stage, label="last", **kwargs)
        except (OSError, ValueError):
            # Create empty dataset
            dset = dataset.Dataset(rundate=rundate, pipeline=pipeline, **kwargs)

    # Set up dependencies. Add dependencies to previous stage and config file
    dependencies.init(dep_path)
    if prev_stage is not None:
        dependencies.add(config.files.path("depends", file_vars={**kwargs, "stage": prev_stage}), label="depends")
    dependencies.add(*config.tech.sources, label="config")

    # Delete old datasets for this stage
    dset.delete_stage(stage, **kwargs)

    # Call the current stage
    plugins.call(package_name=__name__, plugin_name=pipeline, part=stage, stage=stage, dset=dset, plugin_logger=log.info)
    dependencies.write()

    return dset
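
# A minimal sketch (not part of the original module) of how run_stage is meant
# to be chained over a pipeline's stages: each stage receives the dataset
# returned by the previous one, and a skipped stage returns None so that the
# following stage re-reads its input dataset from disk. Here, stage_list and
# kwargs are assumed to come from the pipeline configuration:
#
#     dset = None
#     for prev_stage, stage in zip([None] + stage_list, stage_list):
#         dset = run_stage(rundate, pipeline, dset, stage, prev_stage, **kwargs)
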
def run(rundate, pipeline, session=""):
    """Run a Where pipeline for a given date and session

    Args:
        rundate:   Rundate of analysis.
        pipeline:  Pipeline used for analysis.
        session:   Session in analysis.
    """
    if not setup.has_config(rundate, pipeline, session):
        log.fatal(f"No configuration found for {pipeline.upper()} {session} {rundate.strftime(config.FMT_date)}")

    # Set up session config
    config.init(rundate=rundate, tech_name=pipeline, session=session)

    # Set up prefix for console logger and start file logger
    log_cfg = config.where.log
    prefix = f"{pipeline.upper()} {session} {rundate:%Y-%m-%d}"
    log.init(log_level=log_cfg.default_level.str, prefix=prefix)
    if log_cfg.log_to_file.bool:
        log.file_init(
            file_path=files.path("log"),
            log_level=log_cfg.default_level.str,
            prefix=prefix,
            rotation=log_cfg.number_of_log_backups.int,
        )

    # Read which stages to skip from technique configuration file
    skip_stages = config.tech.get("skip_stages", default="").list

    # Register filekey suffix
    filekey_suffix = config.tech.filekey_suffix.list
    if filekey_suffix:
        config.files.profiles = filekey_suffix

    # Find which stages we will run analysis for
    # TODO: Specify stage_list in config
    stage_list = [s for s in stages(pipeline) if s not in skip_stages]

    # Start reporting
    reports.report.init(sessions=[session])
    reports.report.start_session(session)
    reports.report.text("header", session.replace("_", " ").title())

    # Update analysis config and file variables
    config.set_analysis(rundate=rundate, tech=pipeline, analysis=pipeline, session=session)
    config.set_file_vars(file_vars())

    # Log the name of the session
    log.blank()  # Empty line for visual clarity
    log.info(f"Start session {session}")
    session_timer = timer(f"Finish session {session} in")
    session_timer.start()

    # Run stages, keep track of previous stage
    dset = None
    dep_fast = config.where.files.dependencies_fast.bool
    for prev_stage, stage in zip([None] + stage_list, stage_list):

        # Skip stages where no dependencies have changed
        dep_path = files.path("depends", file_vars=dict(stage=stage))
        if not (dependencies.changed(dep_path, fast_check=dep_fast) or util.check_options("-F", "--force")):
            log.info(f"Not necessary to run {stage} for {pipeline.upper()} {rundate.strftime(config.FMT_date)}")
            continue
        elif dset is None:
            # Create or read dataset
            empty = stage == stage_list[0]
            dset = dataset.Dataset(
                rundate, tech=pipeline, stage=prev_stage, dataset_name=session, dataset_id="last", empty=empty
            )

        # Report on the stage
        reports.report.start_section(stage)
        reports.report.text("header", stage.replace("_", " ").title())
        if prev_stage:
            log.blank()  # Empty line for visual clarity

        # Set up dependencies. Add dependencies to previous stage and config file
        dependencies.init(dep_path, fast_check=dep_fast)
        if prev_stage is not None:
            dependencies.add(files.path("depends", file_vars=dict(stage=prev_stage)), label="depends")
        dependencies.add(*config.tech.sources, label="config")

        # Delete old datasets for this stage
        dset.delete_from_file(stage=stage, dataset_id="all")

        # Call the current stage
        plugins.call(package_name=__name__, plugin_name=pipeline, part=stage, stage=stage, dset=dset, plugin_logger=log.info)
        dependencies.write()

        if dset.num_obs == 0:
            log.warn(f"No observations in dataset after {stage} stage. Exiting pipeline")
            break
    else:  # Only done if loop does not break (all stages finish normally)
        # Publish files for session
        files.publish_files()

    session_timer.end()

    # Store configuration to library
    setup.store_config_to_library(rundate, pipeline, session)

    # Write reports specified in config
    reports.write(rundate, pipeline)

    # Write requirements to file for reproducibility
    util.write_requirements()