def main(): """Parse command line options and run the Where analysis Do simple parsing of command line arguments. Set up config-files and start the analysis. See the help docstring at the top of the file for more information about the workflow. """ # Start logging log.init() # Read command line options if util.check_options("--doy"): rundate = util.parse_args("doy", doc_module=__name__) else: rundate = util.parse_args("date", doc_module=__name__) pipeline = pipelines.get_from_options() session = pipelines.get_session(rundate, pipeline) # Pretend to empty mailbox pretend_to_empty_mailbox() # Start an interactive session if util.check_options("-I", "--interactive"): from where.tools import interactive interactive.interactive(rundate, pipeline, session) return # Set up the configuration for a new analysis or update an existing one setup.setup_config(rundate, pipeline, session) # Run the analysis setup.add_timestamp(rundate, pipeline, session, "last run") with timer(f"Finish pipeline {pipeline.upper()} in"): pipelines.run(rundate, pipeline, session)
def profiler(func_to_profile):
    """Run a function with profiling turned on

    Args:
        func_to_profile (Function):  Function that will be called after profiling is turned on.
    """
    # Should we do line or function profiling?
    prof, info = _setup_line_profiler() if util.check_options("--line_profile") else _setup_cprofile()

    # Read command line options
    filename = util.read_option_value("--profile_output", default="where")
    if not os.path.splitext(filename)[1]:
        filename = f"{filename}.{info['extension']}"

    # Start profiling and run Where as usual. Store and print profile even if Where crashes
    log.info(f"Enable {info['doc']}, output stored in {filename}")
    prof.enable()
    try:
        func_to_profile()
    finally:
        prof.disable()

        # Store profile to file
        inspect_str = f"Inspect it using e.g. {' or '.join(info['inspect'])}"
        log.info(("Profile information stored to {f}\n       " + inspect_str).format(f=filename))
        prof.dump_stats(filename)

        # Print profile to terminal
        if util.check_options("--show_profile"):
            prof.print_stats()
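# A minimal sketch of what the _setup_cprofile helper could look like, based only on how
# prof and info are used above (enable/disable/dump_stats/print_stats plus the info keys
# "extension", "doc" and "inspect"). The metadata values are assumptions for illustration.
import cProfile


def _setup_cprofile():
    """Set up function profiling using the standard library cProfile module"""
    info = dict(
        doc="function profiling with cProfile",
        extension="prof",
        inspect=["'python -m pstats where.prof'", "'snakeviz where.prof'"],
    )
    return cProfile.Profile(), info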
def concatenate(from_date: "datedoy", to_date: "datedoy", pipeline: "pipeline", stage: "option"):
    log.init(log_level="info")

    # Get options
    label = util.read_option_value("--label", default="None")
    # TODO: label = "last" if label == "last" else label
    id_ = util.read_option_value("--id", default="")
    only_for_rundate = bool(util.check_options("--only_for_rundate"))
    session = util.read_option_value("--session", default="")
    station = util.read_option_value("--station", default="")
    writers = util.read_option_value("--writers", default="").replace(",", " ").split()

    # Update configuration of Where analysis
    config.where.update_from_options(_clean_sys_argv(pipeline))

    dset_vars = dict(pipeline=pipeline, stage=stage, session=session, station=station, label=label, id=id_)
    dset_vars = config.create_file_vars(rundate=from_date, **dset_vars)

    dset = _concatenate_datasets(from_date, to_date, dset_vars, only_for_rundate)
    if dset.num_obs == 0:
        log.fatal(f"No data to read in period from {from_date} to {to_date}.")
    dset.write()

    # Loop over writers
    for writer in writers:
        write(writer, dset=dset)
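# Usage sketch (hypothetical values): given the annotation-based argument parsing in the
# where_tools dispatcher below, concatenating estimate-stage datasets over a week could
# look like this on the command line.
#
#     where_tools concatenate 2021 3 1 2021 3 7 --vlbi --stage=estimate --writers=timeseries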
def validate_session(rundate, session):
    """Validate a session for the given rundate

    If session is not a valid VLBI session for the given rundate, an InvalidSessionError is raised.

    Args:
        rundate (date):    The model run date.
        session (String):  Name of session.

    Returns:
        String:  Name of validated session.
    """
    if not session:
        if util.check_options("--session"):
            return session  # Explicitly specified blank session, typically to open timeseries interactively
        raise exceptions.InvalidSessionError("You must specify '--session=<...>' to run a VLBI analysis")

    # TODO: Can we use master files to validate sessions? What about intensives?
    master_schedule = apriori.get("vlbi_master_schedule", rundate=rundate)
    master_sessions = master_schedule.list_sessions(rundate)
    if session not in master_sessions:
        log.warn(
            f"Session '{session}' is not listed in master file for {rundate:{config.FMT_date}}. "
            f"Available sessions are {', '.join(master_sessions)}."
        )
    return session
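# A sketch of the apriori contract assumed above: the vlbi_master_schedule object can list
# the session codes scheduled on a given date. The session codes shown are hypothetical.
#
#     >>> master_schedule = apriori.get("vlbi_master_schedule", rundate=date(2021, 3, 1))
#     >>> master_schedule.list_sessions(date(2021, 3, 1))
#     ['R1990', 'XA']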
def murks(log_text, *fmtargs, **fmtkws):
    """Write temporary debug message to log

    The message is parsed with str.format and only written to the log if `--murks` is specified on the command
    line. The message is not written to the file log.

    It is also possible to specify `--murks=prefix` on the command line. In this case, if the message starts with
    `prefix` an interactive session will be started after the murks message is logged.

    Args:
        log_text:  String, will be parsed with str.format.
        fmtargs:   Arguments passed on to str.format.
        fmtkws:    Keyword arguments passed on to str.format.
    """
    from where.lib import util

    if not util.check_options("--murks"):
        return

    # Log murks messages
    log("MURKS", log_text, *fmtargs, log_to_file=False, **fmtkws)

    # Start an interactive session (with variables from correct namespace) if message matches command line prefix
    interactive_prefix = util.read_option_value("--murks")
    if interactive_prefix is not None and log_text.startswith(interactive_prefix):
        import IPython

        frame = sys._getframe(1)
        namespace = dict(**frame.f_globals, **frame.f_locals)
        IPython.embed(user_ns=namespace)
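# Usage sketch (hypothetical message and option values): murks messages are silent by
# default and only show up when --murks is given on the command line.
#
#     murks("eop: interpolated value {}", eop_value)
#
#     where 2021 3 1 --vlbi --murks        # log all murks messages
#     where 2021 3 1 --vlbi --murks=eop    # also open IPython at messages starting with "eop"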
def main(): """Parse command line options and set up an Where analysis Do simple parsing of command line arguments. Set up config-files and show the configuration. """ util.check_help_and_version(doc_module=__name__) # Start logging log.init() # Read command line options pipeline = pipelines.get_from_options() config.read_pipeline(pipeline) if util.check_options("--doy"): rundate = util.parse_args("doy", doc_module=__name__) else: rundate = util.parse_args("date", doc_module=__name__) session = pipelines.get_session(rundate, pipeline) # Set up the configuration for the analysis setup_config(rundate, pipeline, session) # Show current configuration show_config(rundate, pipeline, session) # Store configuration in library store_config_to_library(rundate, pipeline, session)
def pretend_to_empty_mailbox():
    """Pretend to try to empty a mailbox

    This function simply prints an error message to stderr. No actual attempt to empty a mailbox is done. This is
    included purely for nostalgic (and possibly some misunderstood backwards compatibility) reasons :P
    """
    if util.check_options("-M", "--mailbox"):
        print("/var/spool/mail/pha: Permission denied.", file=sys.stderr)
def main(): """Invoke where_tools To add a new tool, simply add a new .py-file with a registered plugin that will be called when the tool is called. """ # Start logging log.init(log_level="info") # First read tool from command line, don't use util.parse_args() as that overrides -h for each tool try: tool = [a for a in sys.argv[1:] if not a.startswith("-")][0] sys.argv.remove(tool) except IndexError: util._print_help_from_doc(__name__) raise SystemExit # Check that tool is available and figure out signature try: sig = plugins.signature(__name__, tool) tool_module = plugins.get(__name__, tool).function.__module__ except exceptions.UnknownPluginError as err: util._print_help_from_doc(__name__) err.args = ( f"{err.args[0]}\n Available tools are {', '.join(plugins.names(__name__))}", ) raise # Parse parameters util.check_help_and_version(doc_module=tool_module) tool_args = dict() for key, param in sig.parameters.items(): if param.annotation is None: raise SystemExit(f"{param} in {tool} tool is not annotated") if param.annotation == "datedoy": if util.check_options("--doy"): date = util.parse_args("doy", doc_module=__name__) else: date = util.parse_args("date", doc_module=__name__) tool_args[key] = date elif param.annotation == "pipeline": tool_args[key] = pipelines.get_from_options() config.read_pipeline(tool_args[key]) elif param.annotation == "option": tool_args[key] = util.read_option_value(f"--{key}") else: tool_args[key] = util.parse_args(param.annotation, doc_module=tool_module) # Call tool plugins.call(__name__, tool, **tool_args)
def setup_config(rundate, pipeline, *args, **kwargs):
    """Set up configuration for a Where analysis
    """
    set_profile(pipeline)

    # Should a new analysis be started?
    start_new = util.check_options("-N", "--new")

    # Delete an analysis
    if util.check_options("-D", "--delete"):
        from where.tools import delete

        delete.delete_analysis(rundate, pipeline, **kwargs)
        if not start_new:
            raise SystemExit

    # Create configuration of a new analysis
    if start_new or not has_config(rundate, pipeline, *args, **kwargs):
        create_config(rundate, pipeline, *args, **kwargs)
    elif util.check_options("--profile"):  # Warning if --profile option is ignored
        profile_opt = f"--profile={util.read_option_value('--profile', default='')}"
        log.warn(f"Configuration already exists, option '{profile_opt}' ignored")

    # Update configuration based on command line options
    unused_options = update_config(rundate, pipeline, *args, **kwargs)

    # Edit configuration manually
    if util.check_options("-E", "--edit"):
        edit_config(rundate, pipeline, *args, **kwargs)
        unused_options = [opt for opt in unused_options if opt not in ("-E", "--edit")]

    # Show current configuration
    if util.check_options("-S", "--show-config"):
        show_config(rundate, pipeline, *args, **kwargs)
        raise SystemExit

    return unused_options
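# A sketch of the set_profile helper used above. This mirrors the inline profile handling
# in the older setup_config variant further down; treat it as an assumption about the
# actual helper, not its definitive implementation.
def set_profile(pipeline):
    """Set the configuration profiles, most specific first"""
    profile = util.read_option_value("--profile", default="")
    config.where.profiles = profile.split() + [pipeline]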
def main(): """Parse command line options and run the Where analysis Do simple parsing of command line arguments. Set up config-files and start the analysis. See the help docstring at the top of the file for more information about the workflow. """ util.check_help_and_version(doc_module=__name__) # Start logging log.init(config.where.log.default_level.str) log.debug( f"Use {util.get_python_version()} on process {util.get_pid_and_server()}" ) # Read command line options pipeline = pipelines.get_from_options() config.read_pipeline(pipeline) if util.check_options("--doy"): rundate = util.parse_args("doy", doc_module=__name__) else: rundate = util.parse_args("date", doc_module=__name__) args, kwargs = util.options2args(sys.argv[1:]) # Start an interactive session if util.check_options("-I", "--interactive"): from where.tools import interactive # Local import because interactive imports many external packages interactive.interactive(rundate, pipeline, **kwargs) return # Set up the configuration for a new analysis or update an existing one unused_options = setup.setup_config(rundate, pipeline, *args, **kwargs) pipeline_args, pipeline_kwargs = util.options2args(unused_options) # Run the analysis setup.add_timestamp(rundate, pipeline, "last run", **kwargs) with Timer(f"Finish pipeline {pipeline.upper()} in"): pipelines.run(rundate, pipeline, *pipeline_args, **pipeline_kwargs)
def setup_config(rundate, pipeline, session):
    """Set up configuration for a Where analysis
    """
    # Set the correct profile
    profile = util.read_option_value("--profile", default="")
    config.where.profiles = profile.split() + [pipeline]

    # Should a new analysis be started?
    start_new = util.check_options("-N", "--new")

    # Delete an analysis
    if util.check_options("-D", "--delete"):
        from where.tools import delete

        delete.delete_analysis(rundate, pipeline, session)
        if not start_new:
            raise SystemExit

    # Create configuration of a new analysis
    if start_new or not has_config(rundate, pipeline, session):
        create_config(rundate, pipeline, session)
    elif util.check_options("--profile"):  # Warning if --profile option is ignored
        profile_opt = f"--profile={util.read_option_value('--profile', default='')}"
        log.warn(f"Configuration already exists, option '{profile_opt}' ignored")

    # Update configuration based on command line options
    update_config(rundate, pipeline, session)

    # Edit configuration manually
    if util.check_options("-E", "--edit"):
        edit_config(rundate, pipeline, session)

    # Show current configuration
    if util.check_options("-S", "--show-config"):
        show_config(rundate, pipeline, session)
        raise SystemExit
def run_stage(rundate, pipeline, dset, stage, prev_stage, **kwargs):
    # Skip stages where no dependencies have changed
    dep_path = config.files.path("depends", file_vars={**kwargs, "stage": stage})
    if not (dependencies.changed(dep_path) or util.check_options("-F", "--force")):
        log.info(f"Not necessary to run {stage} for {pipeline.upper()} {rundate.strftime(config.FMT_date)}")
        return

    if dset is None:
        try:
            # Read dataset from disk if it exists
            dset = dataset.Dataset.read(rundate=rundate, pipeline=pipeline, stage=prev_stage, label="last", **kwargs)
        except (OSError, ValueError):
            # Create empty dataset
            dset = dataset.Dataset(rundate=rundate, pipeline=pipeline, **kwargs)

    # Set up dependencies. Add dependencies to previous stage and config file
    dependencies.init(dep_path)
    if prev_stage is not None:
        dependencies.add(config.files.path("depends", file_vars={**kwargs, "stage": prev_stage}), label="depends")
    dependencies.add(*config.tech.sources, label="config")

    # Delete old datasets for this stage
    dset.delete_stage(stage, **kwargs)

    # Call the current stage. Skip rest of stages if current stage returns False (compare with is since by
    # default stages return None)
    plugins.call(package_name=__name__, plugin_name=pipeline, part=stage, stage=stage, dset=dset, plugin_logger=log.info)
    dependencies.write()

    return dset
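# A minimal sketch of how a stage could be registered inside a pipeline module and picked
# up by the plugins.call(...) dispatch above. The decorator follows the plugin pattern used
# elsewhere in Where; the stage name and body are hypothetical. The signature matches the
# keyword arguments (stage, dset) passed by run_stage.
from where.lib import plugins


@plugins.register
def read(stage, dset):
    """Read observations into the dataset"""
    ...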
def get_from_options():
    """Read pipeline from command line options

    Uses the `options`-plugin in each pipeline to find which pipeline is called. Raises UnknownPipelineError if
    there are no recognized pipelines in the command line options.

    Returns:
        String:  Name of pipeline.
    """
    available_pipelines = options()
    pipeline_option = util.check_options(*available_pipelines)
    if not pipeline_option:
        raise exceptions.UnknownPipelineError(
            f"No pipeline specified.\nUse one of {', '.join(available_pipelines)} to specify the pipeline"
        )

    return available_pipelines[pipeline_option]
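# A sketch of the contract assumed by get_from_options: options() returns a dictionary
# mapping each recognized command line option to its pipeline name. The concrete mapping
# below is a hypothetical example.
#
#     >>> options()
#     {'--vlbi': 'vlbi', '--slr': 'slr', '--gnss': 'gnss'}
#     >>> get_from_options()    # with a command line like 'where 2021 3 1 --vlbi'
#     'vlbi'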
def concatenate(from_date: "datedoy", to_date: "datedoy", tech: "pipeline", stage: "option"):
    log.init(log_level="info")

    # Get options
    dataset_id = util.read_option_value("--dset_id", default="last")
    dataset_id = "last" if dataset_id == "last" else int(dataset_id)
    dataset_name = util.read_option_value("--dset_name", default="")
    id_ = util.read_option_value("--id", default="")
    only_for_rundate = bool(util.check_options("--only_for_rundate"))
    session = util.read_option_value("--session", default="")
    writer_names = util.read_option_value("--writers", default="").replace(",", " ").split()

    # Update configuration of Where analysis
    config.where.update_from_options(_clean_sys_argv(tech))

    dset_vars = dict(
        tech=tech,
        stage=stage,
        session=session,
        dataset_name=session,
        dataset_id=dataset_id,
        session_name=id_ + "_concatenated",
    )
    dset = _concatenate_datasets(from_date, to_date, dset_vars, only_for_rundate)
    if dset.num_obs == 0:
        log.fatal(f"No data to read in period from {from_date} to {to_date}.")
    dset.write()

    # Loop over writers
    for writer in writer_names:
        write(writer, dset=dset)
def run(rundate, pipeline, session=""): """Run a Where pipeline for a given date and session Args: rundate: Rundate of analysis. pipeline: Pipeline used for analysis. session: Session in analysis. """ if not setup.has_config(rundate, pipeline, session): log.fatal( f"No configuration found for {pipeline.upper()} {session} {rundate.strftime(config.FMT_date)}" ) # Set up tech config and file logging config.init(rundate=rundate, tech_name=pipeline, session=session) log.file_init(log_path=files.path("log")) # Read which stages to skip from technique configuration file. skip_stages = config.tech.get("skip_stages", default="").list # Register filekey suffix filekey_suffix = config.tech.filekey_suffix.list if filekey_suffix: files.use_filelist_profiles(*filekey_suffix) # Find which stages we will run analysis for stage_list = [s for s in stages(pipeline) if s not in skip_stages] # Start file logging and reporting reports.report.init(sessions=[session]) reports.report.start_session(session) reports.report.text("header", session.replace("_", " ").title()) # Update analysis config and file variables config.set_analysis(rundate=rundate, tech=pipeline, analysis=pipeline, session=session) config.set_file_vars(file_vars()) # Log the name of the session log.blank() # Empty line for visual clarity log.info(f"Start session {session}") session_timer = timer(f"Finish session {session} in") session_timer.start() # Run stages, keep track of previous stage dep_fast = config.where.files.dependencies_fast.bool for prev_stage, stage in zip([None] + stage_list, stage_list): # Skip stages where no dependencies have changed if not (dependencies.changed(fast_check=dep_fast, rundate=rundate, tech=pipeline, session=session, stage=stage) or util.check_options("-F", "--force")): log.info( f"Not necessary to run {stage} for {pipeline.upper()} {rundate.strftime(config.FMT_date)}" ) continue # Report on the stage reports.report.start_section(stage) reports.report.text("header", stage.replace("_", " ").title()) if prev_stage: log.blank() # Empty line for visual clarity # Set up dependencies. Add dependencies to previous stage and config file dependencies.init(fast_check=dep_fast, session=session, stage=stage) dependencies.add( files.path("model_run_depends", file_vars=dict(session=session, stage=prev_stage))) dependencies.add(*config.tech.sources) # Call the current stage. Skip rest of stages if current stage returns False (compare with is since by # default stages return None) do_next_stage = call(pipeline, stage, rundate=rundate, session=session, prev_stage=prev_stage, stage=stage, logger=log.info) dependencies.write() if do_next_stage is False: break # TODO, this does not work together with dependencies changed ... # Publish files for session files.publish_files() session_timer.end() # Store configuration to library setup.store_config_to_library(rundate, pipeline, session) # Write reports specified in config reports.write(rundate, pipeline) # Write requirements to file for reproducibility util.write_requirements()
def run(rundate, pipeline, session=""): """Run a Where pipeline for a given date and session Args: rundate: Rundate of analysis. pipeline: Pipeline used for analysis. session: Session in analysis. """ if not setup.has_config(rundate, pipeline, session): log.fatal( f"No configuration found for {pipeline.upper()} {session} {rundate.strftime(config.FMT_date)}" ) # Set up session config config.init(rundate=rundate, tech_name=pipeline, session=session) # Set up prefix for console logger and start file logger log_cfg = config.where.log prefix = f"{pipeline.upper()} {session} {rundate:%Y-%m-%d}" log.init(log_level=log_cfg.default_level.str, prefix=prefix) if log_cfg.log_to_file.bool: log.file_init( file_path=files.path("log"), log_level=log_cfg.default_level.str, prefix=prefix, rotation=log_cfg.number_of_log_backups.int, ) # Read which stages to skip from technique configuration file. skip_stages = config.tech.get("skip_stages", default="").list # Register filekey suffix filekey_suffix = config.tech.filekey_suffix.list if filekey_suffix: config.files.profiles = filekey_suffix # Find which stages we will run analysis for # TODO: Specify stage_list in config stage_list = [s for s in stages(pipeline) if s not in skip_stages] # Start file logging and reporting reports.report.init(sessions=[session]) reports.report.start_session(session) reports.report.text("header", session.replace("_", " ").title()) # Update analysis config and file variables config.set_analysis(rundate=rundate, tech=pipeline, analysis=pipeline, session=session) config.set_file_vars(file_vars()) # Log the name of the session log.blank() # Empty line for visual clarity log.info(f"Start session {session}") session_timer = timer(f"Finish session {session} in") session_timer.start() # Run stages, keep track of previous stage dset = None dep_fast = config.where.files.dependencies_fast.bool for prev_stage, stage in zip([None] + stage_list, stage_list): # Skip stages where no dependencies have changed dep_path = files.path("depends", file_vars=dict(stage=stage)) if not (dependencies.changed(dep_path, fast_check=dep_fast) or util.check_options("-F", "--force")): log.info( f"Not necessary to run {stage} for {pipeline.upper()} {rundate.strftime(config.FMT_date)}" ) continue elif dset is None: # Create or read dataset empty = stage == stage_list[0] dset = dataset.Dataset(rundate, tech=pipeline, stage=prev_stage, dataset_name=session, dataset_id="last", empty=empty) # Report on the stage reports.report.start_section(stage) reports.report.text("header", stage.replace("_", " ").title()) if prev_stage: log.blank() # Empty line for visual clarity # Set up dependencies. Add dependencies to previous stage and config file dependencies.init(dep_path, fast_check=dep_fast) dependencies.add(files.path("depends", file_vars=dict(stage=prev_stage)), label="depends") dependencies.add(*config.tech.sources, label="config") # Delete old datasets for this stage dset.delete_from_file(stage=stage, dataset_id="all") # Call the current stage. Skip rest of stages if current stage returns False (compare with is since by # default stages return None) plugins.call(package_name=__name__, plugin_name=pipeline, part=stage, stage=stage, dset=dset, plugin_logger=log.info) dependencies.write() if dset.num_obs == 0: log.warn( f"No observations in dataset after {stage} stage. 
Exiting pipeline" ) break else: # Only done if loop does not break (all stages finish normally) # Publish files for session files.publish_files() session_timer.end() # Store configuration to library setup.store_config_to_library(rundate, pipeline, session) # Write reports specified in config reports.write(rundate, pipeline) # Write requirements to file for reproducibility util.write_requirements()
def main(): """Parse command line options and loop over the Where analysis Do simple parsing of command line arguments. Set up config-files and potentially start the analysis. See the help docstring at the top of the file for more information about the workflow. """ util.check_help_and_version(doc_module=__name__) log.init(log_level=config.where.log.default_level.str, prefix="Runner") # Initialize pipeline = pipelines.get_from_options() config.read_pipeline(pipeline) if util.check_options("--doy"): from_date = util.parse_args("doy", doc_module=__name__) to_date = util.parse_args("doy", doc_module=__name__) sys.argv.remove("--doy") else: from_date = util.parse_args("date", doc_module=__name__) to_date = util.parse_args("date", doc_module=__name__) # Handle list of sessions session_list = set( util.read_option_value("--session", default="").replace(",", " ").split()) sys.argv = [o for o in sys.argv if not o.startswith("--session=")] # Start logging file_vars = dict(**util.get_user_info()) log.file_init( file_path=files.path("log_runner", file_vars=file_vars), log_level=config.where.log.default_level.str, prefix="Runner", rotation=config.where.log.number_of_log_backups.int, ) atexit.register(log_statistics) # Should where_runner crash if Where crashes? stop_on_error_opts = None if util.check_options("--stop-on-error"): stop_on_error_opts = True elif util.check_options("--continue-on-error"): stop_on_error_opts = False stop_on_error = config.where.get("stop_on_error", section="runner", value=stop_on_error_opts).bool error_logger = log.fatal if stop_on_error else log.error # Loop over dates rundate = from_date while rundate <= to_date: available_sessions = set(pipelines.list_sessions(rundate, pipeline)) sessions = available_sessions & session_list if session_list else available_sessions where_args = remove_runner_args(sys.argv[1:]) for session in sorted(sessions): cmd = f"{where.__executable__} {rundate:%Y %m %d} --session={session}".split( ) + where_args log.info(f"Running '{' '.join(cmd)}'") count("Number of analyses") try: subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) except subprocess.CalledProcessError as err: count("Failed analyses") error_msg = err.stderr.decode().strip().split("\n")[-1] error_logger(f"Command '{' '.join(cmd)}' failed: {error_msg}") else: count("Successful analyses") copy_log_from_where(rundate, pipeline, session) rundate += timedelta(days=1)