def __init__(self, workspace, step, **kwargs):
    """
    Initialize a new instance of a StepRecord.

    Used kwargs:
    workspace: The working directory of the record.
    status: The record's current execution state.
    jobid: A scheduler assigned job identifier.
    script: The main script used for executing the record.
    restart_script: Script to resume record execution (if applicable).
    to_be_scheduled: True if the record needs scheduling. False otherwise.
    step: The StudyStep that is represented by the record instance.
    restart_limit: Upper limit on the number of restart attempts.
    tmp_dir: A provided temp directory to write scripts to instead of step
    workspace.
    """
    self.workspace = Variable("WORKSPACE", workspace)
    # Bake this record's concrete workspace path into the step's commands.
    # NOTE(review): this mutates the caller's `step` in place.
    step.run["cmd"] = self.workspace.substitute(step.run["cmd"])
    step.run["restart"] = self.workspace.substitute(step.run["restart"])

    self.jobid = kwargs.get("jobid", [])  # History of scheduler job ids.
    self.script = kwargs.get("script", "")
    # NOTE(review): read from the "restart" key although the docstring above
    # names the kwarg "restart_script" -- confirm which key callers pass.
    self.restart_script = kwargs.get("restart", "")
    # Hard-coded False here; the "to_be_scheduled" kwarg documented above is
    # not consulted. Presumably set later by script generation -- verify.
    self.to_be_scheduled = False
    self.step = step
    self.restart_limit = kwargs.get("restart_limit", 3)

    # Status Information
    self._num_restarts = 0    # Restart attempts performed so far.
    self._submit_time = None  # Set once on first submission.
    self._start_time = None   # Set once when execution begins.
    self._end_time = None     # Set once on termination.
    self.status = kwargs.get("status", State.INITIALIZED)
def __init__(self, name, description, studyenv=None, parameters=None,
             steps=None):
    """
    Study object used to represent the full workflow of a study.

    Derived from the DAG data structure. Contains everything that a study
    requires to be expanded with the appropriate substitutions and with
    parameters inserted. This data structure should be the instance the
    future daemon loads in to track progress on a workflow.

    :param name: String representing the name of the Study.
    :param description: A text description of what the study does.
    :param steps: A list of StudySteps in proper workflow order.
    :param studyenv: A populated StudyEnvironment instance.
    :param parameters: A populated Parameters instance.

    NOTE(review): a former ``outpath`` parameter was documented here but is
    not in the signature; the output path is instead derived from the
    environment's OUTPUT_PATH variable below.
    """
    # The basic study information
    self.name = name
    self.description = description

    # Initialize the DAG so we have those structures to be used.
    super(Study, self).__init__()

    # We want deep copies so that properties don't change out from under
    # the Study data structure.
    self.environment = copy.deepcopy(studyenv)
    self.parameters = copy.deepcopy(parameters)

    # Isolate the OUTPUT_PATH variable. Even though it should be contained
    # in the environment, it needs to be tweaked for each combination of
    # parameters to isolate their workspaces.
    if self.environment:
        # Attempt to remove the OUTPUT_PATH from the environment.
        self.output = self.environment.find('OUTPUT_PATH')

        # If it doesn't exist, assume the current directory is our output
        # path.
        if self.output is None:
            out_path = os.path.abspath('./')
            self.output = Variable('OUTPUT_PATH', out_path, '$')
            self.environment.add(self.output)
        else:
            # Normalize whatever the environment provided to an absolute
            # path so workspaces resolve consistently.
            self.output.value = os.path.abspath(self.output.value)
    # NOTE(review): when studyenv is falsy, self.output is never assigned --
    # confirm downstream code tolerates a missing attribute.

    # Flag the study as not having been set up and add the source node.
    self._issetup = False
    self.add_node(SOURCE, None)

    # Settings for handling restarts and submission attempts.
    self._restart_limit = 0
    self._submission_attempts = 0

    # If the user specified a flow in the form of steps, copy those
    # into the Study object.
    if steps:
        for step in steps:
            # Deep copy because it prevents modifications after the fact.
            self.add_step(copy.deepcopy(step))
def run_study(args):
    """
    Run a Maestro study.

    Loads the YAML specification named by ``args.specification``, resolves
    the output directory, loads parameters (optionally via a custom pgen),
    configures and stages the study, then either runs the Conductor in the
    foreground or launches it detached via nohup.

    :param args: Parsed argparse namespace (uses specification, out,
        autoyes, autono, pgen, pargs, attempts, throttle, rlimit, usetmp,
        hashws, dry, sleeptime, debug_lvl, fg).
    :returns: 0 on abort/success of a background launch, or the completion
        status value when run in the foreground.
    """
    # Load the Specification
    try:
        spec = YAMLSpecification.load_specification(args.specification)
    except jsonschema.ValidationError as e:
        LOGGER.error(e.message)
        sys.exit(1)
    environment = spec.get_study_environment()
    steps = spec.get_study_steps()

    # Set up the output directory.
    out_dir = environment.remove("OUTPUT_PATH")
    if args.out:
        # If out is specified in the args, ignore OUTPUT_PATH.
        output_path = os.path.abspath(args.out)

        # If we are automatically launching, just set the input as yes.
        if os.path.exists(output_path):
            if args.autoyes:
                uinput = "y"
            elif args.autono:
                uinput = "n"
            else:
                uinput = six.moves.input(
                    "Output path already exists. Would you like to overwrite "
                    "it? [yn] ")

            if uinput.lower() in ACCEPTED_INPUT:
                print("Cleaning up existing out path...")
                shutil.rmtree(output_path)
            else:
                print("Opting to quit -- not cleaning up old out path.")
                sys.exit(0)
    else:
        if out_dir is None:
            # If we don't find OUTPUT_PATH in the environment, assume pwd.
            out_dir = os.path.abspath("./")
        else:
            # We just take the value from the environment.
            out_dir = os.path.abspath(out_dir.value)

        # Timestamped directory name keeps repeated runs from colliding.
        out_name = "{}_{}".format(
            spec.name.replace(" ", "_"),
            time.strftime("%Y%m%d-%H%M%S"))
        output_path = make_safe_path(out_dir, *[out_name])
    environment.add(Variable("OUTPUT_PATH", output_path))

    # Set up file logging
    create_parentdir(os.path.join(output_path, "logs"))
    log_path = os.path.join(output_path, "logs", "{}.log".format(spec.name))
    LOG_UTIL.add_file_handler(log_path, LFORMAT, args.debug_lvl)

    # Check for pargs without the matching pgen
    if args.pargs and not args.pgen:
        msg = "Cannot use the 'pargs' parameter without specifying a 'pgen'!"
        LOGGER.exception(msg)
        raise ArgumentError(msg)

    # Addition of the $(SPECROOT) to the environment.
    spec_root = os.path.split(args.specification)[0]
    spec_root = Variable("SPECROOT", os.path.abspath(spec_root))
    environment.add(spec_root)

    # Handle loading a custom ParameterGenerator if specified.
    if args.pgen:
        # 'pgen_args' has a default of an empty list, which should translate
        # to an empty dictionary.
        kwargs = create_dictionary(args.pargs)
        # Copy the Python file used to generate parameters.
        shutil.copy(args.pgen, output_path)

        # Add keywords and environment from the spec to pgen args.
        kwargs["OUTPUT_PATH"] = output_path
        # NOTE(review): spec_root here is the Variable object, not its string
        # value -- confirm pgen consumers expect the object.
        kwargs["SPECROOT"] = spec_root

        # Load the parameter generator.
        parameters = load_parameter_generator(args.pgen, environment, kwargs)
    else:
        parameters = spec.get_parameters()

    # Setup the study.
    study = Study(spec.name, spec.description, studyenv=environment,
                  parameters=parameters, steps=steps, out_path=output_path)

    # Check if the submission attempts is greater than 0:
    if args.attempts < 1:
        _msg = "Submission attempts must be greater than 0. " \
               "'{}' provided.".format(args.attempts)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Check if the throttle is zero or greater:
    if args.throttle < 0:
        _msg = "Submission throttle must be a value of zero or greater. " \
               "'{}' provided.".format(args.throttle)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Check if the restart limit is zero or greater:
    if args.rlimit < 0:
        _msg = "Restart limit must be a value of zero or greater. " \
               "'{}' provided.".format(args.rlimit)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Set up the study workspace and configure it for execution.
    study.setup_workspace()
    study.configure_study(
        throttle=args.throttle, submission_attempts=args.attempts,
        restart_limit=args.rlimit, use_tmp=args.usetmp, hash_ws=args.hashws,
        dry_run=args.dry)
    study.setup_environment()

    if args.dry:
        # If performing a dry run, drive sleep time down to generate scripts.
        sleeptime = 1
    else:
        # else, use args to decide sleeptime
        sleeptime = args.sleeptime

    # Default to local execution when no batch section is in the spec.
    batch = {"type": "local"}
    if spec.batch:
        batch = spec.batch
        if "type" not in batch:
            batch["type"] = "local"

    # Copy the spec to the output directory
    shutil.copy(args.specification, study.output_path)

    # Use the Conductor's classmethod to store the study.
    Conductor.store_study(study)
    Conductor.store_batch(study.output_path, batch)

    # If we are automatically launching, just set the input as yes.
    if args.autoyes or args.dry:
        uinput = "y"
    elif args.autono:
        uinput = "n"
    else:
        uinput = six.moves.input("Would you like to launch the study? [yn] ")

    if uinput.lower() in ACCEPTED_INPUT:
        if args.fg:
            # Launch in the foreground.
            LOGGER.info("Running Maestro Conductor in the foreground.")
            conductor = Conductor(study)
            conductor.initialize(batch, sleeptime)
            completion_status = conductor.monitor_study()
            conductor.cleanup()
            return completion_status.value
        else:
            # Launch manager with nohup
            log_path = make_safe_path(
                study.output_path,
                *["{}.txt".format(study.name)])
            cmd = [
                "nohup", "conductor",
                "-t", str(sleeptime),
                "-d", str(args.debug_lvl),
                study.output_path,
                ">", log_path, "2>&1"
            ]
            LOGGER.debug(" ".join(cmd))
            start_process(" ".join(cmd))

            print("Study launched successfully.")
    else:
        print("Study launch aborted.")

    return 0
class _StepRecord:
    """
    A simple container object representing a workflow step record.

    The record contains all information used to generate associated scripts,
    and settings for execution of the record. The StepRecord is a utility
    class to the ExecutionGraph and maintains all information for any given
    step in the DAG.
    """

    def __init__(self, workspace, step, **kwargs):
        """
        Initialize a new instance of a StepRecord.

        Used kwargs:
        workspace: The working directory of the record.
        status: The record's current execution state.
        jobid: A scheduler assigned job identifier.
        script: The main script used for executing the record.
        restart_script: Script to resume record execution (if applicable).
        to_be_scheduled: True if the record needs scheduling. False otherwise.
        step: The StudyStep that is represented by the record instance.
        restart_limit: Upper limit on the number of restart attempts.
        tmp_dir: A provided temp directory to write scripts to instead of
        step workspace.
        """
        self.workspace = Variable("WORKSPACE", workspace)
        # Bake this record's concrete workspace into the step's commands.
        # NOTE: mutates the provided step in place.
        step.run["cmd"] = self.workspace.substitute(step.run["cmd"])
        step.run["restart"] = self.workspace.substitute(step.run["restart"])

        self.jobid = kwargs.get("jobid", [])  # History of scheduler job ids.
        self.script = kwargs.get("script", "")
        self.restart_script = kwargs.get("restart", "")
        self.to_be_scheduled = False
        self.step = step
        self.restart_limit = kwargs.get("restart_limit", 3)

        # Status Information
        self._num_restarts = 0    # Restart attempts performed so far.
        self._submit_time = None  # Set once on first submission.
        self._start_time = None   # Set once when execution begins.
        self._end_time = None     # Set once on termination.
        self.status = kwargs.get("status", State.INITIALIZED)

    def setup_workspace(self):
        """Initialize the record's workspace."""
        create_parentdir(self.workspace.value)

    def generate_script(self, adapter, tmp_dir=""):
        """
        Generate the script for executing the workflow step.

        :param adapter: Instance of adapter to be used for script generation.
        :param tmp_dir: If specified, place generated script in the specified
            temp directory.
        """
        if tmp_dir:
            scr_dir = tmp_dir
        else:
            scr_dir = self.workspace.value

        self.step.run["cmd"] = self.workspace.substitute(self.step.run["cmd"])

        LOGGER.info("Generating script for %s into %s", self.name, scr_dir)
        # The adapter decides whether the step needs scheduling and returns
        # both the main and (optional) restart scripts.
        self.to_be_scheduled, self.script, self.restart_script = \
            adapter.write_script(scr_dir, self.step)
        LOGGER.info("Script: %s\nRestart: %s\nScheduled?: %s",
                    self.script, self.restart_script, self.to_be_scheduled)

    def execute(self, adapter):
        """
        Submit the record's main script via the given adapter.

        :param adapter: Adapter used to submit (or locally run) the script.
        :returns: The adapter's SubmissionCode for the attempt.
        """
        self.mark_submitted()
        retcode, jobid = self._execute(adapter, self.script)

        if retcode == SubmissionCode.OK:
            self.jobid.append(jobid)

        return retcode

    def restart(self, adapter):
        """
        Submit the record's restart script via the given adapter.

        :param adapter: Adapter used to submit (or locally run) the script.
        :returns: The adapter's SubmissionCode for the attempt.
        """
        retcode, jobid = self._execute(adapter, self.restart_script)

        if retcode == SubmissionCode.OK:
            self.jobid.append(jobid)

        return retcode

    @property
    def can_restart(self):
        """
        Get whether or not the record can be restarted.

        :returns: True if the record has a restart command assigned to it.
        """
        return bool(self.restart_script)

    def _execute(self, adapter, script):
        """
        Submit the given script, locally or through the scheduler adapter.

        :param adapter: Adapter for scheduled submission.
        :param script: Path of the script to execute.
        :returns: Tuple of (submission code, job identifier).
        """
        if self.to_be_scheduled:
            srecord = adapter.submit(
                self.step, script, self.workspace.value)
        else:
            # Local steps run immediately, so mark them running now and use
            # the local adapter regardless of the one passed in.
            self.mark_running()
            ladapter = ScriptAdapterFactory.get_adapter("local")()
            srecord = ladapter.submit(
                self.step, script, self.workspace.value)

        retcode = srecord.submission_code
        jobid = srecord.job_identifier
        return retcode, jobid

    def mark_submitted(self):
        """Mark the submission time of the record."""
        LOGGER.debug(
            "Marking %s as submitted (PENDING) -- previously %s",
            self.name, self.status)
        self.status = State.PENDING
        # Only record the first submission time; restarts keep the original.
        if not self._submit_time:
            self._submit_time = round_datetime_seconds(datetime.now())
        else:
            LOGGER.warning(
                "Cannot set the submission time of '%s' because it has "
                "already been set.", self.name)

    def mark_running(self):
        """Mark the start time of the record."""
        LOGGER.debug(
            "Marking %s as running (RUNNING) -- previously %s",
            self.name, self.status)
        self.status = State.RUNNING
        if not self._start_time:
            self._start_time = round_datetime_seconds(datetime.now())

    def mark_end(self, state):
        """
        Mark the end time of the record with associated termination state.

        :param state: State enum corresponding to termination state.
        """
        LOGGER.debug(
            "Marking %s as finished (%s) -- previously %s",
            self.name, state, self.status)
        self.status = state
        if not self._end_time:
            self._end_time = round_datetime_seconds(datetime.now())

    def mark_restart(self):
        """
        Mark the record as restarting and count the attempt.

        :returns: True if another restart is permitted, False otherwise.
        """
        LOGGER.debug(
            "Marking %s as restarting (TIMEOUT) -- previously %s",
            self.name, self.status)
        self.status = State.TIMEDOUT
        # Designating a restart limit of zero as an unlimited restart setting.
        # Otherwise, if we're less than restart limit, attempt another restart.
        if self.restart_limit == 0 or \
                self._num_restarts < self.restart_limit:
            self._num_restarts += 1
            return True
        else:
            return False

    @property
    def is_local_step(self):
        """Return whether or not this step executes locally."""
        return not self.to_be_scheduled

    @property
    def elapsed_time(self):
        """Compute the elapsed time of the record (includes queue wait)."""
        if self._submit_time and self._end_time:
            # Return the total elapsed time.
            return get_duration(self._end_time - self._submit_time)
        elif self._submit_time and self.status == State.RUNNING:
            # Return the current elapsed time.
            return get_duration(datetime.now() - self._submit_time)
        else:
            return "--:--:--"

    @property
    def run_time(self):
        """
        Compute the run time of a record (includes restart queue time).

        :returns: A string of the records's run time.
        """
        if self._start_time and self._end_time:
            # If start and end time is set -- calculate run time.
            return get_duration(self._end_time - self._start_time)
        elif self._start_time and self.status == State.RUNNING:
            # BUGFIX: this condition previously read
            # `not self.status == State.RUNNING`, which inverted the intent:
            # the live duration should be reported only while the step is
            # still RUNNING, mirroring elapsed_time above.
            return get_duration(datetime.now() - self._start_time)
        else:
            # Otherwise, return an uncalculated marker.
            return "--:--:--"

    @property
    def name(self):
        """
        Get the name of the step represented by the record instance.

        :returns: The name of the StudyStep contained within the record.
        """
        return self.step.name

    @property
    def walltime(self):
        """
        Get the requested wall time of the record instance.

        :returns: A string representing the requested computing time.
        """
        return self.step.run["walltime"]

    @property
    def time_submitted(self):
        """
        Get the time the step was submitted.

        :returns: A formatted string of the date and time of submission,
            or "--" when the step has not been submitted.
        """
        if self._submit_time:
            return str(self._submit_time)
        else:
            return "--"

    @property
    def time_start(self):
        """
        Get the time the step started.

        :returns: A formatted string of the date and time the step started,
            or "--" when the step has not started.
        """
        if self._start_time:
            return str(self._start_time)
        else:
            return "--"

    @property
    def time_end(self):
        """
        Get the time the step ended.

        :returns: A formatted string of the date and time the step ended,
            or "--" when the step has not ended.
        """
        if self._end_time:
            return str(self._end_time)
        else:
            return "--"

    @property
    def restarts(self):
        """
        Get the number of restarts the step has executed.

        :returns: An int representing the number of restarts.
        """
        return self._num_restarts
def run_study(args):
    """
    Run a Maestro study.

    Loads the specification, resolves the output directory, stages the study
    and pickles its execution DAG, then either monitors it in the foreground
    or launches a detached conductor via nohup.

    :param args: Parsed argparse namespace (uses specification, out,
        autoyes, autono, pgen, pargs, attempts, throttle, rlimit, usetmp,
        hashws, dryrun, sleeptime, debug_lvl, fg).
    :returns: 0 on abort/background launch, or the completion status value
        when run in the foreground.
    """
    # Load the Specification
    spec = YAMLSpecification.load_specification(args.specification)
    environment = spec.get_study_environment()
    steps = spec.get_study_steps()

    # Set up the output directory.
    out_dir = environment.remove("OUTPUT_PATH")
    if args.out:
        # If out is specified in the args, ignore OUTPUT_PATH.
        output_path = os.path.abspath(args.out)

        # If we are automatically launching, just set the input as yes.
        if os.path.exists(output_path):
            if args.autoyes:
                uinput = "y"
            elif args.autono:
                uinput = "n"
            else:
                uinput = six.moves.input(
                    "Output path already exists. Would you like to overwrite "
                    "it? [yn] ")

            if uinput.lower() in ACCEPTED_INPUT:
                print("Cleaning up existing out path...")
                shutil.rmtree(output_path)
            else:
                print("Opting to quit -- not cleaning up old out path.")
                sys.exit(0)
    else:
        if out_dir is None:
            # If we don't find OUTPUT_PATH in the environment, assume pwd.
            out_dir = os.path.abspath("./")
        else:
            # We just take the value from the environment.
            out_dir = os.path.abspath(out_dir.value)

        # Timestamped name keeps repeated runs from colliding.
        out_name = "{}_{}".format(
            spec.name.replace(" ", "_"),
            time.strftime("%Y%m%d-%H%M%S"))
        output_path = make_safe_path(out_dir, *[out_name])
    environment.add(Variable("OUTPUT_PATH", output_path))

    # Now that we know outpath, set up logging.
    setup_logging(args, output_path, spec.name.replace(" ", "_").lower())

    # Check for pargs without the matching pgen
    if args.pargs and not args.pgen:
        msg = "Cannot use the 'pargs' parameter without specifying a 'pgen'!"
        LOGGER.exception(msg)
        raise ArgumentError(msg)

    # Handle loading a custom ParameterGenerator if specified.
    if args.pgen:
        # 'pgen_args' has a default of an empty list, which should translate
        # to an empty dictionary.
        kwargs = create_dictionary(args.pargs)
        # Copy the Python file used to generate parameters.
        shutil.copy(args.pgen, output_path)
        # NOTE(review): called with two args here but with (pgen, environment,
        # kwargs) elsewhere in this file -- confirm the expected signature.
        parameters = load_parameter_generator(args.pgen, kwargs)
    else:
        parameters = spec.get_parameters()

    # Addition of the $(SPECROOT) to the environment.
    spec_root = os.path.split(args.specification)[0]
    spec_root = Variable("SPECROOT", os.path.abspath(spec_root))
    environment.add(spec_root)

    # Setup the study.
    study = Study(spec.name, spec.description, studyenv=environment,
                  parameters=parameters, steps=steps, out_path=output_path)

    # Check if the submission attempts is greater than 0:
    if args.attempts < 1:
        _msg = "Submission attempts must be greater than 0. " \
               "'{}' provided.".format(args.attempts)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Check if the throttle is zero or greater:
    if args.throttle < 0:
        _msg = "Submission throttle must be a value of zero or greater. " \
               "'{}' provided.".format(args.throttle)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Check if the restart limit is zero or greater:
    if args.rlimit < 0:
        _msg = "Restart limit must be a value of zero or greater. " \
               "'{}' provided.".format(args.rlimit)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Set up the study workspace and configure it for execution.
    study.setup_workspace()
    study.setup_environment()
    study.configure_study(
        throttle=args.throttle, submission_attempts=args.attempts,
        restart_limit=args.rlimit, use_tmp=args.usetmp,
        hash_ws=args.hashws)

    # Stage the study.
    path, exec_dag = study.stage()
    # Write metadata
    study.store_metadata()

    if not spec.batch:
        exec_dag.set_adapter({"type": "local"})
    else:
        if "type" not in spec.batch:
            spec.batch["type"] = "local"
        exec_dag.set_adapter(spec.batch)

    # Copy the spec to the output directory
    shutil.copy(args.specification, path)

    # Check for a dry run
    if args.dryrun:
        raise NotImplementedError("The 'dryrun' mode is in development.")

    # Pickle up the DAG
    pkl_path = make_safe_path(path, *["{}.pkl".format(study.name)])
    exec_dag.pickle(pkl_path)

    # If we are automatically launching, just set the input as yes.
    if args.autoyes:
        uinput = "y"
    elif args.autono:
        uinput = "n"
    else:
        uinput = six.moves.input("Would you like to launch the study? [yn] ")

    if uinput.lower() in ACCEPTED_INPUT:
        if args.fg:
            # Launch in the foreground.
            LOGGER.info("Running Maestro Conductor in the foreground.")
            cancel_path = os.path.join(path, ".cancel.lock")
            # capture the StudyStatus enum to return
            completion_status = monitor_study(exec_dag, pkl_path,
                                              cancel_path, args.sleeptime)
            return completion_status.value
        else:
            # Launch manager with nohup
            log_path = make_safe_path(
                study.output_path,
                *["{}.txt".format(exec_dag.name)])
            # NOTE(review): "&>" is bash-specific redirection; the Conductor
            # variant of this function uses ">" + "2>&1" instead -- confirm
            # the shell used by start_process.
            cmd = [
                "nohup", "conductor",
                "-t", str(args.sleeptime),
                "-d", str(args.debug_lvl),
                path,
                "&>", log_path
            ]
            LOGGER.debug(" ".join(cmd))
            start_process(" ".join(cmd))

            print("Study launched successfully.")
    else:
        print("Study launch aborted.")

    return 0
def run_study(args):
    """
    Run a Maestro study.

    Loads the specification, resolves the output directory, stages the study
    and pickles its execution DAG, then either monitors it in the foreground
    or launches a detached conductor with Popen.

    :param args: Parsed argparse namespace (uses specification, out,
        autoyes, autono, attempts, throttle, rlimit, usetmp, dryrun,
        sleeptime, debug_lvl, fg).
    :returns: 0.
    """
    # Load the Specification
    spec = YAMLSpecification.load_specification(args.specification)
    environment = spec.get_study_environment()
    parameters = spec.get_parameters()
    steps = spec.get_study_steps()

    # Set up the output directory.
    out_dir = environment.remove("OUTPUT_PATH")
    if args.out:
        # If out is specified in the args, ignore OUTPUT_PATH.
        # NOTE(review): make_safe_path is called with a single argument here
        # but with (dir, name) elsewhere in this file -- confirm signature.
        output_path = os.path.abspath(make_safe_path(args.out))

        # If we are automatically launching, just set the input as yes.
        if os.path.exists(output_path):
            if args.autoyes:
                uinput = "y"
            elif args.autono:
                uinput = "n"
            else:
                uinput = six.moves.input(
                    "Output path already exists. Would you like to overwrite "
                    "it? [yn] ")

            if uinput.lower() in ACCEPTED_INPUT:
                print("Cleaning up existing out path...")
                shutil.rmtree(output_path)
            else:
                print("Opting to quit -- not cleaning up old out path.")
                sys.exit(0)
    else:
        if out_dir is None:
            # If we don't find OUTPUT_PATH in the environment, assume pwd.
            out_dir = os.path.abspath("./")
        else:
            # We just take the value from the environment.
            out_dir = os.path.abspath(out_dir.value)

        # Timestamped name keeps repeated runs from colliding.
        out_name = "{}_{}".format(
            spec.name.replace(" ", "_"),
            time.strftime("%Y%m%d-%H%M%S"))
        output_path = make_safe_path(out_dir, out_name)
    environment.add(Variable("OUTPUT_PATH", output_path))

    # Now that we know outpath, set up logging.
    setup_logging(args, output_path, spec.name)

    # Addition of the $(SPECROOT) to the environment.
    spec_root = os.path.split(args.specification)[0]
    spec_root = Variable("SPECROOT", os.path.abspath(spec_root))
    environment.add(spec_root)

    # Setup the study.
    study = Study(spec.name, spec.description, studyenv=environment,
                  parameters=parameters, steps=steps, out_path=output_path)

    # Check if the submission attempts is greater than 0:
    if args.attempts < 1:
        _msg = "Submission attempts must be greater than 0. " \
               "'{}' provided.".format(args.attempts)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Check if the throttle is zero or greater:
    if args.throttle < 0:
        _msg = "Submission throttle must be a value of zero or greater. " \
               "'{}' provided.".format(args.throttle)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Check if the restart limit is zero or greater:
    if args.rlimit < 0:
        _msg = "Restart limit must be a value of zero or greater. " \
               "'{}' provided.".format(args.rlimit)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    study.setup(throttle=args.throttle, submission_attempts=args.attempts,
                restart_limit=args.rlimit, use_tmp=args.usetmp)

    # Stage the study.
    path, exec_dag = study.stage()

    if not spec.batch:
        exec_dag.set_adapter({"type": "local"})
    else:
        exec_dag.set_adapter(spec.batch)

    # Copy the spec to the output directory
    shutil.copy(args.specification, path)

    # Check for a dry run
    if args.dryrun:
        raise NotImplementedError("The 'dryrun' mode is in development.")

    # Pickle up the DAG
    pkl_path = os.path.join(path, "{}.pkl".format(study.name))
    exec_dag.pickle(pkl_path)

    # If we are automatically launching, just set the input as yes.
    if args.autoyes:
        uinput = "y"
    elif args.autono:
        uinput = "n"
    else:
        uinput = six.moves.input("Would you like to launch the study? [yn] ")

    if uinput.lower() in ACCEPTED_INPUT:
        if args.fg:
            # Launch in the foreground.
            LOGGER.info("Running Maestro Conductor in the foreground.")
            cancel_path = os.path.join(path, ".cancel.lock")
            monitor_study(exec_dag, pkl_path, cancel_path, args.sleeptime)
        else:
            # Launch manager with nohup
            # NOTE(review): "&>" is bash-specific redirection -- confirm the
            # shell Popen uses on target platforms.
            cmd = [
                "nohup", "conductor",
                "-t", str(args.sleeptime),
                "-d", str(args.debug_lvl),
                path,
                "&>", "{}.txt".format(os.path.join(study.output_path,
                                                   exec_dag.name))
            ]
            LOGGER.debug(" ".join(cmd))
            Popen(" ".join(cmd), shell=True, stdout=PIPE, stderr=PIPE)

    return 0
def main():
    """
    The launcher main function.

    This function uses command line arguments to locate the study
    description. It makes use of the maestrowf core data structures as a
    high level class interface.

    Exits the process with status 0 after (optionally) launching a detached
    conductor process for the staged study.
    """
    # Set up the necessary base data structures to begin study set up.
    parser = setup_argparser()
    args = parser.parse_args()

    # Load the Specification
    spec = YAMLSpecification.load_specification(args.specification)
    environment = spec.get_study_environment()
    parameters = spec.get_parameters()
    steps = spec.get_study_steps()

    # Addition of the $(SPECROOT) to the environment.
    spec_root = os.path.split(args.specification)[0]
    spec_root = Variable("SPECROOT", os.path.abspath(spec_root))
    environment.add(spec_root)

    # Setup the study.
    study = Study(spec.name, spec.description, studyenv=environment,
                  parameters=parameters, steps=steps)
    study.setup()

    setup_logging(args, study.output_path, study.name)

    # Stage the study.
    path, exec_dag = study.stage()

    # Default to local execution when the spec has no batch section.
    if not spec.batch:
        exec_dag.set_adapter({"type": "local"})
    else:
        exec_dag.set_adapter(spec.batch)

    # Copy the spec to the output directory
    shutil.copy(args.specification, path)

    # Generate scripts
    exec_dag.generate_scripts()
    exec_dag.pickle(os.path.join(path, "{}.pkl".format(study.name)))

    # If we are automatically launching, just set the input as yes.
    if args.autoyes:
        uinput = "y"
    else:
        # BUGFIX: was `raw_input`, which exists only on Python 2 and raises
        # NameError on Python 3. Use six.moves.input for consistency with the
        # other interactive prompts in this module.
        uinput = six.moves.input("Would you like to launch the study?[yn] ")

    if uinput.lower() in ACCEPTED_INPUT:
        # Launch manager with nohup
        cmd = [
            "nohup", "conductor",
            "-t", str(args.sleeptime),
            "-d", str(args.debug_lvl),
            path,
            "&>", "{}.txt".format(os.path.join(study.output_path,
                                               exec_dag.name))
        ]
        LOGGER.debug(" ".join(cmd))
        Popen(" ".join(cmd), shell=True, stdout=PIPE, stderr=PIPE)

    sys.exit(0)