def load_dag(self):
    """
    Build the directed acyclic execution graph for the expanded spec.

    A Maestro ``Study`` is assembled from the expanded specification,
    prepared, and staged; the staged graph is wrapped in a ``DAG`` together
    with the sample column labels and stored on `self.dag`. Nothing is
    returned.
    """
    spec = self.expanded_spec

    # Pull the study ingredients out of the expanded spec.
    study_env = spec.get_study_environment()
    study_steps = spec.get_study_steps()
    study_params = spec.get_parameters()

    maestro_study = Study(
        spec.name,
        spec.description,
        studyenv=study_env,
        parameters=study_params,
        steps=study_steps,
        out_path=self.workspace,
    )

    # The workspace is only set up when no restart directory was given.
    if self.restart_dir is None:
        maestro_study.setup_workspace()

    maestro_study.setup_environment()
    maestro_study.configure_study(
        throttle=0,
        submission_attempts=1,
        restart_limit=0,
        use_tmp=None,
        hash_ws=None,
    )

    # Staging yields a pair; only the second element (the graph) is used.
    _staged_path, maestro_dag = maestro_study.stage()

    # Column labels exist only when the spec defines samples.
    samples = spec.merlin["samples"]
    column_labels = samples["column_labels"] if samples else []

    self.dag = DAG(maestro_dag, column_labels)
def run_study(args):
    """
    Run a Maestro study.

    Loads the YAML specification named by ``args.specification``, resolves
    the output path, builds and configures a ``Study``, stores the study and
    its batch settings through the ``Conductor`` and, if confirmed, launches
    it either in the foreground or in the background via ``nohup``.

    :param args: Parsed command line arguments. The attributes read here are
        ``specification``, ``out``, ``autoyes``, ``autono``, ``debug_lvl``,
        ``pargs``, ``pgen``, ``attempts``, ``throttle``, ``rlimit``,
        ``usetmp``, ``hashws``, ``dry``, ``sleeptime`` and ``fg``.
    :returns: ``completion_status.value`` from the Conductor when the study
        runs in the foreground; 0 otherwise (background launch or aborted
        launch).
    :raises ArgumentError: If ``pargs`` is given without ``pgen``, if
        ``attempts`` is below 1, or if ``throttle`` or ``rlimit`` is
        negative.

    The process exits with status 1 if the specification fails schema
    validation, and with status 0 if the user declines to overwrite an
    existing output path.
    """
    # Load the Specification
    try:
        spec = YAMLSpecification.load_specification(args.specification)
    except jsonschema.ValidationError as e:
        LOGGER.error(e.message)
        sys.exit(1)
    environment = spec.get_study_environment()
    steps = spec.get_study_steps()

    # Set up the output directory. OUTPUT_PATH is removed from the
    # environment here and re-added below once the final path is known.
    out_dir = environment.remove("OUTPUT_PATH")
    if args.out:
        # If out is specified in the args, ignore OUTPUT_PATH.
        output_path = os.path.abspath(args.out)

        # An existing output path is only reused after it has been wiped.
        if os.path.exists(output_path):
            # If we are automatically launching, just set the input as yes.
            if args.autoyes:
                uinput = "y"
            elif args.autono:
                uinput = "n"
            else:
                uinput = six.moves.input(
                    "Output path already exists. Would you like to overwrite "
                    "it? [yn] ")

            if uinput.lower() in ACCEPTED_INPUT:
                print("Cleaning up existing out path...")
                shutil.rmtree(output_path)
            else:
                print("Opting to quit -- not cleaning up old out path.")
                sys.exit(0)
    else:
        if out_dir is None:
            # If we don't find OUTPUT_PATH in the environment, assume pwd.
            out_dir = os.path.abspath("./")
        else:
            # We just take the value from the environment.
            out_dir = os.path.abspath(out_dir.value)

        # Name the output directory after the study plus a timestamp.
        out_name = "{}_{}".format(
            spec.name.replace(" ", "_"),
            time.strftime("%Y%m%d-%H%M%S"))
        output_path = make_safe_path(out_dir, out_name)
    environment.add(Variable("OUTPUT_PATH", output_path))

    # Set up file logging
    create_parentdir(os.path.join(output_path, "logs"))
    log_path = os.path.join(output_path, "logs", "{}.log".format(spec.name))
    LOG_UTIL.add_file_handler(log_path, LFORMAT, args.debug_lvl)

    # Check for pargs without the matching pgen
    if args.pargs and not args.pgen:
        msg = "Cannot use the 'pargs' parameter without specifying a 'pgen'!"
        # LOGGER.error, not LOGGER.exception: no exception is being handled
        # at this point, so there is no traceback to attach to the record.
        LOGGER.error(msg)
        raise ArgumentError(msg)

    # Addition of the $(SPECROOT) to the environment.
    spec_root = os.path.split(args.specification)[0]
    spec_root = Variable("SPECROOT", os.path.abspath(spec_root))
    environment.add(spec_root)

    # Handle loading a custom ParameterGenerator if specified.
    if args.pgen:
        # 'pgen_args' has a default of an empty list, which should translate
        # to an empty dictionary.
        kwargs = create_dictionary(args.pargs)
        # Copy the Python file used to generate parameters.
        shutil.copy(args.pgen, output_path)

        # Add keywords and environment from the spec to pgen args.
        # NOTE(review): OUTPUT_PATH is passed as a path string while SPECROOT
        # is passed as the Variable object -- confirm this is intended.
        kwargs["OUTPUT_PATH"] = output_path
        kwargs["SPECROOT"] = spec_root

        # Load the parameter generator.
        parameters = load_parameter_generator(args.pgen, environment, kwargs)
    else:
        parameters = spec.get_parameters()

    # Setup the study.
    study = Study(spec.name, spec.description, studyenv=environment,
                  parameters=parameters, steps=steps, out_path=output_path)

    # Check if the submission attempts is greater than 0:
    if args.attempts < 1:
        _msg = "Submission attempts must be greater than 0. " \
               "'{}' provided.".format(args.attempts)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Check if the throttle is zero or greater:
    if args.throttle < 0:
        _msg = "Submission throttle must be a value of zero or greater. " \
               "'{}' provided.".format(args.throttle)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Check if the restart limit is zero or greater:
    if args.rlimit < 0:
        _msg = "Restart limit must be a value of zero or greater. " \
               "'{}' provided.".format(args.rlimit)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Set up the study workspace and configure it for execution.
    study.setup_workspace()
    study.configure_study(
        throttle=args.throttle, submission_attempts=args.attempts,
        restart_limit=args.rlimit, use_tmp=args.usetmp, hash_ws=args.hashws,
        dry_run=args.dry)
    study.setup_environment()

    if args.dry:
        # If performing a dry run, drive sleep time down to generate scripts.
        sleeptime = 1
    else:
        # else, use args to decide sleeptime
        sleeptime = args.sleeptime

    # Fall back to a local batch type when the spec does not name one.
    batch = {"type": "local"}
    if spec.batch:
        batch = spec.batch
        if "type" not in batch:
            batch["type"] = "local"

    # Copy the spec to the output directory
    shutil.copy(args.specification, study.output_path)

    # Use the Conductor's classmethod to store the study.
    Conductor.store_study(study)
    Conductor.store_batch(study.output_path, batch)

    # If we are automatically launching, just set the input as yes.
    # A dry run is launched without asking as well.
    if args.autoyes or args.dry:
        uinput = "y"
    elif args.autono:
        uinput = "n"
    else:
        uinput = six.moves.input("Would you like to launch the study? [yn] ")

    if uinput.lower() in ACCEPTED_INPUT:
        if args.fg:
            # Launch in the foreground.
            LOGGER.info("Running Maestro Conductor in the foreground.")
            conductor = Conductor(study)
            conductor.initialize(batch, sleeptime)
            completion_status = conductor.monitor_study()
            conductor.cleanup()
            return completion_status.value
        else:
            # Launch manager with nohup
            log_path = make_safe_path(
                study.output_path, "{}.txt".format(study.name))

            cmd = ["nohup", "conductor",
                   "-t", str(sleeptime),
                   "-d", str(args.debug_lvl),
                   study.output_path,
                   ">", log_path, "2>&1"]
            LOGGER.debug(" ".join(cmd))
            start_process(" ".join(cmd))

            print("Study launched successfully.")
    else:
        print("Study launch aborted.")

    return 0
def run_study(args):
    """
    Run a Maestro study.

    Loads the YAML specification named by ``args.specification``, resolves
    the output path, builds, sets up and stages a ``Study``, pickles the
    resulting execution graph and, if confirmed, either monitors the study in
    the foreground or launches the conductor in the background via ``nohup``.

    :param args: Parsed command line arguments. The attributes read here are
        ``specification``, ``out``, ``autoyes``, ``autono``, ``attempts``,
        ``throttle``, ``rlimit``, ``usetmp``, ``dryrun``, ``sleeptime``,
        ``debug_lvl`` and ``fg``; ``args`` is also handed to
        ``setup_logging``.
    :returns: Always 0.
    :raises ArgumentError: If ``attempts`` is below 1, or if ``throttle`` or
        ``rlimit`` is negative.
    :raises NotImplementedError: If ``args.dryrun`` is set.

    The process exits with status 0 if the user declines to overwrite an
    existing output path.
    """
    # Load the Specification
    spec = YAMLSpecification.load_specification(args.specification)
    environment = spec.get_study_environment()
    parameters = spec.get_parameters()
    steps = spec.get_study_steps()

    # Set up the output directory. OUTPUT_PATH is removed from the
    # environment here and re-added below once the final path is known.
    out_dir = environment.remove("OUTPUT_PATH")
    if args.out:
        # If out is specified in the args, ignore OUTPUT_PATH.
        output_path = os.path.abspath(make_safe_path(args.out))

        # An existing output path is only reused after it has been wiped.
        if os.path.exists(output_path):
            # If we are automatically launching, just set the input as yes.
            if args.autoyes:
                uinput = "y"
            elif args.autono:
                uinput = "n"
            else:
                uinput = six.moves.input(
                    "Output path already exists. Would you like to overwrite "
                    "it? [yn] ")

            if uinput.lower() in ACCEPTED_INPUT:
                print("Cleaning up existing out path...")
                shutil.rmtree(output_path)
            else:
                print("Opting to quit -- not cleaning up old out path.")
                sys.exit(0)
    else:
        if out_dir is None:
            # If we don't find OUTPUT_PATH in the environment, assume pwd.
            out_dir = os.path.abspath("./")
        else:
            # We just take the value from the environment.
            out_dir = os.path.abspath(out_dir.value)

        # Name the output directory after the study plus a timestamp.
        out_name = "{}_{}".format(
            spec.name.replace(" ", "_"),
            time.strftime("%Y%m%d-%H%M%S"))
        output_path = make_safe_path(out_dir, out_name)
    environment.add(Variable("OUTPUT_PATH", output_path))

    # Now that we know outpath, set up logging.
    setup_logging(args, output_path, spec.name)

    # Addition of the $(SPECROOT) to the environment.
    spec_root = os.path.split(args.specification)[0]
    spec_root = Variable("SPECROOT", os.path.abspath(spec_root))
    environment.add(spec_root)

    # Setup the study.
    study = Study(spec.name, spec.description, studyenv=environment,
                  parameters=parameters, steps=steps, out_path=output_path)

    # Check if the submission attempts is greater than 0:
    if args.attempts < 1:
        _msg = "Submission attempts must be greater than 0. " \
               "'{}' provided.".format(args.attempts)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Check if the throttle is zero or greater:
    if args.throttle < 0:
        _msg = "Submission throttle must be a value of zero or greater. " \
               "'{}' provided.".format(args.throttle)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Check if the restart limit is zero or greater:
    if args.rlimit < 0:
        _msg = "Restart limit must be a value of zero or greater. " \
               "'{}' provided.".format(args.rlimit)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    study.setup(throttle=args.throttle, submission_attempts=args.attempts,
                restart_limit=args.rlimit, use_tmp=args.usetmp)

    # Stage the study.
    path, exec_dag = study.stage()

    # Fall back to a local adapter when the spec has no batch section.
    if not spec.batch:
        exec_dag.set_adapter({"type": "local"})
    else:
        exec_dag.set_adapter(spec.batch)

    # Copy the spec to the output directory
    shutil.copy(args.specification, path)

    # Check for a dry run
    if args.dryrun:
        raise NotImplementedError("The 'dryrun' mode is in development.")

    # Pickle up the DAG
    pkl_path = os.path.join(path, "{}.pkl".format(study.name))
    exec_dag.pickle(pkl_path)

    # If we are automatically launching, just set the input as yes.
    if args.autoyes:
        uinput = "y"
    elif args.autono:
        uinput = "n"
    else:
        uinput = six.moves.input("Would you like to launch the study? [yn] ")

    if uinput.lower() in ACCEPTED_INPUT:
        if args.fg:
            # Launch in the foreground.
            LOGGER.info("Running Maestro Conductor in the foreground.")
            cancel_path = os.path.join(path, ".cancel.lock")
            monitor_study(exec_dag, pkl_path, cancel_path, args.sleeptime)
        else:
            # Launch manager with nohup
            log_path = "{}.txt".format(
                os.path.join(study.output_path, exec_dag.name))
            # Redirect with `> file 2>&1` rather than `&>`: Popen with
            # shell=True runs the command through /bin/sh, and a POSIX sh
            # reads `&>` as `&` followed by `> file`, so nothing would be
            # written to the log.
            cmd = ["nohup", "conductor",
                   "-t", str(args.sleeptime),
                   "-d", str(args.debug_lvl),
                   path,
                   ">", log_path, "2>&1"]
            LOGGER.debug(" ".join(cmd))
            Popen(" ".join(cmd), shell=True, stdout=PIPE, stderr=PIPE)

    return 0
def run_study(args):
    """
    Run a Maestro study.

    Loads the YAML specification named by ``args.specification``, resolves
    the output path, builds, configures and stages a ``Study``, pickles the
    resulting execution graph and, if confirmed, either monitors the study in
    the foreground or launches the conductor in the background via ``nohup``.

    :param args: Parsed command line arguments. The attributes read here are
        ``specification``, ``out``, ``autoyes``, ``autono``, ``pargs``,
        ``pgen``, ``attempts``, ``throttle``, ``rlimit``, ``usetmp``,
        ``hashws``, ``dryrun``, ``sleeptime``, ``debug_lvl`` and ``fg``;
        ``args`` is also handed to ``setup_logging``.
    :returns: ``completion_status.value`` as reported by ``monitor_study``
        when the study runs in the foreground; 0 otherwise (background launch
        or aborted launch).
    :raises ArgumentError: If ``pargs`` is given without ``pgen``, if
        ``attempts`` is below 1, or if ``throttle`` or ``rlimit`` is
        negative.
    :raises NotImplementedError: If ``args.dryrun`` is set.

    The process exits with status 0 if the user declines to overwrite an
    existing output path.
    """
    # Load the Specification
    spec = YAMLSpecification.load_specification(args.specification)
    environment = spec.get_study_environment()
    steps = spec.get_study_steps()

    # Set up the output directory. OUTPUT_PATH is removed from the
    # environment here and re-added below once the final path is known.
    out_dir = environment.remove("OUTPUT_PATH")
    if args.out:
        # If out is specified in the args, ignore OUTPUT_PATH.
        output_path = os.path.abspath(args.out)

        # An existing output path is only reused after it has been wiped.
        if os.path.exists(output_path):
            # If we are automatically launching, just set the input as yes.
            if args.autoyes:
                uinput = "y"
            elif args.autono:
                uinput = "n"
            else:
                uinput = six.moves.input(
                    "Output path already exists. Would you like to overwrite "
                    "it? [yn] ")

            if uinput.lower() in ACCEPTED_INPUT:
                print("Cleaning up existing out path...")
                shutil.rmtree(output_path)
            else:
                print("Opting to quit -- not cleaning up old out path.")
                sys.exit(0)
    else:
        if out_dir is None:
            # If we don't find OUTPUT_PATH in the environment, assume pwd.
            out_dir = os.path.abspath("./")
        else:
            # We just take the value from the environment.
            out_dir = os.path.abspath(out_dir.value)

        # Name the output directory after the study plus a timestamp.
        out_name = "{}_{}".format(
            spec.name.replace(" ", "_"),
            time.strftime("%Y%m%d-%H%M%S"))
        output_path = make_safe_path(out_dir, out_name)
    environment.add(Variable("OUTPUT_PATH", output_path))

    # Now that we know outpath, set up logging.
    setup_logging(args, output_path, spec.name.replace(" ", "_").lower())

    # Check for pargs without the matching pgen
    if args.pargs and not args.pgen:
        msg = "Cannot use the 'pargs' parameter without specifying a 'pgen'!"
        # LOGGER.error, not LOGGER.exception: no exception is being handled
        # at this point, so there is no traceback to attach to the record.
        LOGGER.error(msg)
        raise ArgumentError(msg)

    # Handle loading a custom ParameterGenerator if specified.
    if args.pgen:
        # 'pgen_args' has a default of an empty list, which should translate
        # to an empty dictionary.
        kwargs = create_dictionary(args.pargs)
        # Copy the Python file used to generate parameters.
        shutil.copy(args.pgen, output_path)
        parameters = load_parameter_generator(args.pgen, kwargs)
    else:
        parameters = spec.get_parameters()

    # Addition of the $(SPECROOT) to the environment.
    spec_root = os.path.split(args.specification)[0]
    spec_root = Variable("SPECROOT", os.path.abspath(spec_root))
    environment.add(spec_root)

    # Setup the study.
    study = Study(spec.name, spec.description, studyenv=environment,
                  parameters=parameters, steps=steps, out_path=output_path)

    # Check if the submission attempts is greater than 0:
    if args.attempts < 1:
        _msg = "Submission attempts must be greater than 0. " \
               "'{}' provided.".format(args.attempts)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Check if the throttle is zero or greater:
    if args.throttle < 0:
        _msg = "Submission throttle must be a value of zero or greater. " \
               "'{}' provided.".format(args.throttle)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Check if the restart limit is zero or greater:
    if args.rlimit < 0:
        _msg = "Restart limit must be a value of zero or greater. " \
               "'{}' provided.".format(args.rlimit)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Set up the study workspace and configure it for execution.
    study.setup_workspace()
    study.setup_environment()
    study.configure_study(
        throttle=args.throttle, submission_attempts=args.attempts,
        restart_limit=args.rlimit, use_tmp=args.usetmp, hash_ws=args.hashws)

    # Stage the study.
    path, exec_dag = study.stage()
    # Write metadata
    study.store_metadata()

    # Fall back to a local adapter when the spec has no batch section or
    # the section does not name a type.
    if not spec.batch:
        exec_dag.set_adapter({"type": "local"})
    else:
        if "type" not in spec.batch:
            spec.batch["type"] = "local"

        exec_dag.set_adapter(spec.batch)

    # Copy the spec to the output directory
    shutil.copy(args.specification, path)

    # Check for a dry run
    if args.dryrun:
        raise NotImplementedError("The 'dryrun' mode is in development.")

    # Pickle up the DAG
    pkl_path = make_safe_path(path, "{}.pkl".format(study.name))
    exec_dag.pickle(pkl_path)

    # If we are automatically launching, just set the input as yes.
    if args.autoyes:
        uinput = "y"
    elif args.autono:
        uinput = "n"
    else:
        uinput = six.moves.input("Would you like to launch the study? [yn] ")

    if uinput.lower() in ACCEPTED_INPUT:
        if args.fg:
            # Launch in the foreground.
            LOGGER.info("Running Maestro Conductor in the foreground.")
            cancel_path = os.path.join(path, ".cancel.lock")
            # capture the StudyStatus enum to return
            completion_status = monitor_study(exec_dag, pkl_path,
                                              cancel_path, args.sleeptime)
            return completion_status.value
        else:
            # Launch manager with nohup
            log_path = make_safe_path(
                study.output_path, "{}.txt".format(exec_dag.name))

            # Redirect with `> file 2>&1` rather than `&>`: `&>` is a bash
            # extension that a POSIX sh reads as `&` followed by `> file`.
            # NOTE(review): assumes start_process runs this string through a
            # shell, which the embedded redirection implies -- confirm.
            cmd = ["nohup", "conductor",
                   "-t", str(args.sleeptime),
                   "-d", str(args.debug_lvl),
                   path,
                   ">", log_path, "2>&1"]
            LOGGER.debug(" ".join(cmd))
            start_process(" ".join(cmd))

            print("Study launched successfully.")
    else:
        print("Study launch aborted.")

    return 0
def main():
    """
    The launcher main function.

    This function uses command line arguments to locate the study
    description. It makes use of the maestrowf core data structures as a high
    level class interface.

    The specification named on the command line is loaded, a ``Study`` is
    built, set up and staged, scripts are generated and the execution graph
    is pickled. If the launch is confirmed, the conductor is started in the
    background via ``nohup``. The function ends by calling ``sys.exit(0)``
    and therefore does not return.
    """
    # Set up the necessary base data structures to begin study set up.
    parser = setup_argparser()
    args = parser.parse_args()

    # Load the Specification
    spec = YAMLSpecification.load_specification(args.specification)
    environment = spec.get_study_environment()
    parameters = spec.get_parameters()
    steps = spec.get_study_steps()

    # Addition of the $(SPECROOT) to the environment.
    spec_root = os.path.split(args.specification)[0]
    spec_root = Variable("SPECROOT", os.path.abspath(spec_root))
    environment.add(spec_root)

    # Setup the study.
    study = Study(spec.name, spec.description, studyenv=environment,
                  parameters=parameters, steps=steps)
    study.setup()
    # Logging is set up after study.setup() because it uses the study's
    # output path.
    setup_logging(args, study.output_path, study.name)

    # Stage the study.
    path, exec_dag = study.stage()

    # Fall back to a local adapter when the spec has no batch section.
    if not spec.batch:
        exec_dag.set_adapter({"type": "local"})
    else:
        exec_dag.set_adapter(spec.batch)

    # Copy the spec to the output directory
    shutil.copy(args.specification, path)

    # Generate scripts
    exec_dag.generate_scripts()
    exec_dag.pickle(os.path.join(path, "{}.pkl".format(study.name)))

    # If we are automatically launching, just set the input as yes.
    if args.autoyes:
        uinput = "y"
    else:
        # raw_input only exists on Python 2; on Python 3 the equivalent
        # builtin is input.
        try:
            read_input = raw_input
        except NameError:
            read_input = input
        uinput = read_input("Would you like to launch the study?[yn] ")

    if uinput.lower() in ACCEPTED_INPUT:
        # Launch manager with nohup
        log_path = "{}.txt".format(
            os.path.join(study.output_path, exec_dag.name))
        # Redirect with `> file 2>&1` rather than `&>`: Popen with
        # shell=True runs the command through /bin/sh, and a POSIX sh reads
        # `&>` as `&` followed by `> file`, so nothing would be written to
        # the log.
        cmd = ["nohup", "conductor",
               "-t", str(args.sleeptime),
               "-d", str(args.debug_lvl),
               path,
               ">", log_path, "2>&1"]
        LOGGER.debug(" ".join(cmd))
        Popen(" ".join(cmd), shell=True, stdout=PIPE, stderr=PIPE)

    sys.exit(0)