Example #1
    def setup_workspace(self):
        """Set up the study's main workspace directory."""
        try:
            logger.info("Setting up study workspace in '%s'", self._out_path)
            create_parentdir(self._out_path)
        except Exception as e:
            logger.error(str(e))
            return False
        return True
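
All of the examples on this page use create_parentdir to create a directory (and any missing parents) only if it does not already exist. A minimal sketch of such a helper, assuming it simply wraps os.makedirs; the real maestrowf utility may add logging or extra error handling:

import os

def create_parentdir(path):
    # Create 'path', including any missing parent directories, but only
    # if it does not already exist. Sketch under the assumption above.
    if not os.path.exists(path):
        os.makedirs(os.path.abspath(path))
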
Example #2
    def setup(self, submission_attempts=1, restart_limit=1):
        """
        Method for executing initial setup of a Study.

        The method is used for going through and actually acquiring each
        dependency, substituting variables, sources and labels. Also sets up
        the folder structure for the study.

        :param submission_attempts: Number of attempted submissions before
            marking a step as failed.
        :param restart_limit: Upper limit on the number of times a step with
            a restart command can be resubmitted before it is considered
            failed.
        :returns: True if the Study is successfully set up, False otherwise.
        """
        # If the study has been set up, just return.
        if self._issetup:
            logger.info("%s is already set up, returning.")
            return True

        self._submission_attempts = submission_attempts
        self._restart_limit = restart_limit

        # Set up the directory structure.
        # TODO: fdinatal - As I implement the high level program (manager and
        # launcher in bin), I'm starting to have questions about whether or
        # not the study set up is the place to handle the output path... it
        # feels like the determination of the output path should be at the
        # higher level.
        out_name = "{}_{}".format(self.name.replace(" ", "_"),
                                  time.strftime("%Y%m%d-%H%M%S"))
        self.output.value = os.path.join(self.output.value, out_name)

        # Set up the environment if it hasn't been already.
        if not self.environment.is_set_up:
            logger.info("Environment is setting up.")
            self.environment.acquire_environment()

        try:
            create_parentdir(self.output.value)
        except Exception as e:
            logger.error(str(e))
            return False

        # Apply all environment artifacts and acquire everything.
        for key, node in self.values.items():
            logger.info("Applying to step '%s' of the study '%s'...", key,
                        node)
            if node:
                node.__dict__ = apply_function(
                    node.__dict__, self.environment.apply_environment)

        # Flag the study as set up.
        self._issetup = True
        return True
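
Example #2 builds a unique output directory by appending a timestamp to the study name. A small self-contained illustration of that naming scheme (the study name and base path here are hypothetical):

import os
import time

study_name = "my study"  # hypothetical
out_name = "{}_{}".format(study_name.replace(" ", "_"),
                          time.strftime("%Y%m%d-%H%M%S"))
output_path = os.path.join("/tmp/studies", out_name)  # hypothetical base
print(output_path)  # e.g. /tmp/studies/my_study_20240101-120000
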
Example #3
def setup_logging(name,
                  output_path,
                  log_lvl=2,
                  log_path=None,
                  log_stdout=False,
                  log_format=None):
    """
    Set up logging in the Main class.
    :param args: A Namespace object created by a parsed ArgumentParser.
    :param name: The name of the log file.
    """
    # Check if the user has specified a custom log path.
    if log_path:
        LOGGER.info("Log path overwritten by command line -- %s", log_path)
    else:
        log_path = os.path.join(output_path, "logs")

    if not log_format:
        log_format = LFORMAT

    loglevel = log_lvl * 10

    # Attempt to create the logging directory.
    create_parentdir(log_path)
    formatter = logging.Formatter(log_format)
    ROOTLOGGER.setLevel(loglevel)

    # Set up handlers
    if log_stdout:
        handler = logging.StreamHandler()
        handler.setFormatter(formatter)
        ROOTLOGGER.addHandler(handler)

    log_file = os.path.join(log_path, "{}.log".format(name))
    handler = logging.FileHandler(log_file)
    handler.setFormatter(formatter)
    ROOTLOGGER.addHandler(handler)

    # Print the level of logging.
    LOGGER.info("INFO Logging Level -- Enabled")
    LOGGER.warning("WARNING Logging Level -- Enabled")
    LOGGER.critical("CRITICAL Logging Level -- Enabled")
    LOGGER.debug("DEBUG Logging Level -- Enabled")
Example #4
def setup_logging(args, path, name):
    """
    Set up logging based on the ArgumentParser.

    :param args: A Namespace object created by a parsed ArgumentParser.
    :param path: A default path to be used if a log path is not specified by
        user command line arguments.
    :param name: The name of the log file.
    """
    # If the user has specified a path, use that.
    if args.logpath:
        logpath = args.logpath
    # Otherwise, we should just output to the OUTPUT_PATH.
    else:
        logpath = make_safe_path(path, *["logs"])

    loglevel = args.debug_lvl * 10

    # Create the FileHandler and add it to the logger.
    create_parentdir(logpath)
    formatter = logging.Formatter(LFORMAT)
    ROOTLOGGER.setLevel(loglevel)

    log_path = make_safe_path(logpath, *["{}.log".format(name)])
    fh = logging.FileHandler(log_path)
    fh.setLevel(loglevel)
    fh.setFormatter(formatter)
    ROOTLOGGER.addHandler(fh)

    if args.logstdout:
        # Add the StreamHandler
        sh = logging.StreamHandler()
        sh.setLevel(loglevel)
        sh.setFormatter(formatter)
        ROOTLOGGER.addHandler(sh)

    # Print the level of logging.
    LOGGER.info("INFO Logging Level -- Enabled")
    LOGGER.warning("WARNING Logging Level -- Enabled")
    LOGGER.critical("CRITICAL Logging Level -- Enabled")
    LOGGER.debug("DEBUG Logging Level -- Enabled")
Example #5
def setup_logging(args, name):
    """
    Set up logging in the Main class.

    :param args: A Namespace object created by a parsed ArgumentParser.
    :param name: The name of the log file.
    """
    # Check if the user has specified a custom log path.
    if args.logpath:
        logger.info("Log path overwritten by command line -- %s", args.logpath)
        log_path = args.logpath
    else:
        log_path = os.path.join(args.directory, "logs")

    loglevel = args.debug_lvl * 10

    # Attempt to create the logging directory.
    create_parentdir(log_path)
    formatter = logging.Formatter(LFORMAT)
    rootlogger.setLevel(loglevel)

    # Set up handlers
    if args.logstdout:
        handler = logging.StreamHandler()
        handler.setFormatter(formatter)
        rootlogger.addHandler(handler)

    log_file = os.path.join(log_path, "{}.log".format(name))
    handler = logging.FileHandler(log_file)
    handler.setFormatter(formatter)
    rootlogger.addHandler(handler)

    # Print the level of logging.
    logger.info("INFO Logging Level -- Enabled")
    logger.warning("WARNING Logging Level -- Enabled")
    logger.critical("CRITICAL Logging Level -- Enabled")
    logger.debug("DEBUG Logging Level -- Enabled")
Example #6
    def _setup_parameterized(self):
        """
        Set up the ExecutionGraph of a parameterized study.

        :returns: The path to the study's global workspace and an expanded
            ExecutionGraph based on the parameters and parameterized workflow
            steps.
        # Construct ExecutionGraph
        dag = ExecutionGraph()
        dag.add_description(**self.description)
        # Items to store that should be reset.
        global_workspace = self.output.value  # Highest output dir

        # Rework begins here:
        # First step, we need to map each workflow step to the parameters that
        # they actually use -- and only the parameters used. This setup will
        # make it so that workflows can be constructed with implicit stages.
        # That's to say that if a step only requires a subset of parameters,
        # we only need to run the set of combinations dictated by that subset.
        # NOTE: We're going to need to make a way for users to access the
        # workspaces of other steps. With this rework we won't be able to
        # assume that every directory has all parameters on it.
        used_params = {}
        workspaces = {}
        for parent, step, node in self.walk_study():
            # Source doesn't matter -- ignore it.
            if step == SOURCE:
                continue

            # Otherwise, we have a valid key.
            # We need to collect used parameters for two things:
            # 1. Collect the used parameters for the current step.
            # 2. Get the used parameters for the parent step.
            # The logic here is that the used parameters are going to be the
            # union of the used parameters for this step and ALL parent steps.
            # If we keep including the step's parent parameters, we will simply
            # carry parent parameters recursively.
            step_params = self.parameters.get_used_parameters(node)
            if parent != SOURCE:
                step_params |= used_params[parent]
            used_params[step] = step_params

        logger.debug("Used Parameters - \n%s", used_params)

        # Secondly, we need to now iterate over all combinations for each step
        # and simply apply the combination. We can then add the name to the
        # expanded map using only the parameters that we discovered above.
        for combo in self.parameters:
            # For each Combination in the parameters...
            logger.info("==================================================")
            logger.info("Expanding study '%s' for combination '%s'", self.name,
                        str(combo))
            logger.info("==================================================")

            # For each step in the Study
            # Walk the study and construct subtree based on the combination.
            for parent, step, node in self.walk_study():
                # If we find the source node, we can just add it and continue.
                if step == SOURCE:
                    logger.debug("Source node found.")
                    dag.add_node(SOURCE, None)
                    continue

                logger.debug("Processing step '%s'.", step)
                # Due to the rework, we now can get the parameters used. We no
                # longer have to blindly apply the parameters. In fact, better
                # if we don't know. We have to see if the name exists in the
                # DAG first. If it does we can skip the step. Otherwise, apply
                # and add.
                if used_params[step]:
                    logger.debug("Used parameters %s", used_params[step])
                    # Apply the used parameters to the step.
                    modified, step_exp = node.apply_parameters(combo)
                    # Name the step based on the parameters used.
                    combo_str = combo.get_param_string(used_params[step])
                    step_name = "{}_{}".format(step_exp.name, combo_str)
                    logger.debug(
                        "Step has been modified. Step '%s' renamed"
                        " to '%s'", step_exp.name, step_name)
                    step_exp.name = step_name
                    logger.debug("Resulting step name: %s", step_name)

                    # Set the workspace to the parameterized workspace
                    self.output.value = os.path.join(global_workspace,
                                                     combo_str)

                    # We now should account for varying workspace locations.
                    # Search for the use of workspaces in the command line so
                    # that we can go ahead and fill in the appropriate space
                    # for this combination.
                    cmd = step_exp.run["cmd"]
                    used_spaces = re.findall(WSREGEX, cmd)
                    for match in used_spaces:
                        logger.debug("Workspace found -- %s", match)
                        # Append the parameters that the step uses matching the
                        # current combo.
                        combo_str = combo.get_param_string(used_params[match])
                        logger.debug("Combo str -- %s", combo_str)
                        if combo_str:
                            ws_key = "{}_{}".format(match, combo_str)
                        else:
                            ws_key = match
                        # Replace the workspace tag in the command.
                        workspace_var = "$({}.workspace)".format(match)
                        cmd = cmd.replace(workspace_var, workspaces[ws_key])
                        logger.debug("New cmd -- %s", cmd)
                    step_exp.run["cmd"] = cmd
                else:
                    # Otherwise, we know that this step is a joining node.
                    step_exp = copy.deepcopy(node)
                    modified = False
                    logger.debug("No parameters found. Resulting name %s",
                                 step_exp.name)
                    self.output.value = global_workspace

                # Add the workspace name to the map of workspaces.
                workspaces[step_exp.name] = self.output.value

                # Now we need to make sure we handle the dependencies.
                # We know the parent and the step name (whether it's modified
                # or not and is not _source). So now there are three cases:
                #   1. If the ExecutionGraph contains the parent name as it
                #      exists without parameterization, then we know we have
                #      a hub/joining node.
                #   2. If the ExecutionGraph does not have the parent node,
                #      then our next assumption is that it has a parameterized
                #      version of the parent. We need to check and make sure.
                #   3. Fall back third case... Abort. Something is not right.
                if step_exp.run["restart"]:
                    rlimit = self._restart_limit
                else:
                    rlimit = 0

                if parent != SOURCE:
                    # With the rework, we now need to check the parent's used
                    # parameters.
                    combo_str = combo.get_param_string(used_params[parent])
                    param_name = "{}_{}".format(parent, combo_str)
                    # If the parent node is not '_source', check.
                    if parent in dag.values:
                        # If the parent is in the dag, add the current step...
                        dag.add_step(step_exp.name, step_exp,
                                     self.output.value, rlimit)
                        # And its associated edge.
                        dag.add_edge(parent, step_exp.name)
                    elif param_name in dag.values:
                        # Find the index in the step for the dependency...
                        i = step_exp.run['depends'].index(parent)
                        # Sub it with parameterized dependency...
                        step_exp.run['depends'][i] = param_name
                        # Add the node and edge.
                        dag.add_step(step_exp.name, step_exp,
                                     self.output.value, rlimit)
                        dag.add_edge(param_name, step_exp.name)
                    else:
                        msg = "'{}' nor '{}' found in the ExecutionGraph. " \
                              "Unexpected error occurred." \
                              .format(parent, param_name)
                        logger.error(msg)
                        raise ValueError(msg)
                else:
                    # If the parent is source, then we can just execute it from
                    # '_source'.
                    dag.add_step(step_exp.name, step_exp, self.output.value,
                                 rlimit)
                    dag.add_edge(SOURCE, step_exp.name)

                # Go ahead and substitute in the output path and create the
                # workspace in the ExecutionGraph.
                create_parentdir(self.output.value)
                step_exp.__dict__ = apply_function(step_exp.__dict__,
                                                   self.output.substitute)

                # logging
                logger.debug("---------------- Modified --------------")
                logger.debug("Modified = %s", modified)
                logger.debug("step_exp = %s", step_exp.__dict__)
                logger.debug("----------------------------------------")

                # Reset the output path to the global_workspace.
                self.output.value = global_workspace
                logger.info(
                    "==================================================")

        return global_workspace, dag
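
The workspace substitution above depends on WSREGEX to locate $(step.workspace) tokens in a step's command. A standalone illustration of that kind of token matching; the pattern and the workspace path below are assumptions for demonstration, not maestrowf's actual WSREGEX:

import re

WSREGEX = re.compile(r"\$\(([\w-]+)\.workspace\)")  # assumed pattern

cmd = "python post.py $(run-sim.workspace)/out.dat"
for match in WSREGEX.findall(cmd):
    # 'match' is the step name; substitute its resolved workspace path.
    cmd = cmd.replace("$({}.workspace)".format(match),
                      "/studies/demo/run-sim")  # hypothetical workspace
print(cmd)  # python post.py /studies/demo/run-sim/out.dat
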
Example #7
def run_study(args):
    """Run a Maestro study."""
    # Load the Specification
    try:
        spec = YAMLSpecification.load_specification(args.specification)
    except jsonschema.ValidationError as e:
        LOGGER.error(e.message)
        sys.exit(1)
    environment = spec.get_study_environment()
    steps = spec.get_study_steps()

    # Set up the output directory.
    out_dir = environment.remove("OUTPUT_PATH")
    if args.out:
        # If out is specified in the args, ignore OUTPUT_PATH.
        output_path = os.path.abspath(args.out)

        # If we are automatically launching, just set the input as yes.
        if os.path.exists(output_path):
            if args.autoyes:
                uinput = "y"
            elif args.autono:
                uinput = "n"
            else:
                uinput = six.moves.input(
                    "Output path already exists. Would you like to overwrite "
                    "it? [yn] ")

            if uinput.lower() in ACCEPTED_INPUT:
                print("Cleaning up existing out path...")
                shutil.rmtree(output_path)
            else:
                print("Opting to quit -- not cleaning up old out path.")
                sys.exit(0)

    else:
        if out_dir is None:
            # If we don't find OUTPUT_PATH in the environment, assume pwd.
            out_dir = os.path.abspath("./")
        else:
            # We just take the value from the environment.
            out_dir = os.path.abspath(out_dir.value)

        out_name = "{}_{}".format(spec.name.replace(" ", "_"),
                                  time.strftime("%Y%m%d-%H%M%S"))
        output_path = make_safe_path(out_dir, *[out_name])
    environment.add(Variable("OUTPUT_PATH", output_path))

    # Set up file logging
    create_parentdir(os.path.join(output_path, "logs"))
    log_path = os.path.join(output_path, "logs", "{}.log".format(spec.name))
    LOG_UTIL.add_file_handler(log_path, LFORMAT, args.debug_lvl)

    # Check for pargs without the matching pgen
    if args.pargs and not args.pgen:
        msg = "Cannot use the 'pargs' parameter without specifying a 'pgen'!"
        LOGGER.error(msg)
        raise ArgumentError(msg)

    # Addition of the $(SPECROOT) to the environment.
    spec_root = os.path.split(args.specification)[0]
    spec_root = Variable("SPECROOT", os.path.abspath(spec_root))
    environment.add(spec_root)

    # Handle loading a custom ParameterGenerator if specified.
    if args.pgen:
        # 'pargs' has a default of an empty list, which should translate
        # to an empty dictionary.
        kwargs = create_dictionary(args.pargs)
        # Copy the Python file used to generate parameters.
        shutil.copy(args.pgen, output_path)

        # Add keywords and environment from the spec to pgen args.
        kwargs["OUTPUT_PATH"] = output_path
        kwargs["SPECROOT"] = spec_root

        # Load the parameter generator.
        parameters = load_parameter_generator(args.pgen, environment, kwargs)
    else:
        parameters = spec.get_parameters()

    # Set up the study.
    study = Study(spec.name,
                  spec.description,
                  studyenv=environment,
                  parameters=parameters,
                  steps=steps,
                  out_path=output_path)

    # Check that the number of submission attempts is greater than 0:
    if args.attempts < 1:
        _msg = "Submission attempts must be greater than 0. " \
               "'{}' provided.".format(args.attempts)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Check if the throttle is zero or greater:
    if args.throttle < 0:
        _msg = "Submission throttle must be a value of zero or greater. " \
               "'{}' provided.".format(args.throttle)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Check if the restart limit is zero or greater:
    if args.rlimit < 0:
        _msg = "Restart limit must be a value of zero or greater. " \
               "'{}' provided.".format(args.rlimit)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Set up the study workspace and configure it for execution.
    study.setup_workspace()
    study.configure_study(throttle=args.throttle,
                          submission_attempts=args.attempts,
                          restart_limit=args.rlimit,
                          use_tmp=args.usetmp,
                          hash_ws=args.hashws,
                          dry_run=args.dry)
    study.setup_environment()

    if args.dry:
        # If performing a dry run, drive sleep time down to generate scripts.
        sleeptime = 1
    else:
        # else, use args to decide sleeptime
        sleeptime = args.sleeptime

    batch = {"type": "local"}
    if spec.batch:
        batch = spec.batch
        if "type" not in batch:
            batch["type"] = "local"
    # Copy the spec to the output directory
    shutil.copy(args.specification, study.output_path)

    # Use the Conductor's classmethod to store the study.
    Conductor.store_study(study)
    Conductor.store_batch(study.output_path, batch)

    # If we are automatically launching, just set the input as yes.
    if args.autoyes or args.dry:
        uinput = "y"
    elif args.autono:
        uinput = "n"
    else:
        uinput = six.moves.input("Would you like to launch the study? [yn] ")

    if uinput.lower() in ACCEPTED_INPUT:
        if args.fg:
            # Launch in the foreground.
            LOGGER.info("Running Maestro Conductor in the foreground.")
            conductor = Conductor(study)
            conductor.initialize(batch, sleeptime)
            completion_status = conductor.monitor_study()
            conductor.cleanup()
            return completion_status.value
        else:
            # Launch manager with nohup
            log_path = make_safe_path(study.output_path,
                                      *["{}.txt".format(study.name)])

            cmd = [
                "nohup", "conductor", "-t",
                str(sleeptime), "-d",
                str(args.debug_lvl), study.output_path, ">", log_path, "2>&1"
            ]
            LOGGER.debug(" ".join(cmd))
            start_process(" ".join(cmd))

            print("Study launched successfully.")
    else:
        print("Study launch aborted.")

    return 0
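
Example #7 converts the repeated pargs strings into keyword arguments with create_dictionary. A minimal sketch of that conversion, assuming each entry is a 'KEY=value' string (the real maestrowf helper may use a different delimiter or validation):

def create_dictionary(list_keypairs):
    # Split each 'KEY=value' entry on the first '='. Sketch only.
    result = {}
    for pair in list_keypairs or []:
        key, sep, value = pair.partition("=")
        if not sep:
            raise ValueError("'{}' is not a KEY=value pair.".format(pair))
        result[key] = value
    return result

print(create_dictionary(["N_SAMPLES=10", "SEED=42"]))  # hypothetical pargs
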
Example #8
    def setup_workspace(self):
        """Initialize the record's workspace."""
        create_parentdir(self.workspace.value)
Example #9
    def setup(self, submission_attempts=1, restart_limit=1, throttle=0,
              use_tmp=False):
        """
        Perform initial setup of a study.

        The method is used for going through and actually acquiring each
        dependency, substituting variables, sources and labels. Also sets up
        the folder structure for the study.

        :param submission_attempts: Number of attempted submissions before
            marking a step as failed.
        :param restart_limit: Upper limit on the number of times a step with
            a restart command can be resubmitted before it is considered
            failed.
        :param throttle: The maximum number of in-progress jobs allowed
            (0 denotes no cap).
        :param use_tmp: Boolean value specifying if the generated
            ExecutionGraph dumps its information into a temporary directory.
        :returns: True if the Study is successfully set up, False otherwise.
        """
        # If the study has been set up, just return.
        if self._issetup:
            logger.info("%s is already set up, returning.")
            return True

        self._submission_attempts = submission_attempts
        self._restart_limit = restart_limit
        self._submission_throttle = throttle
        self._use_tmp = use_tmp

        logger.info(
            "\n------------------------------------------\n"
            "Output path =               %s\n"
            "Submission attempts =       %d\n"
            "Submission restart limit =  %d\n"
            "Submission throttle limit = %d\n"
            "Use temporary directory =   %s\n"
            "------------------------------------------",
            self._out_path, submission_attempts, restart_limit, throttle,
            use_tmp
        )

        # Set up the environment if it hasn't been already.
        if not self.environment.is_set_up:
            logger.info("Environment is setting up.")
            self.environment.acquire_environment()

        try:
            logger.info("Creating the study output path '%s'.", self._out_path)
            create_parentdir(self._out_path)
        except Exception as e:
            logger.error(str(e))
            return False

        # Apply all environment artifacts and acquire everything.
        for key, node in self.values.items():
            logger.info("Applying to step '%s' of the study '%s'...",
                        key, node)
            if node:
                node.__dict__ = apply_function(
                                    node.__dict__,
                                    self.environment.apply_environment)

        # Flag the study as set up.
        self._issetup = True
        return True
Example #10
    def store_metadata(self):
        """Store metadata related to the study."""
        # Create the metadata directory.
        create_parentdir(self._meta_path)

        # Pickle the study object in order to preserve it.
        path = os.path.join(self._meta_path, "study")
        create_parentdir(path)
        path = os.path.join(path, "env.pkl")
        with open(path, 'wb') as pkl:
            pickle.dump(self, pkl)

        # Construct other metadata related to study construction.
        _workspaces = {}
        for key, value in self.workspaces.items():
            if key == "_source":
                _workspaces[key] = value
            elif key in self.step_combos:
                _workspaces[key] = os.path.split(value)[-1]
            else:
                _workspaces[key] = \
                    os.path.sep.join(value.rsplit(os.path.sep)[-2:])

        # Construct relative paths for the combinations and nest them in the
        # same way as the step combinations dictionary.
        _step_combos = {}
        for key, value in self.step_combos.items():
            if key == SOURCE:
                _step_combos[key] = self.workspaces[key]
            elif not self.used_params[key]:
                _ws = self.workspaces[key]
                _step_combos[key] = {key: os.path.split(_ws)[-1]}
            else:
                _step_combos[key] = {}
                for combo in value:
                    _ws = self.workspaces[combo]
                    _step_combos[key][combo] = \
                        os.path.sep.join(_ws.rsplit(os.path.sep)[-2:])

        metadata = {
            "dependencies": self.depends,
            "hub_dependencies": self.hub_depends,
            "workspaces": _workspaces,
            "used_parameters": self.used_params,
            "step_combinations": _step_combos,
        }
        # Write out the study construction metadata.
        path = os.path.join(self._meta_path, "metadata.yaml")
        with open(path, "wb") as metafile:
            metafile.write(yaml.dump(metadata).encode("utf-8"))

        # Write out parameter metadata.
        metadata = self.parameters.get_metadata()
        path = os.path.join(self._meta_path, "parameters.yaml")
        with open(path, "wb") as metafile:
            metafile.write(yaml.dump(metadata).encode("utf-8"))

        # Write out environment metadata
        path = os.path.join(self._meta_path, "environment.yaml")
        with open(path, "wb") as metafile:
            metafile.write(yaml.dump(os.environ.copy()).encode("utf-8"))
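
Because store_metadata writes plain YAML (UTF-8 encoded), the files can be read back with a standard YAML load. A short sketch reading the saved environment snapshot back in; the path below is hypothetical and assumes the _meta_path layout used above:

import yaml

with open("meta/environment.yaml", "rb") as metafile:  # hypothetical path
    env_snapshot = yaml.safe_load(metafile)
print(sorted(env_snapshot)[:5])  # first few recorded environment variables
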