Esempio n. 1
0
def _filechecks(path, filename):
    """Validate a file reference and report what should be staged.

    Arguments:
    path (string): Directory that relative file names are resolved against.
    filename (string): The file reference to validate.

    Returns the file name unchanged when it exists under 'path', or an empty
    string when the reference is an absolute path that does not exist locally.

    Raises exceptions.RequiredinputError when an absolute path points at an
    existing local file, or when a relative name is not found under 'path'.

    """
    if os.path.isabs(filename):

        # An explicit path that does resolve locally is rejected, users are
        # expected to provide plain file names for local input files.
        if os.path.isfile(filename):

            raise exceptions.RequiredinputError(
                "It appears that the user is trying to refer to a file '{0}' "
                "using an explicit path. Please just provide the names of "
                "input files".format(filename))

        # An explicit path that is missing locally is assumed to live on the
        # remote resource, so there is nothing to add.
        return ""

    # A relative name has to exist inside the given directory.
    if not os.path.isfile(os.path.join(path, filename)):

        raise exceptions.RequiredinputError(
            "It appears the file '{0}' is not present in the expected"
            " directory.".format(filename))

    return filename
Esempio n. 2
0
def _newfilechecks(addfile, newfile, path):
    """Perform basic checks on any new file referenced inside an input file.

    Arguments:
    addfile (string): The file in which the reference was found, relative to
                      'path'. A 'rep<digit>' component marks it as living in
                      a replicate subdirectory.
    newfile (string): The file reference found inside 'addfile'.
    path (string): The job directory, only used in error messages.

    Returns 'newfile' rewritten so that it is relative to 'path'.

    Raises exceptions.RequiredinputError if the reference points above the
    job directory, more than one directory up, or into a repX/repX directory.

    """
    addfileinrep = re.search(r'rep\d', addfile)
    newfilerep = re.search(r'rep\d', newfile)
    levelsup = newfile.count("../")

    if levelsup == 1:

        # If we are in a repX subdirectory, the file must be in cwd.
        if addfileinrep:

            _, _, newfile = newfile.rpartition("/")

        # Else we must be in cwd so issue a warning about referring to a file
        # that is above cwd.
        else:

            raise exceptions.RequiredinputError(
                "It appears that the user is trying to refer to a file '{0}' "
                "in file '{1}' that is a directory up from the '{2}' "
                "directory. Only files in '{2}' or a repX subdirectory can be "
                "copied to the remote resource. If the file you are trying to "
                "refer to is on the remote resource, give the explicit path "
                "to the file.".format(newfile, addfile, path))

    # Else ../../ is used in an input script issue an error.
    elif levelsup > 1:

        raise exceptions.RequiredinputError(
            "It appears that the user is trying to refer to a file '{0}' in "
            "file '{1}' that's multiple directories up from a valid "
            "directory. This is not permitted. If the file you are trying to "
            "refer to is on the remote resource, give the explicit path to "
            "the file.".format(newfile, addfile))

    # Else we are in a repX subdirectory and the file isn't in ../ or ./repX,
    # the file is likely in the same directory.
    elif addfileinrep and not newfilerep:

        splitpath, _ = os.path.split(addfile)
        newfile = os.path.join(splitpath, newfile)

    # Else newfile is indicated to be in a repX subdirectory.
    elif newfilerep:

        # If we are already in a repX subdirectory throw exception.
        if addfileinrep:

            raise exceptions.RequiredinputError(
                "It appears that the user is trying to refer to a file '{0}' "
                "that is in a repX/repX subdirectory. This is not permitted.".
                format(newfile))

        # Else we must be in cwd. Keep everything from the repX component
        # onwards; slicing at the match (rather than splitting on "rep")
        # keeps names such as 'rep1/report.txt' intact.
        newfile = newfile[newfilerep.start():]

    return newfile
Esempio n. 3
0
def _hostfileproc(parameters):
    """Locate the host configuration file.

    Arguments:
    parameters (dictionary): The command-line parameters, the "hosts" entry
                             is read and, when it is not an absolute path,
                             replaced in place by the located file path.

    Raises exceptions.RequiredinputError if a relative hosts file name cannot
    be found in the current working directory or the ~/.longbow directory.

    """
    # Hosts - if a filename hasn't been provided default to hosts.conf. This
    # has to be an equality test, identity against a string literal is not
    # guaranteed to hold for equal strings.
    if parameters["hosts"] == "":

        parameters["hosts"] = "hosts.conf"

    # An absolute path is taken as given and is not checked here.
    if os.path.isabs(parameters["hosts"]):

        return

    # Path for CWD.
    cwd = os.path.join(os.getcwd(), parameters["hosts"])

    # Path for ~/.longbow directory.
    longbowdir = os.path.join(os.path.expanduser("~/.longbow"),
                              parameters["hosts"])

    # The current working directory takes precedence over ~/.longbow.
    if os.path.isfile(cwd):

        parameters["hosts"] = cwd

    elif os.path.isfile(longbowdir):

        parameters["hosts"] = longbowdir

    else:

        # NOTE(review): the message names the execution directory although
        # only the two locations above are searched - confirm intent.
        raise exceptions.RequiredinputError(
            "No host configuration file found in the current working "
            "directory '{0}', the execution directory '{1}' or in the "
            "~/.longbow directory.".format(
                os.getcwd(), os.path.dirname(os.path.realpath(__file__))))
Esempio n. 4
0
def _proccommandline(job, filelist, foundflags, substitution):
    """Walk the executable arguments and hand candidate files on.

    Every argument that is not a redirection token ("<" or ">"), does not
    start with "-" or "+" and is not one of the plugin's sub-executables is
    passed to _procfiles, whose return value replaces 'foundflags'.

    Returns the final 'foundflags' value.

    Raises exceptions.RequiredinputError if an IndexError or ValueError is
    raised while the arguments are being processed.

    """
    # Look up the plugin data for this executable.
    registry = getattr(apps, "PLUGINEXECS")
    exename = os.path.basename(job["executable"])
    plugin = registry[exename]
    arguments = list(job["executableargs"])
    plugindata = getattr(apps, plugin.lower()).EXECDATA[exename]
    subexecutables = plugindata["subexecutables"]

    try:

        for token in arguments:

            # Redirection tokens carry no file information themselves.
            if token in ("<", ">"):

                continue

            # Anything starting with - or + is a flag; an empty argument
            # raises IndexError here and is reported below.
            if token[0] in ("-", "+"):

                continue

            # Sub-executables are commands, not files.
            if token in subexecutables:

                continue

            foundflags = _procfiles(job, token, filelist, foundflags,
                                    substitution)

    except (IndexError, ValueError):

        raise exceptions.RequiredinputError(
            "In job '{0}', the command-line arguments for the application "
            "could not be understood. Check the documentation for more "
            "information on how to format command-lines.".format(
                job["jobname"]))

    return foundflags
Esempio n. 5
0
def _flagvalidator(job, foundflags):
    """Validate that required command-line flags are provided.

    Arguments:
    job (dictionary): A single job, the "executable" and "jobname" entries
                      are read.
    foundflags (list): The flags detected on the command-line.

    Raises exceptions.RequiredinputError if any entry of the plugin's
    "requiredfiles" is missing. An entry of the form "a || b" counts as
    satisfied when any one of its alternatives has been found.

    """
    # Initialisation.
    appplugins = getattr(apps, "PLUGINEXECS")
    executable = os.path.basename(job["executable"])
    app = appplugins[executable]
    execdata = getattr(apps, app.lower()).EXECDATA[executable]

    found = set(foundflags)

    # Build the list of missing flags as a new list. Removing entries from a
    # list while looping over it skips the entry after each removal, which
    # would leave satisfied either-type flags reported as missing. Sorting
    # keeps the error message stable between runs.
    flags = []

    for flag in sorted(set(execdata["requiredfiles"]) - found):

        # An either type flag is fine if any alternative was provided.
        if "||" in flag and found.intersection(flag.split(" || ")):

            continue

        flags.append(flag)

    # If there are any missing still then tell the user.
    if flags:

        raise exceptions.RequiredinputError(
            "In job '{0}' the following arguments '{1}' to the application "
            "'{2}' are either missing or they require an input file to be "
            "specified, which has been found to be missing. Please check your "
            "command-line and filenames.".format(job["jobname"], flags, app))
Esempio n. 6
0
def _fileopen(path, addfile):
    """Open 'addfile' inside 'path' for reading and return the handle.

    The caller owns the returned handle and is responsible for closing it.

    Raises exceptions.RequiredinputError if the file cannot be opened.

    """
    try:

        return open(os.path.join(path, addfile), "r")

    except (IOError, OSError):

        raise exceptions.RequiredinputError(
            "Can't read the file '{0}'".format(addfile))
Esempio n. 7
0
def recovery(jobs, recoveryfile):
    """Recover a Longbow session from its recovery file.

    The recovery file is looked up inside the ~/.longbow directory, its job
    parameters are copied into 'jobs', and then the monitoring stage is
    re-entered followed by the cleanup of the remote working directory.

    Required inputs are:
    jobs (dictionary): The jobs data structure, populated in place.
    recoveryfile (string): The name of the recovery file, relative to the
                           ~/.longbow directory.

    Raises exceptions.RequiredinputError if the recovery file is not found.

    """
    jobfile = os.path.join(os.path.expanduser("~/.longbow"), recoveryfile)

    LOG.info("Attempting to find the recovery file '{0}'".format(jobfile))

    # Without the recovery file there is nothing to reconnect to.
    if not os.path.isfile(jobfile):

        raise exceptions.RequiredinputError(
            "Recovery file could not be found, make sure you haven't deleted "
            "the recovery file and that you are not providing the full path, "
            "just the file name is needed.")

    LOG.info("Recovery file found.")

    _, _, restored = configuration.loadconfigs(jobfile)

    # Fill the caller's structure in place so that it is available to the
    # caller when exceptions are raised later on.
    for name in restored:

        jobs[name] = restored[name]

    # Rejoin at the monitoring stage. This will assume that all jobs that
    # are no longer in the queue have completed.
    scheduling.monitor(jobs)

    # Cleanup the remote working directory.
    staging.cleanup(jobs)
Esempio n. 8
0
def update(jobs, updatefile):
    """Trigger an update of a disconnected Longbow session.

    The recovery file is looked up inside the ~/.longbow directory, its job
    parameters are copied into 'jobs', the "update" key is set to True under
    jobs["lbowconf"], and then the monitoring stage is entered followed by
    the cleanup of the remote working directory.

    Required inputs are:
    jobs (dictionary): The jobs data structure, populated in place.
    updatefile (string): The name of the recovery file, relative to the
                         ~/.longbow directory.

    Raises exceptions.RequiredinputError if the recovery file is not found.

    """
    jobfile = os.path.join(os.path.expanduser("~/.longbow"), updatefile)

    LOG.info("Attempting to find the recovery file '{0}'".format(jobfile))

    # Without the recovery file there is nothing to update.
    if not os.path.isfile(jobfile):

        raise exceptions.RequiredinputError(
            "Recovery file could not be found, make sure you haven't deleted "
            "the recovery file and that you are not providing the full path, "
            "just the file name is needed.")

    LOG.info("Recovery file found.")

    _, _, restored = configuration.loadconfigs(jobfile)

    # Fill the caller's structure in place so that it is available to the
    # caller when exceptions are raised later on.
    for name in restored:

        jobs[name] = restored[name]

    # Add the updater key
    jobs["lbowconf"]["update"] = True

    # Enter monitoring loop
    scheduling.monitor(jobs)

    # Cleanup the remote working directory.
    staging.cleanup(jobs)
Esempio n. 9
0
def _jobfileproc(parameters):
    """Locate the job configuration file.

    Arguments:
    parameters (dictionary): The command-line parameters, the "job" entry is
                             read and, when it is a relative name found in
                             the current working directory, replaced in place
                             by the full path.

    Nothing is done when no job file was given or when an absolute path was
    given.

    Raises exceptions.RequiredinputError if a relative job file name cannot
    be found in the current working directory.

    """
    jobfile = parameters["job"]

    # Compare by value, identity against a string literal is not guaranteed
    # to hold for equal strings. Absolute paths are taken as given.
    if jobfile == "" or os.path.isabs(jobfile):

        return

    # Path for CWD.
    cwd = os.path.join(os.getcwd(), jobfile)

    if not os.path.isfile(cwd):

        # NOTE(review): the message names the execution directory although
        # only the current working directory is searched - confirm intent.
        raise exceptions.RequiredinputError(
            "The job configuration file '{0}' couldn't be found in "
            "the current working directory '{1}', the execution "
            "directory '{2}'.".format(
                jobfile, os.getcwd(),
                os.path.dirname(os.path.realpath(__file__))))

    parameters["job"] = cwd
Esempio n. 10
0
def launcher():
    """Entry point for Longbow when used as an application.

    This method is the main entry point for Longbow launched as an application.
    Library users should not use this method when linking Longbow at a high
    level. Developers doing high level linking should be calling Longbow()
    directly with the parameters dictionary already setup.

    This method takes the information from sys.argv and processes this into a
    dictionary format ready to fire longbow(), recovery() or update()
    depending on the --recover and --update flags.

    Any exception other than a user interrupt, a disconnect or an update exit
    is logged (with traceback in debug mode) and the process then exits with
    status 1.

    """
    # -------------------------------------------------------------------------
    # Some defaults and parameter initialisation

    # Fetch command line arguments as list and remove longbow exec. This is
    # sys.argv itself and not a copy, so the executable name is removed from
    # sys.argv as well.
    commandlineargs = sys.argv
    commandlineargs.pop(0)

    # Initialise parameters that could alternatively be provided in
    # configuration files
    parameters = {
        "debug": False,
        "disconnect": False,
        "executable": "",
        "executableargs": "",
        "hosts": "",
        "job": "",
        "jobname": "",
        "log": "",
        "maxtime": "",
        "nochecks": False,
        "recover": "",
        "resource": "",
        "replicates": "",
        "update": "",
        "verbose": False
    }

    # Specify all recognised longbow arguments
    alllongbowargs = [
        "--about", "--debug", "--disconnect", "--examples", "-h", "--help",
        "--hosts", "--job", "--jobname", "--log", "--maxtime", "--nochecks",
        "--recover", "--resource", "--replicates", "--update", "-V",
        "--verbose", "--version"
    ]

    # -------------------------------------------------------------------------
    # Detection of commandline flags and sub functionality.

    # Detect Longbow arguments, the executable and the executable arguments
    # from the command-line.
    longbowargs = _commandlineproc(alllongbowargs, commandlineargs, parameters)

    # Check for information flags such as help or about
    _messageflags(longbowargs)

    # Check if user is wanting to download examples
    _downloadexamples(longbowargs)

    # Grab the Longbow command-line arguments and their values.
    _parsecommandlineswitches(parameters, longbowargs)

    # Logging should be started here, such that only users of the application
    # have logging rules and filters setup. Library users will want/need to
    # set up their own handlers.
    _setuplogger(parameters)

    # The jobs structure has to exist before the try block is entered, the
    # exception handlers below read it and the user can interrupt at any
    # point after this.
    jobs = {}

    # -------------------------------------------------------------------------
    # Setup the top level exception handler, this handler should give the user
    # nicely formatted and understandable error messages (unless run in debug
    # mode).

    # The top level exception handler, this level is simply for the graceful
    # exit and final reporting of errors only. All actions should have been
    # taken by this stage.
    try:

        # Log the start up message, if the user got this far then we are ok to
        # properly start Longbow.
        LOG.info("Welcome to Longbow!")
        LOG.info("This software was developed as part of the EPSRC-funded "
                 "HECBioSim project (http://www.hecbiosim.ac.uk/)")
        LOG.info("HECBioSim facilitates high-end biomolecular simulation "
                 "on resources such as ARCHER")
        LOG.info("Longbow is Copyright (C) of Science and Technology "
                 "Facilities Council and The University of Nottingham.")
        LOG.info("Longbow was created by Dr James T. Gebbie-Rayet, Dr Gareth "
                 "B. Shannon and Prof Charles A. Laughton.")
        LOG.info("Please cite our paper: Gebbie-Rayet, J, Shannon, G, "
                 "Loeffler, H H and Laughton, C A 2016 Longbow: A "
                 "Lightweight Remote Job Submission Tool. Journal of "
                 "Open Research Software, 4: e1, "
                 "DOI: http://dx.doi.org/10.5334/jors.95")
        LOG.info("Python version: %s", PYTHONVERSION)
        LOG.info("Longbow version: %s", LONGBOWVERSION)
        LOG.info("Longbow Commandline: %s", (" ").join(sys.argv))

        _hostfileproc(parameters)
        _jobfileproc(parameters)

        LOG.info("hosts file is: '%s'", parameters["hosts"])

        # If no executable and jobfile has been given then fail.
        if (parameters["executable"] == "" and parameters["job"] == ""
                and parameters["recover"] == ""
                and parameters["update"] == ""):

            raise exceptions.RequiredinputError(
                "There was no executable or job file given on the "
                "command-line, you need to supply one or the other otherwise "
                "Longbow cannot decipher what you would like to do.")

        # ---------------------------------------------------------------------
        # Call one of the main methods at the top level of the library.

        # If recovery or update mode is not active then this is a new run.
        if parameters["recover"] == "" and parameters["update"] == "":

            LOG.info("Initialisation complete.")

            longbow(jobs, parameters)

        # If recovery mode is set then start the recovery process.
        elif parameters["recover"] != "" and parameters["update"] == "":

            LOG.info("Starting recovery mode to reconnect monitoring of jobs.")

            recovery(jobs, parameters["recover"])

        # If update mode is set then start the update process.
        elif parameters["recover"] == "" and parameters["update"] != "":

            LOG.info("Starting update mode to refresh progress of jobs.")

            update(jobs, parameters["update"])

        # If too many arguments are set, we have a problem
        else:

            raise exceptions.CommandlineargsError(
                "You have both the --recover and --update command-line flags "
                "set, these cannot be used together as they enable "
                "conflicting functionality. Either reconnect with persistent "
                "monitoring (--recover) or reconnect to refresh the status of "
                "jobs and sync current files before disconnecting again "
                "(--update).")

    # If the user interrupts Longbow then they are aborting the jobs, so kill
    # off any running jobs and then remove the job directories. Otherwise just
    # raise all other errors to the top level where in future we can attempt to
    # recover.
    except KeyboardInterrupt:

        LOG.info("User interrupt detected.")

        # Everything in the structure except the lbowconf entry is a job.
        jobnames = [a for a in jobs if "lbowconf" not in a]

        if jobnames:

            LOG.info("Kill any queued or running jobs and clean up.")

            # If we are exiting at this stage then we need to kill off
            for item in jobnames:

                job = jobs[item]

                # Jobs without a recorded status are left alone.
                if "laststatus" in job:

                    # If job is not finished delete and stage.
                    if job["laststatus"] not in ("Complete", "Finished",
                                                 "Submit Error"):

                        # Kill it.
                        scheduling.delete(job)

                        # Transfer the directories as they are.
                        staging.stage_downstream(job)

                    # Job is finished then just stage.
                    elif job["laststatus"] != "Submit Error":

                        # Transfer the directories as they are.
                        staging.stage_downstream(job)

            staging.cleanup(jobs)

    # If disconnect mode is enabled then the disconnect exception is raised,
    # allow to disconnect gracefully.
    except exceptions.DisconnectException:

        LOG.info("User specified --disconnect flag on command-line, so "
                 "Longbow will exit.")
        LOG.info("You can reconnect this session for persistent monitoring by "
                 "using the recovery file:")
        LOG.info("longbow --recover {0} --verbose".format(
            jobs["lbowconf"]["recoveryfile"]))
        LOG.info("Or an update of current progress followed by disconnecting "
                 "can be done using:")
        LOG.info("longbow --update {0} --verbose".format(
            jobs["lbowconf"]["recoveryfile"]))

    # If update mode is enabled then the update exit exception is raised once
    # the update has been done, allow to exit gracefully.
    except exceptions.UpdateExit:

        LOG.info("Update of current job progress has completed, exiting.")
        LOG.info("You can reconnect this session for persistent monitoring by "
                 "using the recovery file:")
        LOG.info("longbow --recover {0} --verbose".format(
            jobs["lbowconf"]["recoveryfile"]))
        LOG.info("Or an update of current progress followed by disconnecting "
                 "can be done using:")
        LOG.info("longbow --update {0} --verbose".format(
            jobs["lbowconf"]["recoveryfile"]))

    # If a problem happens assign the correct level of debug logging.
    except Exception as err:

        if parameters["debug"] is True:

            LOG.exception(err)

        else:

            LOG.error(err)

        # Use sys.exit, the bare exit() helper is installed by the site
        # module and is not guaranteed to be available.
        sys.exit(1)

    # Show nice exit message.
    finally:

        LOG.info("Good bye from Longbow!")
        LOG.info("Check out http://www.hecbiosim.ac.uk/ for other "
                 "powerful biomolecular simulation software tools.")
Esempio n. 11
0
def processjobs(jobs):
    """Process the application portion of the command-line.

    This method will process information that is given as an intended target to
    be passed on to the executable at run time. It will check that required
    parameters (provided the respective plug-in is configured correctly) have
    been supplied, and that all files and their dependencies (again provided
    that the respective plug-in is configured for this) exist on disk.

    For each job the "localworkdir", "upload-include", "upload-exclude" and
    "executableargs" entries are updated in place, the latter being replaced
    by the full execution string. Jobs whose executable has no plug-in skip
    the checks and get a generic upload mask instead.

    Required arguments are:

    jobs (dictionary) - The Longbow jobs data structure, see configuration.py
                        for more information about the format of this
                        structure.

    Raises exceptions.RequiredinputError if an argument is an absolute path
    or refers to a parent directory, and exceptions.DirectorynotfoundError if
    the local job directory does not exist.

    """
    LOG.info("Processing job/s and detecting files that require upload.")

    # Everything in the structure except the lbowconf entry is a job.
    jobnames = [a for a in jobs if "lbowconf" not in a]
    multiplejobs = len(jobnames) > 1

    # Process each job.
    for job in jobnames:

        filelist = []
        foundflags = []
        substitution = {}

        LOG.debug("Command-line arguments for job '%s' are '%s'", job,
                  " ".join(jobs[job]["executableargs"]))

        # Check for any files that are located outside the work directory or
        # absolute paths.
        for arg in jobs[job]["executableargs"]:

            if arg.count(os.path.pardir) > 0 or os.path.isabs(arg):

                raise exceptions.RequiredinputError(
                    "In job '{0}' input files are being provided with absolute"
                    " paths or from directories above localworkdir. This is "
                    "not supported".format(job))

        # If we have multiple jobs.
        if multiplejobs:

            # Add the job name to the path.
            jobs[job]["localworkdir"] = os.path.join(jobs[job]["localworkdir"],
                                                     job)

        # Check that the directory exists.
        if os.path.isdir(jobs[job]["localworkdir"]) is False:

            # If not, this is bad.
            raise exceptions.DirectorynotfoundError(
                "The local job directory '{0}' cannot be found for job '{1}'".
                format(jobs[job]["localworkdir"], job))

        # Here we want to support generic executable launching. To do this
        # we will switch off all checking and testing and simply upload all
        # files in the job directory.
        try:

            appplugins = getattr(apps, "PLUGINEXECS")
            app = appplugins[os.path.basename(jobs[job]["executable"])]

        except KeyError:

            LOG.info("The software you are using is unsupported by a plugin. "
                     "Longbow will attempt to submit, but will assume you are "
                     "supplying modules manually or have used a absolute path "
                     "to your executable. If you think this is in error, "
                     "please open a ticket on github.")

            jobs[job]["upload-include"] = ""
            jobs[job]["upload-exclude"] = "*.log"

            # Replace the input command line with the execution command line.
            jobs[job]["executableargs"] = (
                jobs[job]["executable"] + " " +
                " ".join(jobs[job]["executableargs"]))

            LOG.info("For job '%s' - execution string: %s", job,
                     jobs[job]["executableargs"])

            # Only this job is done, carry on with the remaining jobs rather
            # than leaving them unprocessed.
            continue

        # Hook to determine command-line parameter substitutions, plug-ins
        # without the hook are simply skipped.
        try:

            substitution = getattr(apps, app.lower()).detectsubstitutions(
                list(jobs[job]["executableargs"]))

        except AttributeError:

            pass

        # Process the command-line.
        foundflags = _proccommandline(jobs[job], filelist, foundflags,
                                      substitution)

        # Validate if all required flags are present.
        _flagvalidator(jobs[job], foundflags)

        # Some programs are too complex to do file detection, such as
        # chemshell. Those plug-ins provide a hook, whose return value is not
        # used here.
        try:

            getattr(apps, app.lower()).rsyncuploadhook(jobs, job)

        except AttributeError:

            # No hook, so setup the rsync upload masks from the detected
            # files, appending to any includes already set for the job.
            if jobs[job]["upload-include"] != "":

                jobs[job]["upload-include"] = (jobs[job]["upload-include"] +
                                               ", ")

            jobs[job]["upload-include"] = (jobs[job]["upload-include"] +
                                           ", ".join(filelist))

            jobs[job]["upload-exclude"] = "*"

        # Replace the input command line with the execution command line.
        jobs[job]["executableargs"] = (jobs[job]["executable"] + " " +
                                       " ".join(jobs[job]["executableargs"]))

        LOG.info("For job '%s' - execution string: %s", job,
                 jobs[job]["executableargs"])

    LOG.info("Processing jobs - complete.")