Ejemplo n.º 1
0
    def __init__(self):
        """Initialise both base classes and declare this queue's arguments.

        Declares the resource arguments (``ngpu``, ``ncpu``, ``memory``,
        ``max_jobs``) followed by the two required string arguments
        (``token`` and ``app``), then creates the empty ``_dirs`` dictionary.
        """
        SimQueue.__init__(self)
        ProtocolInterface.__init__(self)

        # One row per argument: (name, type label, help text, keyword options).
        # Rows are registered strictly in the order listed here.
        argument_table = (
            ('ngpu', 'int', 'Number of GPUs',
             dict(default=0, validator=Number(int, '0POS'))),
            ('ncpu', 'int', 'Number of CPUs',
             dict(default=1, validator=Number(int, '0POS'))),
            ('memory', 'int', 'Amount of memory (MB)',
             dict(default=1000, validator=Number(int, 'POS'))),
            # By default the number of concurrent jobs is effectively unbounded.
            ('max_jobs', 'int', 'Maximum number of concurent jobs',
             dict(default=sys.maxsize, validator=Number(int, 'POS'))),
            ('token', 'str', 'PM token',
             dict(required=True, validator=String())),
            ('app', 'str', 'App name',
             dict(required=True, validator=String())),
        )
        for arg_name, arg_type, arg_help, options in argument_table:
            self._arg(arg_name, arg_type, arg_help, **options)

        self._dirs = {}
Ejemplo n.º 2
0
    def __init__(self):
        """Initialise both base classes, declare arguments and reset state.

        Declares ``datadir``, ``trajext`` and ``copy``, marks ``trajext`` as
        deprecated in favour of ``copy`` via ``_cmdDeprecated``, and sets the
        private bookkeeping attributes to their initial values.
        """
        SimQueue.__init__(self)
        ProtocolInterface.__init__(self)

        register = self._arg
        register(
            "datadir", "str",
            "The path in which to store completed trajectories.",
            None, val.String(),
        )
        register(
            "trajext", "str",
            "Extension of trajectory files. "
            "This is needed to copy them to datadir.",
            "xtc", val.String(),
        )
        # The default copies every file matching the "*.xtc" glob.
        register(
            "copy", "list",
            "A list of file names or globs for the files to copy to datadir",
            ("*.xtc", ), val.String(),
            nargs="*",
        )
        self._cmdDeprecated("trajext", "copy")

        # Private bookkeeping attributes, all starting out empty / unset.
        self._states = {}
        self._queue = None
        self._shutdown = False
Ejemplo n.º 3
0
    def __init__(
        self,
        _configapp=None,
        _configfile=None,
        _findExecutables=True,
        _logger=True,
    ):
        """Declare the queue arguments and load the "playmolecule" profile.

        Parameters
        ----------
        _configapp, _configfile, _logger
            Forwarded unchanged to ``loadConfig``.
        _findExecutables
            Accepted but not used anywhere in this constructor.
        """
        # Imported locally; the two classes are only needed for the validators.
        from playmolecule import Session, Job

        SimQueue.__init__(self)
        ProtocolInterface.__init__(self)

        add_arg = self._arg

        # --- job hierarchy and session ---
        add_arg("parentjob", "playmolecule.job.Job",
                "Spawn all jobs as children of this job",
                default=None, required=False, validator=val.Object(Job))
        add_arg("session", "playmolecule.session.Session",
                "The current PMWS Session object",
                required=True, validator=val.Object(Session))

        # --- identifiers ---
        add_arg("jobname", "str", "Job name (identifier)", None, val.String())
        add_arg("group", "str", "Group name (identifier)", None, val.String())

        # --- requested resources ---
        add_arg("ngpu", "int", "Number of GPUs",
                default=0, validator=val.Number(int, "0POS"))
        add_arg("ncpu", "int", "Number of CPUs",
                default=1, validator=val.Number(int, "0POS"))
        add_arg("memory", "int", "Amount of memory (MB)",
                default=1000, validator=val.Number(int, "POS"))

        # --- application and per-job configuration ---
        add_arg("app", "str", "App name",
                required=True, validator=val.String())
        add_arg(
            "configname", "str",
            "Name of the file containing the individual job configurations "
            "yaml or json. Not a filepath, just the name. "
            "All submitted folders must contain this file.",
            None, val.String(),
        )

        # --- result retrieval ---
        add_arg("retrievedir", "str",
                "Directory in which to retrieve the results of jobs",
                None, val.String())
        add_arg("datadir", "str",
                "Directory in which to copy or symlink the output directory.",
                None, val.String())
        add_arg(
            "symlink", "bool",
            "Set to False to copy instead of symlink the directories "
            "from the retrievedir to datadir",
            True, val.Boolean(),
        )
        add_arg(
            "copy", "list",
            "A list of file names or globs for the files to copy or symlink "
            "from retrievedir to datadir.",
            ("/", ), val.String(),
            nargs="*",
        )

        loadConfig(self, "playmolecule", _configfile, _configapp, _logger)
Ejemplo n.º 4
0
    def __init__(
        self,
        _configapp=None,
        _configfile=None,
        _findExecutables=True,
        _logger=True,
    ):
        """Declare the SGE queue arguments, load the "sge" profile, find tools.

        Parameters
        ----------
        _configapp, _configfile, _logger
            Forwarded unchanged to ``loadConfig``.
        _findExecutables
            When true, the ``qsub``, ``qhost``, ``qdel`` and ``qstat``
            binaries are looked up with ``SgeQueue._find_binary`` and
            ``self._checkQueue()`` is called afterwards.
        """
        SimQueue.__init__(self)
        ProtocolInterface.__init__(self)

        register = self._arg
        defaults = self._defaults

        register("jobname", "str", "Job name (identifier)", None, val.String())
        register(
            "queue", "list",
            "The queue or list of queues to run on. If list, it attempts to "
            "submit the job to the first queue listed",
            defaults["queue"], val.String(),
            nargs="*",
        )

        # Per-job resource requests.
        register("ngpu", "int", "Number of GPUs to use for a single job",
                 defaults["ngpu"], val.Number(int, "0POS"))
        register("ncpu", "int", "Number of CPUs to use for a single job",
                 defaults["ncpu"], val.Number(int, "0POS"))
        register("memory", "int", "Amount of memory per job (MiB)",
                 defaults["memory"], val.Number(int, "0POS"))
        register("walltime", "int", "Job timeout (hour:min or min)",
                 defaults["walltime"], val.Number(int, "0POS"))
        register("pe", "str", "SGE Parallel Environment",
                 defaults["pe"], val.String())
        register("resources", "list", "Resources of the queue",
                 defaults["resources"], val.String(), nargs="*")

        self._cmdDeprecated("environment", "prerun")

        # Stream names and trajectory handling.
        register("outputstream", "str", "Output stream.",
                 "$REQUEST.oJID[.TASKID]", val.String())
        register("errorstream", "str", "Error stream.",
                 "$REQUEST.eJID[.TASKID]", val.String())
        register("datadir", "str",
                 "The path in which to store completed trajectories.",
                 None, val.String())
        register(
            "trajext", "str",
            "Extension of trajectory files. "
            "This is needed to copy them to datadir.",
            "xtc", val.String(),
        )

        # Environment set-up on the running node.
        register(
            "envvars", "str",
            "Envvars to propagate from submission node to the running node "
            "(comma-separated)",
            defaults["envvars"], val.String(),
        )
        register(
            "prerun", "list",
            "Shell commands to execute on the running node before the job "
            "(e.g. loading modules)",
            defaults["prerun"], val.String(),
            nargs="*",
        )

        # Load SGE configuration profile
        loadConfig(self, "sge", _configfile, _configapp, _logger)

        if not _findExecutables:
            return

        # Find executables, assigning them in the same order as before.
        for attribute, binary in (
            ("_qsubmit", "qsub"),
            ("_qinfo", "qhost"),
            ("_qcancel", "qdel"),
            ("_qstatus", "qstat"),
        ):
            setattr(self, attribute, SgeQueue._find_binary(binary))
        self._checkQueue()
Ejemplo n.º 5
0
    def __init__(self,
                 _configapp=None,
                 _configfile=None,
                 _findExecutables=True,
                 _logger=True):
        """Declare the LSF queue arguments, load the "lsf" profile, find tools.

        Parameters
        ----------
        _configapp, _configfile, _logger
            Forwarded unchanged to ``loadConfig``.
        _findExecutables
            When true, the ``bsub``, ``bqueues``, ``bkill`` and ``bjobs``
            binaries are looked up with ``LsfQueue._find_binary``.
        """
        SimQueue.__init__(self)
        ProtocolInterface.__init__(self)
        # Only the two listed major versions are accepted.
        self._arg(
            "version",
            "int",
            "LSF major version",
            self._defaults["version"],
            valid_values=[9, 10],
        )
        self._arg("jobname", "str", "Job name (identifier)", None,
                  val.String())
        self._arg(
            "queue",
            "list",
            "The queue or list of queues to run on. If list, it attempts to submit the job to "
            "the first queue listed",
            self._defaults["queue"],
            val.String(),
            nargs="*",
        )
        self._arg("app", "str", "The application profile",
                  self._defaults["app"], val.String())
        self._arg(
            "ngpu",
            "int",
            "Number of GPUs to use for a single job",
            self._defaults["ngpu"],
            val.Number(int, "0POS"),
        )
        # The help text previously repeated the "ngpu" description; it now
        # describes the dictionary this argument actually accepts.
        self._arg(
            "gpu_options",
            "dict",
            "GPU options for a single job (keys: mode, mps, j_exclusive)",
            self._defaults["gpu_options"],
            val.Dictionary(
                key_type=str,
                valid_keys=["mode", "mps", "j_exclusive"],
                value_type={
                    "mode": str,
                    "mps": str,
                    "j_exclusive": str
                },
                valid_values={
                    "mode": ["shared", "exclusive_process"],
                    "mps": ["yes", "no"],
                    "j_exclusive": ["yes", "no"],
                },
            ),
        )
        self._arg(
            "ncpu",
            "int",
            "Number of CPUs to use for a single job",
            self._defaults["ncpu"],
            val.Number(int, "0POS"),
        )
        self._arg(
            "memory",
            "int",
            "Amount of memory per job (KB)",
            self._defaults["memory"],
            val.Number(int, "0POS"),
        )
        self._arg(
            "walltime",
            "int",
            "Job timeout (hour:min or min)",
            self._defaults["walltime"],
            val.Number(int, "0POS"),
        )
        self._arg(
            "resources",
            "list",
            "Resources of the queue",
            self._defaults["resources"],
            val.String(),
            nargs="*",
        )
        self._cmdDeprecated("environment", "prerun")
        self._arg("outputstream", "str", "Output stream.", "lsf.%J.out",
                  val.String())
        self._arg("errorstream", "str", "Error stream.", "lsf.%J.err",
                  val.String())
        self._arg(
            "datadir",
            "str",
            "The path in which to store completed trajectories.",
            None,
            val.String(),
        )
        self._arg(
            "trajext",
            "str",
            "Extension of trajectory files. This is needed to copy them to datadir.",
            "xtc",
            val.String(),
        )
        self._arg(
            "envvars",
            "str",
            "Envvars to propagate from submission node to the running node (comma-separated)",
            self._defaults["envvars"],
            val.String(),
        )
        self._arg(
            "prerun",
            "list",
            "Shell commands to execute on the running node before the job (e.g. "
            "loading modules)",
            self._defaults["prerun"],
            val.String(),
            nargs="*",
        )

        # Load LSF configuration profile
        loadConfig(self, "lsf", _configfile, _configapp, _logger)

        # Find executables
        if _findExecutables:
            self._qsubmit = LsfQueue._find_binary("bsub")
            self._qinfo = LsfQueue._find_binary("bqueues")
            self._qcancel = LsfQueue._find_binary("bkill")
            self._qstatus = LsfQueue._find_binary("bjobs")
Ejemplo n.º 6
0
    def __init__(
        self, _configapp=None, _configfile=None, _findExecutables=True, _logger=True
    ):
        """Declare the Slurm queue arguments, load the "slurm" profile, find tools.

        Parameters
        ----------
        _configapp, _configfile, _logger
            Forwarded unchanged to ``loadConfig``.
        _findExecutables
            When true, the ``sbatch``, ``sinfo``, ``scancel``, ``squeue`` and
            ``sacct`` binaries are looked up with ``SlurmQueue._find_binary``
            and ``self._checkQueue()`` is called afterwards.
        """
        SimQueue.__init__(self)
        ProtocolInterface.__init__(self)
        self._arg("jobname", "str", "Job name (identifier)", None, val.String())
        # NOTE(review): declared with type label "str" although nargs="*" and
        # the help text allow a list -- confirm whether "list" was intended.
        self._arg(
            "partition",
            "str",
            "The queue (partition) or list of queues to run on. If list, the one offering "
            "earliest initiation will be used.",
            self._defaults["partition"],
            val.String(),
            nargs="*",
        )
        self._arg(
            "priority", "str", "Job priority", self._defaults["priority"], val.String()
        )
        self._arg(
            "ngpu",
            "int",
            "Number of GPUs to use for a single job",
            self._defaults["ngpu"],
            val.Number(int, "0POS"),
        )
        self._arg(
            "ncpu",
            "int",
            "Number of CPUs to use for a single job",
            self._defaults["ncpu"],
            val.Number(int, "POS"),
        )
        self._arg(
            "memory",
            "int",
            "Amount of memory per job (MiB)",
            self._defaults["memory"],
            val.Number(int, "POS"),
        )
        self._arg(
            "gpumemory",
            "int",
            "Only run on GPUs with at least this much memory. Needs special setup of SLURM. "
            "Check how to define gpu_mem on SLURM.",
            None,
            val.Number(int, "0POS"),
        )
        self._arg(
            "walltime",
            "int",
            "Job timeout (minutes)",
            self._defaults["walltime"],
            val.Number(int, "POS"),
        )
        self._cmdDeprecated("environment", "envvars")
        self._arg(
            "mailtype",
            "str",
            "When to send emails. Separate options with commas like 'END,FAIL'.",
            None,
            val.String(),
        )
        self._arg("mailuser", "str", "User email address.", None, val.String())
        self._arg(
            "outputstream", "str", "Output stream.", "slurm.%N.%j.out", val.String()
        )
        # The validator is passed inside the call. It used to sit after the
        # closing parenthesis, so it was built and discarded and "errorstream"
        # was registered without a validator.
        self._arg(
            "errorstream", "str", "Error stream.", "slurm.%N.%j.err", val.String()
        )
        self._arg(
            "datadir",
            "str",
            "The path in which to store completed trajectories.",
            None,
            val.String(),
        )
        self._arg(
            "trajext",
            "str",
            "Extension of trajectory files. This is needed to copy them to datadir.",
            "xtc",
            val.String(),
        )
        self._arg(
            "nodelist",
            "list",
            "A list of nodes on which to run every job at the *same time*! Careful! The jobs"
            " will be duplicated!",
            None,
            val.String(),
            nargs="*",
        )
        self._arg(
            "exclude",
            "list",
            "A list of nodes on which *not* to run the jobs. Use this to select nodes on "
            "which to allow the jobs to run on.",
            None,
            val.String(),
            nargs="*",
        )
        self._arg(
            "envvars",
            "str",
            "Envvars to propagate from submission node to the running node (comma-separated)",
            self._defaults["envvars"],
            val.String(),
        )
        self._arg(
            "prerun",
            "list",
            "Shell commands to execute on the running node before the job (e.g. "
            "loading modules)",
            self._defaults["prerun"],
            val.String(),
            nargs="*",
        )
        self._arg(
            "account",
            "str",
            "Charge resources used by the jobs to specified account.",
            None,
            val.String(),
        )
        # The default is the login name of the user constructing the queue.
        self._arg(
            "user",
            "str",
            "The SLURM user submitting and managing jobs",
            getpass.getuser(),
            val.String(),
        )

        # Load Slurm configuration profile
        loadConfig(self, "slurm", _configfile, _configapp, _logger)

        # Find executables
        if _findExecutables:
            self._qsubmit = SlurmQueue._find_binary("sbatch")
            self._qinfo = SlurmQueue._find_binary("sinfo")
            self._qcancel = SlurmQueue._find_binary("scancel")
            self._qstatus = SlurmQueue._find_binary("squeue")
            self._qjobinfo = SlurmQueue._find_binary("sacct")
            self._checkQueue()