Exemple #1
0
    def __init__(self):
        """Initialise both base classes and register this queue's arguments.

        Registers ``groupname``, ``datadir`` and ``verbose`` through
        ``self._arg`` (in that order) and sets ``_cloud`` to ``None``.
        """
        for base in (SimQueue, ProtocolInterface):
            base.__init__(self)

        # One row per argument: (name, type label, help text, default, validator).
        option_specs = (
            ('groupname', 'str',
             'The name of the group of simulations you want to submit. If none is given, '
             'a randomly generated string will be used instead.',
             None, val.String()),
            ('datadir', 'str', 'The directory in which to retrieve your results.', None, val.String()),
            ('verbose', 'bool', 'Turn verbosity mode on or off.', False, val.Boolean()),
        )
        for spec in option_specs:
            self._arg(*spec)

        self._cloud = None
Exemple #2
0
    def __init__(self, _configapp=None):
        """Register the LSF arguments, apply an optional YAML profile and locate the LSF binaries.

        Parameters
        ----------
        _configapp : str, optional
            Name of the profile to read from the YAML file referenced by
            ``_config['lsf']``. Every key of that profile is written into the
            instance ``__dict__``.

        Raises
        ------
        RuntimeError
            If `_configapp` is given but ``_config['lsf']`` is ``None``, or if the
            loaded file has no entry for `_configapp`.
        """
        SimQueue.__init__(self)
        ProtocolInterface.__init__(self)
        self._arg('jobname', 'str', 'Job name (identifier)', None, val.String())
        self._arg('queue', 'str', 'The queue to run on', self._defaults['queue'], val.String())
        self._arg('app', 'str', 'The application profile', self._defaults['app'], val.String())
        self._arg('ngpu', 'int', 'Number of GPUs to use for a single job', self._defaults['ngpu'],
                  val.Number(int, '0POS'))
        self._arg('ncpu', 'int', 'Number of CPUs to use for a single job', self._defaults['ncpu'],
                  val.Number(int, '0POS'))
        self._arg('memory', 'int', 'Amount of memory per job (MB)', self._defaults['memory'], val.Number(int, '0POS'))
        self._arg('walltime', 'int', 'Job timeout (hour:min or min)', self._defaults['walltime'], val.Number(int, '0POS'))
        self._arg('resources', 'list', 'Resources of the queue', self._defaults['resources'], val.String(), nargs='*')
        self._arg('environment', 'list', 'Things to run before the job (sourcing envs).', self._defaults['environment'],
                  val.String(), nargs='*')
        self._arg('outputstream', 'str', 'Output stream.', 'lsf.%J.out', val.String())
        self._arg('errorstream', 'str', 'Error stream.', 'lsf.%J.err', val.String())
        self._arg('datadir', 'str', 'The path in which to store completed trajectories.', None, val.String())
        self._arg('trajext', 'str', 'Extension of trajectory files. This is needed to copy them to datadir.', 'xtc', val.String())

        # Load LSF configuration profile
        lsfconfig = _config['lsf']
        profile = None
        if _configapp is None:
            if lsfconfig is not None:
                logger.warning('LSF configuration YAML file defined without configuration app')
        elif lsfconfig is None:
            raise RuntimeError('No LSF configuration YAML file defined for the configapp')
        else:
            if os.path.isfile(lsfconfig) and lsfconfig.endswith(('.yml', '.yaml')):
                try:
                    with open(lsfconfig, 'r') as f:
                        # safe_load: the profile is plain configuration data. Calling
                        # yaml.load() without a Loader is rejected by recent PyYAML
                        # releases, and that failure used to be hidden by the
                        # catch-all handler below.
                        profile = yaml.safe_load(f)
                except Exception as err:
                    # Best effort: an unreadable or malformed file only produces a warning.
                    logger.warning('Could not load YAML file {}: {}'.format(lsfconfig, err))
                else:
                    logger.info('Loaded LSF configuration YAML file {}'.format(lsfconfig))
            else:
                logger.warning('{} does not exist or it is not a YAML file.'.format(lsfconfig))
            if profile:
                try:
                    properties = profile[_configapp]
                except (KeyError, IndexError, TypeError) as err:
                    raise RuntimeError('There is no profile in {} for configuration '
                                       'app {}'.format(lsfconfig, _configapp)) from err
                for p in properties:
                    # Written directly into __dict__, so any attribute-setting hook
                    # defined on the class is bypassed.
                    self.__dict__[p] = properties[p]
                    logger.info('Setting {} to {}'.format(p, properties[p]))

        # Find executables
        self._qsubmit = LsfQueue._find_binary('bsub')
        self._qinfo = LsfQueue._find_binary('bqueues')
        self._qcancel = LsfQueue._find_binary('bkill')
        self._qstatus = LsfQueue._find_binary('bjobs')
Exemple #3
0
    def __init__(self):
        """Initialise both base classes, register the copy-related arguments and reset state.

        Registers ``datadir``, ``trajext`` and ``copy`` through ``self._arg``,
        calls ``self._cmdDeprecated('trajext', 'copy')`` and then initialises
        ``_states``, ``_queue`` and ``_shutdown``.
        """
        for base in (SimQueue, ProtocolInterface):
            base.__init__(self)

        # Each row: (positional arguments for _arg, keyword arguments for _arg).
        option_specs = (
            (('datadir', 'str', 'The path in which to store completed trajectories.', None, val.String()),
             {}),
            (('trajext', 'str', 'Extension of trajectory files. This is needed to copy them to datadir.', 'xtc',
              val.String()),
             {}),
            (('copy', 'list', 'A list of file names or globs for the files to copy to datadir', ('*.xtc', ),
              val.String()),
             {'nargs': '*'}),
        )
        for positional, keyword in option_specs:
            self._arg(*positional, **keyword)
        self._cmdDeprecated('trajext', 'copy')

        self._states, self._queue, self._shutdown = dict(), None, False
Exemple #4
0
    def __init__(self):
        """Run the base-class initialisers and register ``groupname``, ``datadir`` and ``verbose``.

        ``_cloud`` is set to ``None`` once the arguments are registered.
        """
        SimQueue.__init__(self)
        ProtocolInterface.__init__(self)

        register = self._arg
        groupname_help = ('The name of the group of simulations you want to submit. If none is given, '
                          'a randomly generated string will be used instead.')
        register('groupname', 'str', groupname_help, None, val.String())
        register('datadir', 'str', 'The directory in which to retrieve your results.', None, val.String())
        register('verbose', 'bool', 'Turn verbosity mode on or off.', False, val.Boolean())

        self._cloud = None
Exemple #5
0
    def __init__(self):
        """Initialise both base classes and register the job and instance arguments.

        Arguments are registered through ``self._arg`` in the order listed in
        ``option_specs``; ``_cloud`` is then set to ``None``.
        """
        for base in (SimQueue, ProtocolInterface):
            base.__init__(self)

        # Each row: (positional arguments for _arg, keyword arguments for _arg).
        option_specs = (
            (('groupname', 'str',
              'The name of the group of simulations you want to submit. If none is given, '
              'a randomly generated string will be used instead.',
              None, val.String()), {}),
            (('datadir', 'str', 'The directory in which to retrieve your results.', None, val.String()), {}),
            (('instancetype', 'str', 'Instance type', 'g2.2xlarge', val.String()),
             {'valid_values': ('g2.2xlarge', 'r4.large', 'p2.xlarge')}),
            (('hashnames', 'bool',
              'If True, each job will have a name created from the hash of its directory '
              'instead of using the directory name.',
              False, val.Boolean()), {}),
            (('verbose', 'bool', 'Turn verbosity mode on or off.', False, val.Boolean()), {}),
            (('ngpu', 'int', 'Number of GPUs to use for a single job', 0, val.Number(int, '0POS')), {}),
            (('ncpu', 'int', 'Number of CPUs to use for a single job', 1, val.Number(int, '0POS')), {}),
            (('memory', 'int', 'Amount of memory per job (MB)', 8000, val.Number(int, '0POS')), {}),
        )
        for positional, keyword in option_specs:
            self._arg(*positional, **keyword)

        self._cloud = None
Exemple #6
0
    def __init__(self):
        """Register the LSF job arguments and look up the LSF command-line binaries.

        Most defaults come from ``self._defaults``; the default ``queue`` is the
        ``_defaults`` entry whose key is stored under ``_defaults['default_queue']``.
        """
        SimQueue.__init__(self)
        ProtocolInterface.__init__(self)

        register = self._arg

        # Job identity and target queue.
        register('jobname', 'str', 'Job name (identifier)', None, val.String())
        register('queue', 'str', 'The queue to run on', self._defaults[self._defaults['default_queue']], val.String())

        # Resource requests.
        register('ngpu', 'int', 'Number of GPUs to use for a single job', self._defaults['ngpu'], val.Number(int, '0POS'))
        register('ncpu', 'int', 'Number of CPUs to use for a single job', self._defaults['ncpu'], val.Number(int, '0POS'))
        register('memory', 'int', 'Amount of memory per job (MB)', self._defaults['memory'], val.Number(int, '0POS'))
        register('walltime', 'int', 'Job timeout (hour:min or min)', self._defaults['walltime'], val.Number(int, '0POS'))
        register('resources', 'list', 'Resources of the queue', self._defaults['resources'], val.String(), nargs='*')
        register('environment', 'list', 'Things to run before the job (sourcing envs).', self._defaults['environment'],
                 val.String(), nargs='*')

        # Output handling.
        register('outputstream', 'str', 'Output stream.', 'lsf.%J.out', val.String())
        register('errorstream', 'str', 'Error stream.', 'lsf.%J.err', val.String())
        register('datadir', 'str', 'The path in which to store completed trajectories.', None, val.String())
        register('trajext', 'str', 'Extension of trajectory files. This is needed to copy them to datadir.', 'xtc',
                 val.String())

        # Find executables
        find_binary = LsfQueue._find_binary
        self._qsubmit = find_binary('bsub')
        self._qinfo = find_binary('bqueues')
        self._qcancel = find_binary('bkill')
        self._qstatus = find_binary('bjobs')
Exemple #7
0
    def __init__(self):
        """Run the base-class initialisers, register the copy arguments and reset state.

        After registering ``datadir``, ``trajext`` and ``copy`` it calls
        ``self._cmdDeprecated('trajext', 'copy')`` and initialises the private
        ``_states``, ``_queue`` and ``_shutdown`` attributes.
        """
        SimQueue.__init__(self)
        ProtocolInterface.__init__(self)

        register = self._arg
        register('datadir', 'str', 'The path in which to store completed trajectories.', None, val.String())
        register('trajext', 'str', 'Extension of trajectory files. This is needed to copy them to datadir.', 'xtc',
                 val.String())
        register('copy', 'list', 'A list of file names or globs for the files to copy to datadir', ('*.xtc', ),
                 val.String(), nargs='*')
        self._cmdDeprecated('trajext', 'copy')

        self._states = {}
        self._queue = None
        self._shutdown = False
Exemple #8
0
    def __init__(self, _configapp=None):
        """Register the LSF arguments, apply an optional YAML profile and locate the LSF binaries.

        Parameters
        ----------
        _configapp : str, optional
            Name of the profile to read from the YAML file referenced by
            ``_config['lsf']``. Every key of that profile is written into the
            instance ``__dict__``.

        Raises
        ------
        RuntimeError
            If `_configapp` is given but ``_config['lsf']`` is ``None``, or if the
            loaded file has no entry for `_configapp`.
        """
        SimQueue.__init__(self)
        ProtocolInterface.__init__(self)
        self._arg('version',
                  'int',
                  'LSF major version',
                  self._defaults['version'],
                  valid_values=[9, 10])
        self._arg('jobname', 'str', 'Job name (identifier)', None,
                  val.String())
        self._arg(
            'queue',
            'list',
            'The queue or list of queues to run on. If list, it attempts to submit the job to '
            'the first queue listed',
            self._defaults['queue'],
            val.String(),
            nargs='*')
        self._arg('app', 'str', 'The application profile',
                  self._defaults['app'], val.String())
        self._arg('ngpu', 'int', 'Number of GPUs to use for a single job',
                  self._defaults['ngpu'], val.Number(int, '0POS'))
        # NOTE(review): the help text below repeats the 'ngpu' description; it
        # looks like a copy-paste slip, but it is user-facing so it is left as is.
        self._arg(
            'gpu_options', 'dict', 'Number of GPUs to use for a single job',
            self._defaults['gpu_options'],
            val.Dictionary(key_type=str,
                           valid_keys=['mode', 'mps', 'j_exclusive'],
                           value_type={
                               'mode': str,
                               'mps': str,
                               'j_exclusive': str
                           },
                           valid_values={
                               'mode': ['shared', 'exclusive_process'],
                               'mps': ['yes', 'no'],
                               'j_exclusive': ['yes', 'no']
                           }))
        self._arg('ncpu', 'int', 'Number of CPUs to use for a single job',
                  self._defaults['ncpu'], val.Number(int, '0POS'))
        self._arg('memory', 'int', 'Amount of memory per job (MiB)',
                  self._defaults['memory'], val.Number(int, '0POS'))
        self._arg('walltime', 'int', 'Job timeout (hour:min or min)',
                  self._defaults['walltime'], val.Number(int, '0POS'))
        self._arg('resources',
                  'list',
                  'Resources of the queue',
                  self._defaults['resources'],
                  val.String(),
                  nargs='*')
        self._cmdDeprecated('environment', 'prerun')
        self._arg('outputstream', 'str', 'Output stream.', 'lsf.%J.out',
                  val.String())
        self._arg('errorstream', 'str', 'Error stream.', 'lsf.%J.err',
                  val.String())
        self._arg('datadir', 'str',
                  'The path in which to store completed trajectories.', None,
                  val.String())
        self._arg(
            'trajext', 'str',
            'Extension of trajectory files. This is needed to copy them to datadir.',
            'xtc', val.String())
        self._arg(
            'envvars', 'str',
            'Envvars to propagate from submission node to the running node (comma-separated)',
            self._defaults['envvars'], val.String())
        self._arg(
            'prerun',
            'list',
            'Shell commands to execute on the running node before the job (e.g. '
            'loading modules)',
            self._defaults['prerun'],
            val.String(),
            nargs='*')

        # Load LSF configuration profile
        lsfconfig = _config['lsf']
        profile = None
        if _configapp is None:
            if lsfconfig is not None:
                logger.warning(
                    'LSF configuration YAML file defined without configuration app'
                )
        elif lsfconfig is None:
            raise RuntimeError(
                'No LSF configuration YAML file defined for the configapp')
        else:
            if os.path.isfile(lsfconfig) and lsfconfig.endswith(
                ('.yml', '.yaml')):
                try:
                    with open(lsfconfig, 'r') as f:
                        # safe_load: the profile is plain configuration data.
                        # Calling yaml.load() without a Loader is rejected by
                        # recent PyYAML releases, and that failure used to be
                        # hidden by the catch-all handler below.
                        profile = yaml.safe_load(f)
                except Exception as err:
                    # Best effort: an unreadable or malformed file only warns.
                    logger.warning('Could not load YAML file {}: {}'.format(
                        lsfconfig, err))
                else:
                    logger.info(
                        'Loaded LSF configuration YAML file {}'.format(
                            lsfconfig))
            else:
                logger.warning(
                    '{} does not exist or it is not a YAML file.'.format(
                        lsfconfig))
            if profile:
                try:
                    properties = profile[_configapp]
                except (KeyError, IndexError, TypeError) as err:
                    raise RuntimeError(
                        'There is no profile in {} for configuration '
                        'app {}'.format(lsfconfig, _configapp)) from err
                for p in properties:
                    # Written directly into __dict__, so any attribute-setting
                    # hook defined on the class is bypassed.
                    self.__dict__[p] = properties[p]
                    logger.info('Setting {} to {}'.format(
                        p, properties[p]))

        # Find executables
        self._qsubmit = LsfQueue._find_binary('bsub')
        self._qinfo = LsfQueue._find_binary('bqueues')
        self._qcancel = LsfQueue._find_binary('bkill')
        self._qstatus = LsfQueue._find_binary('bjobs')
Exemple #9
0
    def __init__(
        self, _configapp=None, _configfile=None, _findExecutables=True, _logger=True
    ):
        """Register the SLURM arguments, load the configuration and optionally locate the SLURM binaries.

        Parameters
        ----------
        _configapp : optional
            Forwarded unchanged to ``loadConfig``.
        _configfile : optional
            Forwarded unchanged to ``loadConfig``.
        _findExecutables : bool
            When true, the SLURM command-line binaries are looked up with
            ``SlurmQueue._find_binary`` and ``self._checkQueue()`` is called.
        _logger : optional
            Forwarded unchanged to ``loadConfig``.
        """
        SimQueue.__init__(self)
        ProtocolInterface.__init__(self)
        self._arg("jobname", "str", "Job name (identifier)", None, val.String())
        self._arg(
            "partition",
            "str",
            "The queue (partition) or list of queues to run on. If list, the one offering "
            "earliest initiation will be used.",
            self._defaults["partition"],
            val.String(),
            nargs="*",
        )
        self._arg(
            "priority", "str", "Job priority", self._defaults["priority"], val.String()
        )
        self._arg(
            "ngpu",
            "int",
            "Number of GPUs to use for a single job",
            self._defaults["ngpu"],
            val.Number(int, "0POS"),
        )
        self._arg(
            "ncpu",
            "int",
            "Number of CPUs to use for a single job",
            self._defaults["ncpu"],
            val.Number(int, "POS"),
        )
        self._arg(
            "memory",
            "int",
            "Amount of memory per job (MiB)",
            self._defaults["memory"],
            val.Number(int, "POS"),
        )
        self._arg(
            "gpumemory",
            "int",
            "Only run on GPUs with at least this much memory. Needs special setup of SLURM. "
            "Check how to define gpu_mem on SLURM.",
            None,
            val.Number(int, "0POS"),
        )
        self._arg(
            "walltime",
            "int",
            "Job timeout (minutes)",
            self._defaults["walltime"],
            val.Number(int, "POS"),
        )
        self._cmdDeprecated("environment", "envvars")
        self._arg(
            "mailtype",
            "str",
            "When to send emails. Separate options with commas like 'END,FAIL'.",
            None,
            val.String(),
        )
        self._arg("mailuser", "str", "User email address.", None, val.String())
        self._arg(
            "outputstream", "str", "Output stream.", "slurm.%N.%j.out", val.String()
        )
        # The validator must sit inside the call: it used to follow the closing
        # parenthesis, which built and discarded a tuple and left this argument
        # without a validator.
        self._arg(
            "errorstream", "str", "Error stream.", "slurm.%N.%j.err", val.String()
        )
        self._arg(
            "datadir",
            "str",
            "The path in which to store completed trajectories.",
            None,
            val.String(),
        )
        self._arg(
            "trajext",
            "str",
            "Extension of trajectory files. This is needed to copy them to datadir.",
            "xtc",
            val.String(),
        )
        self._arg(
            "nodelist",
            "list",
            "A list of nodes on which to run every job at the *same time*! Careful! The jobs"
            " will be duplicated!",
            None,
            val.String(),
            nargs="*",
        )
        self._arg(
            "exclude",
            "list",
            "A list of nodes on which *not* to run the jobs. Use this to select nodes on "
            "which to allow the jobs to run on.",
            None,
            val.String(),
            nargs="*",
        )
        self._arg(
            "envvars",
            "str",
            "Envvars to propagate from submission node to the running node (comma-separated)",
            self._defaults["envvars"],
            val.String(),
        )
        self._arg(
            "prerun",
            "list",
            "Shell commands to execute on the running node before the job (e.g. "
            "loading modules)",
            self._defaults["prerun"],
            val.String(),
            nargs="*",
        )
        self._arg(
            "account",
            "str",
            "Charge resources used by the jobs to specified account.",
            None,
            val.String(),
        )
        self._arg(
            "user",
            "str",
            "The SLURM user submitting and managing jobs",
            getpass.getuser(),
            val.String(),
        )

        # Load Slurm configuration profile
        loadConfig(self, "slurm", _configfile, _configapp, _logger)

        # Find executables
        if _findExecutables:
            self._qsubmit = SlurmQueue._find_binary("sbatch")
            self._qinfo = SlurmQueue._find_binary("sinfo")
            self._qcancel = SlurmQueue._find_binary("scancel")
            self._qstatus = SlurmQueue._find_binary("squeue")
            self._qjobinfo = SlurmQueue._find_binary("sacct")
            self._checkQueue()
Exemple #10
0
 def __init__(self):
     """
     Initialise the ``ProtocolInterface`` base with the name "tcpsocket".
     """
     protocol_name = "tcpsocket"
     ProtocolInterface.__init__(self, protocol_name)
Exemple #11
0
    def __init__(self):
        """Register the SLURM job arguments and look up the SLURM command-line binaries.

        Most defaults come from ``self._defaults``; the default ``partition`` is
        the ``_defaults`` entry whose key is stored under
        ``_defaults['default_partition']``.
        """
        SimQueue.__init__(self)
        ProtocolInterface.__init__(self)
        self._arg('jobname', 'str', 'Job name (identifier)', None,
                  val.String())
        self._arg('partition', 'str', 'The queue (partition) to run on',
                  self._defaults[self._defaults['default_partition']],
                  val.String())
        self._arg('priority', 'str', 'Job priority',
                  self._defaults['priority'], val.String())
        self._arg('ngpu', 'int', 'Number of GPUs to use for a single job',
                  self._defaults['ngpu'], val.Number(int, '0POS'))
        self._arg('ncpu', 'int', 'Number of CPUs to use for a single job',
                  self._defaults['ncpu'], val.Number(int, 'POS'))
        self._arg('memory', 'int', 'Amount of memory per job (MB)',
                  self._defaults['memory'], val.Number(int, 'POS'))
        self._arg(
            'gpumemory', 'int',
            'Only run on GPUs with at least this much memory. Needs special setup of SLURM. '
            'Check how to define gpu_mem on SLURM.', None,
            val.Number(int, '0POS'))
        self._arg('walltime', 'int', 'Job timeout (s)',
                  self._defaults['walltime'], val.Number(int, 'POS'))
        self._arg('environment', 'str', 'Envvars to propagate to the job.',
                  self._defaults['environment'], val.String())
        self._arg(
            'mailtype', 'str',
            'When to send emails. Separate options with commas like \'END,FAIL\'.',
            None, val.String())
        self._arg('mailuser', 'str', 'User email address.', None, val.String())
        self._arg('outputstream', 'str', 'Output stream.', 'slurm.%N.%j.out',
                  val.String())
        # The validator must sit inside the call: it used to follow the closing
        # parenthesis, which built and discarded a tuple and left this argument
        # without a validator.
        self._arg(
            'errorstream', 'str', 'Error stream.',
            'slurm.%N.%j.err', val.String())  # Maybe change these to job name
        self._arg('datadir', 'str',
                  'The path in which to store completed trajectories.', None,
                  val.String())
        self._arg(
            'trajext', 'str',
            'Extension of trajectory files. This is needed to copy them to datadir.',
            'xtc', val.String())
        self._arg(
            'nodelist',
            'list',
            'A list of nodes on which to run every job at the *same time*! Careful! The jobs'
            ' will be duplicated!',
            None,
            val.String(),
            nargs='*')
        self._arg(
            'exclude',
            'list',
            'A list of nodes on which *not* to run the jobs. Use this to select nodes on '
            'which to allow the jobs to run on.',
            None,
            val.String(),
            nargs='*')

        # Find executables
        self._qsubmit = SlurmQueue._find_binary('sbatch')
        self._qinfo = SlurmQueue._find_binary('sinfo')
        self._qcancel = SlurmQueue._find_binary('scancel')
        self._qstatus = SlurmQueue._find_binary('squeue')
Exemple #12
0
    def __init__(self, _configapp=None, _configfile=None, _findExecutables=True, _logger=True):
        """Register the PlayMolecule queue arguments and load the "playmolecule" configuration.

        ``Session`` and ``Job`` are imported from ``playmolecule`` inside the
        method and used to build the validators for ``session`` and ``parentjob``.
        ``_configfile``, ``_configapp`` and ``_logger`` are forwarded to
        ``loadConfig``; ``_findExecutables`` is not used in this method.
        """
        from playmolecule import Session, Job

        SimQueue.__init__(self)
        ProtocolInterface.__init__(self)

        register = self._arg

        # Job hierarchy and session.
        register("parentjob", "playmolecule.job.Job", "Spawn all jobs as children of this job",
                 default=None, required=False, validator=val.Object(Job))
        register("session", "playmolecule.session.Session", "The current PMWS Session object",
                 required=True, validator=val.Object(Session))
        register("jobname", "str", "Job name (identifier)", None, val.String())
        register("group", "str", "Group name (identifier)", None, val.String())

        # Resource requests.
        register("ngpu", "int", "Number of GPUs", default=0, validator=val.Number(int, "0POS"))
        register("ncpu", "int", "Number of CPUs", default=1, validator=val.Number(int, "0POS"))
        register("memory", "int", "Amount of memory (MB)", default=1000, validator=val.Number(int, "POS"))

        # Application and per-job configuration.
        register("app", "str", "App name", required=True, validator=val.String())
        configname_help = (
            "Name of the file containing the individual job configurations yaml or json. Not a filepath, just the name. All submitted folders must contain this file."
        )
        register("configname", "str", configname_help, None, val.String())

        # Result retrieval.
        register("retrievedir", "str", "Directory in which to retrieve the results of jobs", None, val.String())
        register("datadir", "str", "Directory in which to copy or symlink the output directory.", None, val.String())
        symlink_help = (
            "Set to False to copy instead of symlink the directories from the retrievedir to datadir"
        )
        register("symlink", "bool", symlink_help, True, val.Boolean())
        copy_help = (
            "A list of file names or globs for the files to copy or symlink from retrievedir to datadir."
        )
        register("copy", "list", copy_help, ("/", ), val.String(), nargs="*")

        loadConfig(self, "playmolecule", _configfile, _configapp, _logger)
Exemple #13
0
    def __init__(self, _configapp=None):
        """Register the SLURM arguments, apply an optional YAML profile and locate the SLURM binaries.

        Parameters
        ----------
        _configapp : str, optional
            Name of the profile to read from the YAML file referenced by
            ``_config['slurm']``. Every key of that profile is applied to the
            instance with ``setattr``.

        Raises
        ------
        RuntimeError
            If `_configapp` is given but ``_config['slurm']`` is ``None``, or if
            the loaded file has no entry for `_configapp`.
        """
        SimQueue.__init__(self)
        ProtocolInterface.__init__(self)
        self._arg('jobname', 'str', 'Job name (identifier)', None,
                  val.String())
        self._arg(
            'partition',
            'str',
            'The queue (partition) or list of queues to run on. If list, the one offering '
            'earliest initiation will be used.',
            self._defaults['partition'],
            val.String(),
            nargs='*')
        self._arg('priority', 'str', 'Job priority',
                  self._defaults['priority'], val.String())
        self._arg('ngpu', 'int', 'Number of GPUs to use for a single job',
                  self._defaults['ngpu'], val.Number(int, '0POS'))
        self._arg('ncpu', 'int', 'Number of CPUs to use for a single job',
                  self._defaults['ncpu'], val.Number(int, 'POS'))
        self._arg('memory', 'int', 'Amount of memory per job (MiB)',
                  self._defaults['memory'], val.Number(int, 'POS'))
        self._arg(
            'gpumemory', 'int',
            'Only run on GPUs with at least this much memory. Needs special setup of SLURM. '
            'Check how to define gpu_mem on SLURM.', None,
            val.Number(int, '0POS'))
        self._arg('walltime', 'int', 'Job timeout (minutes)',
                  self._defaults['walltime'], val.Number(int, 'POS'))
        self._cmdDeprecated('environment', 'envvars')
        self._arg(
            'mailtype', 'str',
            'When to send emails. Separate options with commas like \'END,FAIL\'.',
            None, val.String())
        self._arg('mailuser', 'str', 'User email address.', None, val.String())
        self._arg('outputstream', 'str', 'Output stream.', 'slurm.%N.%j.out',
                  val.String())
        # The validator must sit inside the call: it used to follow the closing
        # parenthesis, which built and discarded a tuple and left this argument
        # without a validator.
        self._arg('errorstream', 'str', 'Error stream.',
                  'slurm.%N.%j.err', val.String())
        self._arg('datadir', 'str',
                  'The path in which to store completed trajectories.', None,
                  val.String())
        self._arg(
            'trajext', 'str',
            'Extension of trajectory files. This is needed to copy them to datadir.',
            'xtc', val.String())
        self._arg(
            'nodelist',
            'list',
            'A list of nodes on which to run every job at the *same time*! Careful! The jobs'
            ' will be duplicated!',
            None,
            val.String(),
            nargs='*')
        self._arg(
            'exclude',
            'list',
            'A list of nodes on which *not* to run the jobs. Use this to select nodes on '
            'which to allow the jobs to run on.',
            None,
            val.String(),
            nargs='*')
        self._arg(
            'envvars', 'str',
            'Envvars to propagate from submission node to the running node (comma-separated)',
            self._defaults['envvars'], val.String())
        self._arg(
            'prerun',
            'list',
            'Shell commands to execute on the running node before the job (e.g. '
            'loading modules)',
            self._defaults['prerun'],
            val.String(),
            nargs='*')
        self._arg('account', 'str',
                  'Charge resources used by the jobs to specified account.',
                  None, val.String())

        # Load Slurm configuration profile
        slurmconfig = _config['slurm']
        profile = None
        if _configapp is None:
            if slurmconfig is not None:
                logger.warning(
                    'Slurm configuration YAML file defined without configuration app'
                )
        elif slurmconfig is None:
            raise RuntimeError(
                'No Slurm configuration YAML file defined for the configapp'
            )
        else:
            if os.path.isfile(slurmconfig) and slurmconfig.endswith(
                ('.yml', '.yaml')):
                try:
                    with open(slurmconfig, 'r') as f:
                        # safe_load: the profile is plain configuration data.
                        # Calling yaml.load() without a Loader is rejected by
                        # recent PyYAML releases, and that failure used to be
                        # hidden by the catch-all handler below.
                        profile = yaml.safe_load(f)
                except Exception as err:
                    # Best effort: an unreadable or malformed file only warns.
                    logger.warning('Could not load YAML file {}: {}'.format(
                        slurmconfig, err))
                else:
                    logger.info(
                        'Loaded Slurm configuration YAML file {}'.format(
                            slurmconfig))
            else:
                logger.warning(
                    '{} does not exist or it is not a YAML file.'.format(
                        slurmconfig))
            if profile:
                try:
                    properties = profile[_configapp]
                except (KeyError, IndexError, TypeError) as err:
                    raise RuntimeError(
                        'There is no profile in {} for configuration '
                        'app {}'.format(slurmconfig, _configapp)) from err
                for p in properties:
                    setattr(self, p, properties[p])
                    logger.info('Setting {} to {}'.format(
                        p, properties[p]))

        # Find executables
        self._qsubmit = SlurmQueue._find_binary('sbatch')
        self._qinfo = SlurmQueue._find_binary('sinfo')
        self._qcancel = SlurmQueue._find_binary('scancel')
        self._qstatus = SlurmQueue._find_binary('squeue')
Exemple #14
0
    def __init__(self, _configapp=None):
        """Register the LSF queue options, apply an optional profile and locate the LSF binaries.

        Parameters
        ----------
        _configapp : str, optional
            Name of the profile to read from the LSF configuration YAML file referenced by
            ``_config['lsf']``. Every key found in that profile is written directly into the
            instance ``__dict__`` (bypassing the validators registered through ``_arg``).

        Raises
        ------
        RuntimeError
            If `_configapp` is given but ``_config['lsf']`` is None, or if the loaded YAML
            content has no entry for `_configapp`.
        """
        SimQueue.__init__(self)
        ProtocolInterface.__init__(self)
        self._arg('version', 'int', 'LSF major version', self._defaults['version'], valid_values=[9, 10])
        self._arg('jobname', 'str', 'Job name (identifier)', None, val.String())
        self._arg('queue', 'list', 'The queue or list of queues to run on. If list, it attempts to submit the job to '
                                   'the first queue listed', self._defaults['queue'], val.String(), nargs='*')
        self._arg('app', 'str', 'The application profile', self._defaults['app'], val.String())
        self._arg('ngpu', 'int', 'Number of GPUs to use for a single job', self._defaults['ngpu'],
                  val.Number(int, '0POS'))
        # The description used to be a copy of the 'ngpu' one; this option holds the GPU
        # requirement settings listed in valid_keys below, not a GPU count.
        self._arg('gpu_options', 'dict', 'GPU requirement options for a single job (mode, mps, j_exclusive)',
                  self._defaults['gpu_options'],
                  val.Dictionary(key_type=str, valid_keys=['mode', 'mps', 'j_exclusive'],
                                 value_type={'mode': str, 'mps': str, 'j_exclusive': str},
                                 valid_values={'mode': ['shared', 'exclusive_process'],
                                               'mps': ['yes', 'no'], 'j_exclusive': ['yes', 'no']}
                                 )
                  )
        self._arg('ncpu', 'int', 'Number of CPUs to use for a single job', self._defaults['ncpu'],
                  val.Number(int, '0POS'))
        self._arg('memory', 'int', 'Amount of memory per job (MiB)', self._defaults['memory'], val.Number(int, '0POS'))
        self._arg('walltime', 'int', 'Job timeout (hour:min or min)', self._defaults['walltime'],
                  val.Number(int, '0POS'))
        self._arg('resources', 'list', 'Resources of the queue', self._defaults['resources'], val.String(), nargs='*')
        self._cmdDeprecated('environment', 'prerun')
        self._arg('outputstream', 'str', 'Output stream.', 'lsf.%J.out', val.String())
        self._arg('errorstream', 'str', 'Error stream.', 'lsf.%J.err', val.String())
        self._arg('datadir', 'str', 'The path in which to store completed trajectories.', None, val.String())
        self._arg('trajext', 'str', 'Extension of trajectory files. This is needed to copy them to datadir.', 'xtc',
                  val.String())
        self._arg('envvars', 'str', 'Envvars to propagate from submission node to the running node (comma-separated)',
                  self._defaults['envvars'], val.String())
        self._arg('prerun', 'list', 'Shell commands to execute on the running node before the job (e.g. '
                                    'loading modules)', self._defaults['prerun'], val.String(), nargs='*')

        # Load LSF configuration profile
        lsfconfig = _config['lsf']
        profile = None
        if _configapp is not None:
            if lsfconfig is None:
                raise RuntimeError('No LSF configuration YAML file defined for the configapp')
            if os.path.isfile(lsfconfig) and lsfconfig.endswith(('.yml', '.yaml')):
                try:
                    with open(lsfconfig, 'r') as f:
                        # safe_load instead of yaml.load(f): the latter requires an explicit
                        # Loader on PyYAML >= 6 (TypeError otherwise) and can build arbitrary
                        # Python objects. Profiles are expected to be plain mappings.
                        profile = yaml.safe_load(f)
                    logger.info('Loaded LSF configuration YAML file {}'.format(lsfconfig))
                except Exception as err:
                    # Loading stays best-effort, but the reason is no longer hidden and
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    logger.warning('Could not load YAML file {}: {}'.format(lsfconfig, err))
            else:
                logger.warning('{} does not exist or it is not a YAML file.'.format(lsfconfig))
            if profile:
                try:
                    properties = profile[_configapp]
                except (KeyError, IndexError, TypeError):
                    raise RuntimeError('There is no profile in {} for configuration '
                                       'app {}'.format(lsfconfig, _configapp))
                for p in properties:
                    # Written straight into __dict__, so profile values are not validated.
                    self.__dict__[p] = properties[p]
                    logger.info('Setting {} to {}'.format(p, properties[p]))
        elif lsfconfig is not None:
            logger.warning('LSF configuration YAML file defined without configuration app')

        # Find executables
        self._qsubmit = LsfQueue._find_binary('bsub')
        self._qinfo = LsfQueue._find_binary('bqueues')
        self._qcancel = LsfQueue._find_binary('bkill')
        self._qstatus = LsfQueue._find_binary('bjobs')
Exemple #15
0
    def __init__(self, _configapp=None):
        """Register the Slurm queue options, apply an optional profile and locate the Slurm binaries.

        Parameters
        ----------
        _configapp : str, optional
            Name of the profile to read from the Slurm configuration YAML file referenced by
            ``_config['slurm']``. Every key found in that profile is written directly into the
            instance ``__dict__`` (bypassing the validators registered through ``_arg``).

        Raises
        ------
        RuntimeError
            If `_configapp` is given but ``_config['slurm']`` is None, or if the loaded YAML
            content has no entry for `_configapp`.
        """
        SimQueue.__init__(self)
        ProtocolInterface.__init__(self)
        self._arg('jobname', 'str', 'Job name (identifier)', None, val.String())
        self._arg('partition', 'str', 'The queue (partition) to run on', self._defaults['partition'], val.String())
        self._arg('priority', 'str', 'Job priority', self._defaults['priority'], val.String())
        self._arg('ngpu', 'int', 'Number of GPUs to use for a single job', self._defaults['ngpu'],
                  val.Number(int, '0POS'))
        self._arg('ncpu', 'int', 'Number of CPUs to use for a single job', self._defaults['ncpu'],
                  val.Number(int, 'POS'))
        self._arg('memory', 'int', 'Amount of memory per job (MB)', self._defaults['memory'], val.Number(int, 'POS'))
        self._arg('gpumemory', 'int', 'Only run on GPUs with at least this much memory. Needs special setup of SLURM. '
                                      'Check how to define gpu_mem on SLURM.', None, val.Number(int, '0POS'))
        self._arg('walltime', 'int', 'Job timeout (s)', self._defaults['walltime'], val.Number(int, 'POS'))
        self._arg('environment', 'str', 'Envvars to propagate to the job.', self._defaults['environment'], val.String())
        self._arg('mailtype', 'str', 'When to send emails. Separate options with commas like \'END,FAIL\'.', None, val.String())
        self._arg('mailuser', 'str', 'User email address.', None, val.String())
        self._arg('outputstream', 'str', 'Output stream.', 'slurm.%N.%j.out', val.String())
        # The validator used to sit outside the call parentheses, so it was built and thrown
        # away and 'errorstream' was registered without any validation.
        self._arg('errorstream', 'str', 'Error stream.', 'slurm.%N.%j.err', val.String())  # Maybe change these to job name
        self._arg('datadir', 'str', 'The path in which to store completed trajectories.', None, val.String())
        self._arg('trajext', 'str', 'Extension of trajectory files. This is needed to copy them to datadir.', 'xtc',
                  val.String())
        self._arg('nodelist', 'list', 'A list of nodes on which to run every job at the *same time*! Careful! The jobs'
                                      ' will be duplicated!', None, val.String(), nargs='*')
        self._arg('exclude', 'list', 'A list of nodes on which *not* to run the jobs. Use this to select nodes on '
                                     'which to allow the jobs to run on.', None, val.String(), nargs='*')

        # Load Slurm configuration profile
        slurmconfig = _config['slurm']
        profile = None
        if _configapp is not None:
            if slurmconfig is None:
                raise RuntimeError('No Slurm configuration YAML file defined for the configapp')
            if os.path.isfile(slurmconfig) and slurmconfig.endswith(('.yml', '.yaml')):
                try:
                    with open(slurmconfig, 'r') as f:
                        # safe_load instead of yaml.load(f): the latter requires an explicit
                        # Loader on PyYAML >= 6 (TypeError otherwise) and can build arbitrary
                        # Python objects. Profiles are expected to be plain mappings.
                        profile = yaml.safe_load(f)
                    logger.info('Loaded Slurm configuration YAML file {}'.format(slurmconfig))
                except Exception as err:
                    # Loading stays best-effort, but the reason is no longer hidden and
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    logger.warning('Could not load YAML file {}: {}'.format(slurmconfig, err))
            else:
                logger.warning('{} does not exist or it is not a YAML file.'.format(slurmconfig))
            if profile:
                try:
                    properties = profile[_configapp]
                except (KeyError, IndexError, TypeError):
                    raise RuntimeError('There is no profile in {} for configuration '
                                       'app {}'.format(slurmconfig, _configapp))
                for p in properties:
                    # Written straight into __dict__, so profile values are not validated.
                    self.__dict__[p] = properties[p]
                    logger.info('Setting {} to {}'.format(p, properties[p]))
        elif slurmconfig is not None:
            logger.warning('Slurm configuration YAML file defined without configuration app')

        # Find executables
        self._qsubmit = SlurmQueue._find_binary('sbatch')
        self._qinfo = SlurmQueue._find_binary('sinfo')
        self._qcancel = SlurmQueue._find_binary('scancel')
        self._qstatus = SlurmQueue._find_binary('squeue')
Exemple #16
0
    def __init__(self,
                 _configapp=None,
                 _configfile=None,
                 _findExecutables=True,
                 _logger=True):
        """Register the LSF queue options, load the LSF profile and optionally locate the LSF binaries.

        Parameters
        ----------
        _configapp : optional
            Forwarded to ``loadConfig`` as the configuration app.
        _configfile : optional
            Forwarded to ``loadConfig`` as the configuration file.
        _findExecutables : bool, optional
            When true (default), look up ``bsub``, ``bqueues``, ``bkill`` and ``bjobs`` with
            ``LsfQueue._find_binary`` and store them on ``_qsubmit``, ``_qinfo``, ``_qcancel``
            and ``_qstatus``. When false those attributes are not set here.
        _logger : optional
            Forwarded to ``loadConfig``; presumably toggles its logging -- confirm against
            ``loadConfig``.
        """
        SimQueue.__init__(self)
        ProtocolInterface.__init__(self)
        self._arg(
            "version",
            "int",
            "LSF major version",
            self._defaults["version"],
            valid_values=[9, 10],
        )
        self._arg("jobname", "str", "Job name (identifier)", None,
                  val.String())
        self._arg(
            "queue",
            "list",
            "The queue or list of queues to run on. If list, it attempts to submit the job to "
            "the first queue listed",
            self._defaults["queue"],
            val.String(),
            nargs="*",
        )
        self._arg("app", "str", "The application profile",
                  self._defaults["app"], val.String())
        self._arg(
            "ngpu",
            "int",
            "Number of GPUs to use for a single job",
            self._defaults["ngpu"],
            val.Number(int, "0POS"),
        )
        # The description used to be a copy of the "ngpu" one; this option holds the GPU
        # requirement settings listed in valid_keys below, not a GPU count.
        self._arg(
            "gpu_options",
            "dict",
            "GPU requirement options for a single job (mode, mps, j_exclusive)",
            self._defaults["gpu_options"],
            val.Dictionary(
                key_type=str,
                valid_keys=["mode", "mps", "j_exclusive"],
                value_type={
                    "mode": str,
                    "mps": str,
                    "j_exclusive": str
                },
                valid_values={
                    "mode": ["shared", "exclusive_process"],
                    "mps": ["yes", "no"],
                    "j_exclusive": ["yes", "no"],
                },
            ),
        )
        self._arg(
            "ncpu",
            "int",
            "Number of CPUs to use for a single job",
            self._defaults["ncpu"],
            val.Number(int, "0POS"),
        )
        self._arg(
            "memory",
            "int",
            "Amount of memory per job (KB)",
            self._defaults["memory"],
            val.Number(int, "0POS"),
        )
        self._arg(
            "walltime",
            "int",
            "Job timeout (hour:min or min)",
            self._defaults["walltime"],
            val.Number(int, "0POS"),
        )
        self._arg(
            "resources",
            "list",
            "Resources of the queue",
            self._defaults["resources"],
            val.String(),
            nargs="*",
        )
        # 'environment' is kept only as a deprecated name pointing to 'prerun'.
        self._cmdDeprecated("environment", "prerun")
        self._arg("outputstream", "str", "Output stream.", "lsf.%J.out",
                  val.String())
        self._arg("errorstream", "str", "Error stream.", "lsf.%J.err",
                  val.String())
        self._arg(
            "datadir",
            "str",
            "The path in which to store completed trajectories.",
            None,
            val.String(),
        )
        self._arg(
            "trajext",
            "str",
            "Extension of trajectory files. This is needed to copy them to datadir.",
            "xtc",
            val.String(),
        )
        self._arg(
            "envvars",
            "str",
            "Envvars to propagate from submission node to the running node (comma-separated)",
            self._defaults["envvars"],
            val.String(),
        )
        self._arg(
            "prerun",
            "list",
            "Shell commands to execute on the running node before the job (e.g. "
            "loading modules)",
            self._defaults["prerun"],
            val.String(),
            nargs="*",
        )

        # Load LSF configuration profile
        loadConfig(self, "lsf", _configfile, _configapp, _logger)

        # Find executables
        if _findExecutables:
            self._qsubmit = LsfQueue._find_binary("bsub")
            self._qinfo = LsfQueue._find_binary("bqueues")
            self._qcancel = LsfQueue._find_binary("bkill")
            self._qstatus = LsfQueue._find_binary("bjobs")
Exemple #17
0
    def __init__(self, _configapp=None, _configfile=None, _findExecutables=True, _logger=True):
        """Register the SGE queue options, load the SGE profile and optionally locate the SGE binaries.

        Parameters
        ----------
        _configapp : optional
            Forwarded to ``loadConfig`` as the configuration app.
        _configfile : optional
            Forwarded to ``loadConfig`` as the configuration file.
        _findExecutables : bool, optional
            When true (default), look up ``qsub``, ``qhost``, ``qdel`` and ``qstat`` with
            ``SgeQueue._find_binary``, store them on ``_qsubmit``, ``_qinfo``, ``_qcancel`` and
            ``_qstatus``, and then call ``_checkQueue``. When false none of that happens.
        _logger : optional
            Forwarded to ``loadConfig``; presumably toggles its logging -- confirm against
            ``loadConfig``.
        """
        SimQueue.__init__(self)
        ProtocolInterface.__init__(self)

        # Short alias for the registration method; each call below builds its own validator.
        add = self._arg

        add('jobname', 'str', 'Job name (identifier)', None, val.String())
        add('queue', 'list',
            'The queue or list of queues to run on. If list, it attempts to submit the job to the first queue listed',
            self._defaults['queue'], val.String(), nargs='*')
        add('ngpu', 'int', 'Number of GPUs to use for a single job', self._defaults['ngpu'],
            val.Number(int, '0POS'))
        add('ncpu', 'int', 'Number of CPUs to use for a single job', self._defaults['ncpu'],
            val.Number(int, '0POS'))
        add('memory', 'int', 'Amount of memory per job (MiB)', self._defaults['memory'],
            val.Number(int, '0POS'))
        add('walltime', 'int', 'Job timeout (hour:min or min)', self._defaults['walltime'],
            val.Number(int, '0POS'))
        add('pe', 'str', 'SGE Parallel Environment', self._defaults['pe'], val.String())
        add('resources', 'list', 'Resources of the queue', self._defaults['resources'],
            val.String(), nargs='*')

        # 'environment' is kept only as a deprecated name pointing to 'prerun'.
        self._cmdDeprecated('environment', 'prerun')

        add('outputstream', 'str', 'Output stream.', '$REQUEST.oJID[.TASKID]', val.String())
        add('errorstream', 'str', 'Error stream.', '$REQUEST.eJID[.TASKID]', val.String())
        add('datadir', 'str', 'The path in which to store completed trajectories.', None, val.String())
        add('trajext', 'str', 'Extension of trajectory files. This is needed to copy them to datadir.',
            'xtc', val.String())
        add('envvars', 'str',
            'Envvars to propagate from submission node to the running node (comma-separated)',
            self._defaults['envvars'], val.String())
        add('prerun', 'list',
            'Shell commands to execute on the running node before the job (e.g. loading modules)',
            self._defaults['prerun'], val.String(), nargs='*')

        # Load SGE configuration profile
        loadConfig(self, 'sge', _configfile, _configapp, _logger)

        # Nothing else to do when the caller asked to skip the binary lookup.
        if not _findExecutables:
            return

        # Find executables
        self._qsubmit = SgeQueue._find_binary('qsub')
        self._qinfo = SgeQueue._find_binary('qhost')
        self._qcancel = SgeQueue._find_binary('qdel')
        self._qstatus = SgeQueue._find_binary('qstat')
        self._checkQueue()