def __init__(self):
    # Initialise both parents; ProtocolInterface supplies the _arg()
    # argument-registration machinery used below.
    SimQueue.__init__(self)
    ProtocolInterface.__init__(self)
    # Per-job resource requirements.
    self._arg('ngpu', 'int', 'Number of GPUs', default=0, validator=Number(int, '0POS'))
    self._arg('ncpu', 'int', 'Number of CPUs', default=1, validator=Number(int, '0POS'))
    self._arg('memory', 'int', 'Amount of memory (MB)', default=1000, validator=Number(int, 'POS'))
    # Cap on how many jobs may run at once; defaults to effectively unlimited.
    self._arg('max_jobs', 'int', 'Maximum number of concurent jobs', default=sys.maxsize, validator=Number(int, 'POS'))
    # Backend credentials / app selection; both mandatory.
    self._arg('token', 'str', 'PM token', required=True, validator=String())
    self._arg('app', 'str', 'App name', required=True, validator=String())
    # Maps job identifiers to their directories (filled in during submission).
    self._dirs = {}
def __init__(self):
    # Initialise both parents; ProtocolInterface supplies the _arg()
    # argument-registration machinery used below.
    SimQueue.__init__(self)
    ProtocolInterface.__init__(self)
    self._arg(
        'groupname', 'str',
        'The name of the group of simulations you want to submit. If none is given, '
        'a randomly generated string will be used instead.', None, val.String())
    self._arg('datadir', 'str', 'The directory in which to retrieve your results.', None, val.String())
    # Only a fixed set of cloud instance types is accepted.
    self._arg('instancetype', 'str', 'Instance type', 'p2.xlarge', val.String(),
              valid_values=('g2.2xlarge', 'r4.large', 'p2.xlarge'))
    self._arg(
        'hashnames', 'bool',
        'If True, each job will have a name created from the hash of its directory '
        'instead of using the directory name.', False, val.Boolean())
    self._arg('verbose', 'bool', 'Turn verbosity mode on or off.', False, val.Boolean())
    # Per-job resource requirements.
    self._arg('ngpu', 'int', 'Number of GPUs to use for a single job', 0, val.Number(int, '0POS'))
    self._arg('ncpu', 'int', 'Number of CPUs to use for a single job', 1, val.Number(int, '0POS'))
    self._arg('memory', 'int', 'Amount of memory per job (MB)', 8000, val.Number(int, '0POS'))
    # Cloud connection handle; established lazily elsewhere.
    self._cloud = None
def __init__(self):
    """Register the trajectory-retrieval options and reset queue state."""
    SimQueue.__init__(self)
    ProtocolInterface.__init__(self)
    # Destination for completed trajectories and which files to bring back.
    self._arg("datadir", "str", "The path in which to store completed trajectories.", None, val.String())
    self._arg("trajext", "str", "Extension of trajectory files. This is needed to copy them to datadir.", "xtc", val.String())
    self._arg("copy", "list", "A list of file names or globs for the files to copy to datadir", ("*.xtc",), val.String(), nargs="*")
    # 'copy' supersedes the old 'trajext' option.
    self._cmdDeprecated("trajext", "copy")
    # No jobs tracked yet, worker queue not created, not shutting down.
    self._states = {}
    self._queue = None
    self._shutdown = False
def __init__(self):
    """Register the cloud-retrieval arguments and initialise state."""
    SimQueue.__init__(self)
    ProtocolInterface.__init__(self)
    group_help = ('The name of the group of simulations you want to submit. If none is given, '
                  'a randomly generated string will be used instead.')
    self._arg('groupname', 'str', group_help, None, val.String())
    self._arg('datadir', 'str', 'The directory in which to retrieve your results.', None, val.String())
    self._arg('verbose', 'bool', 'Turn verbosity mode on or off.', False, val.Boolean())
    # The cloud connection is established lazily, not at construction.
    self._cloud = None
def __init__(self, _configapp=None):
    """Register LSF queue arguments, optionally apply a per-app YAML
    profile, and locate the LSF command-line executables.

    Parameters
    ----------
    _configapp : str, optional
        Name of the application section to read from the LSF
        configuration YAML file referenced by ``_config['lsf']``.
    """
    SimQueue.__init__(self)
    ProtocolInterface.__init__(self)
    self._arg('jobname', 'str', 'Job name (identifier)', None, val.String())
    self._arg('queue', 'str', 'The queue to run on', self._defaults['queue'], val.String())
    self._arg('app', 'str', 'The application profile', self._defaults['app'], val.String())
    self._arg('ngpu', 'int', 'Number of GPUs to use for a single job', self._defaults['ngpu'], val.Number(int, '0POS'))
    self._arg('ncpu', 'int', 'Number of CPUs to use for a single job', self._defaults['ncpu'], val.Number(int, '0POS'))
    self._arg('memory', 'int', 'Amount of memory per job (MB)', self._defaults['memory'], val.Number(int, '0POS'))
    self._arg('walltime', 'int', 'Job timeout (hour:min or min)', self._defaults['walltime'], val.Number(int, '0POS'))
    self._arg('resources', 'list', 'Resources of the queue', self._defaults['resources'], val.String(), nargs='*')
    self._arg('environment', 'list', 'Things to run before the job (sourcing envs).', self._defaults['environment'], val.String(), nargs='*')
    self._arg('outputstream', 'str', 'Output stream.', 'lsf.%J.out', val.String())
    self._arg('errorstream', 'str', 'Error stream.', 'lsf.%J.err', val.String())
    self._arg('datadir', 'str', 'The path in which to store completed trajectories.', None, val.String())
    # Help text rejoined onto one line (it had been broken mid-literal).
    self._arg('trajext', 'str', 'Extension of trajectory files. This is needed to copy them to datadir.', 'xtc', val.String())

    # Load LSF configuration profile
    lsfconfig = _config['lsf']
    profile = None
    if _configapp is not None:
        if lsfconfig is not None:
            if os.path.isfile(lsfconfig) and lsfconfig.endswith(('.yml', '.yaml')):
                try:
                    with open(lsfconfig, 'r') as f:
                        # safe_load: never execute arbitrary YAML tags from a config file
                        profile = yaml.safe_load(f)
                    logger.info('Loaded LSF configuration YAML file {}'.format(lsfconfig))
                except Exception:  # narrowed from bare except; keep best-effort load
                    logger.warning('Could not load YAML file {}'.format(lsfconfig))
            else:
                logger.warning('{} does not exist or it is not a YAML file.'.format(lsfconfig))
            if profile:
                try:
                    properties = profile[_configapp]
                except KeyError:
                    raise RuntimeError('There is no profile in {} for configuration '
                                       'app {}'.format(lsfconfig, _configapp))
                for p in properties:
                    self.__dict__[p] = properties[p]
                    logger.info('Setting {} to {}'.format(p, properties[p]))
        else:
            raise RuntimeError('No LSF configuration YAML file defined for the configapp')
    else:
        if lsfconfig is not None:
            logger.warning('LSF configuration YAML file defined without configuration app')

    # Find executables
    self._qsubmit = LsfQueue._find_binary('bsub')
    self._qinfo = LsfQueue._find_binary('bqueues')
    self._qcancel = LsfQueue._find_binary('bkill')
    self._qstatus = LsfQueue._find_binary('bjobs')
def __init__(self):
    """Configure trajectory-retrieval options and reset internal queue state."""
    SimQueue.__init__(self)
    ProtocolInterface.__init__(self)
    self._arg('datadir', 'str', 'The path in which to store completed trajectories.', None, val.String())
    self._arg('trajext', 'str', 'Extension of trajectory files. This is needed to copy them to datadir.', 'xtc', val.String())
    self._arg('copy', 'list', 'A list of file names or globs for the files to copy to datadir', ('*.xtc',), val.String(), nargs='*')
    self._cmdDeprecated('trajext', 'copy')  # 'copy' replaces 'trajext'
    # Per-job state map, lazily-created worker queue, shutdown flag.
    self._states = {}
    self._queue = None
    self._shutdown = False
def __init__(self):
    """Declare the cloud submission/retrieval options."""
    SimQueue.__init__(self)
    ProtocolInterface.__init__(self)
    self._arg(
        'groupname',
        'str',
        'The name of the group of simulations you want to submit. If none is given, '
        'a randomly generated string will be used instead.',
        None,
        val.String(),
    )
    self._arg('datadir', 'str', 'The directory in which to retrieve your results.', None, val.String())
    self._arg('verbose', 'bool', 'Turn verbosity mode on or off.', False, val.Boolean())
    self._cloud = None  # connected on first use
def __init__(self):
    # Register LSF queue arguments via ProtocolInterface._arg and locate
    # the LSF command-line tools.
    SimQueue.__init__(self)
    ProtocolInterface.__init__(self)
    self._arg('jobname', 'str', 'Job name (identifier)', None, val.String())
    # The default queue is indirected: _defaults['default_queue'] holds the
    # name of the key whose value is the actual default queue.
    self._arg('queue', 'str', 'The queue to run on', self._defaults[self._defaults['default_queue']], val.String())
    self._arg('ngpu', 'int', 'Number of GPUs to use for a single job', self._defaults['ngpu'], val.Number(int, '0POS'))
    self._arg('ncpu', 'int', 'Number of CPUs to use for a single job', self._defaults['ncpu'], val.Number(int, '0POS'))
    self._arg('memory', 'int', 'Amount of memory per job (MB)', self._defaults['memory'], val.Number(int, '0POS'))
    self._arg('walltime', 'int', 'Job timeout (hour:min or min)', self._defaults['walltime'], val.Number(int, '0POS'))
    self._arg('resources', 'list', 'Resources of the queue', self._defaults['resources'], val.String(), nargs='*')
    self._arg('environment', 'list', 'Things to run before the job (sourcing envs).', self._defaults['environment'], val.String(), nargs='*')
    # %J is expanded by LSF to the job id.
    self._arg('outputstream', 'str', 'Output stream.', 'lsf.%J.out', val.String())
    self._arg('errorstream', 'str', 'Error stream.', 'lsf.%J.err', val.String())
    self._arg('datadir', 'str', 'The path in which to store completed trajectories.', None, val.String())
    self._arg(
        'trajext', 'str',
        'Extension of trajectory files. This is needed to copy them to datadir.',
        'xtc', val.String())
    # Find executables
    self._qsubmit = LsfQueue._find_binary('bsub')
    self._qinfo = LsfQueue._find_binary('bqueues')
    self._qcancel = LsfQueue._find_binary('bkill')
    self._qstatus = LsfQueue._find_binary('bjobs')
def __init__(self):
    """Set up result-retrieval arguments and the local queue's bookkeeping."""
    SimQueue.__init__(self)
    ProtocolInterface.__init__(self)
    self._arg('datadir', 'str', 'The path in which to store completed trajectories.', None, val.String())
    self._arg(
        'trajext', 'str',
        'Extension of trajectory files. This is needed to copy them to datadir.',
        'xtc', val.String())
    self._arg(
        'copy', 'list',
        'A list of file names or globs for the files to copy to datadir',
        ('*.xtc',), val.String(), nargs='*')
    # Keep old option working but steer users to 'copy'.
    self._cmdDeprecated('trajext', 'copy')
    self._states = {}
    self._queue = None
    self._shutdown = False
def __init__(self, _configapp=None):
    """Register LSF queue arguments (incl. GPU options), optionally apply
    a per-app YAML profile, and locate the LSF executables.

    Parameters
    ----------
    _configapp : str, optional
        Name of the application section to read from the LSF
        configuration YAML file referenced by ``_config['lsf']``.
    """
    SimQueue.__init__(self)
    ProtocolInterface.__init__(self)
    self._arg('version', 'int', 'LSF major version', self._defaults['version'], valid_values=[9, 10])
    self._arg('jobname', 'str', 'Job name (identifier)', None, val.String())
    self._arg(
        'queue', 'list',
        'The queue or list of queues to run on. If list, it attempts to submit the job to '
        'the first queue listed', self._defaults['queue'], val.String(), nargs='*')
    self._arg('app', 'str', 'The application profile', self._defaults['app'], val.String())
    self._arg('ngpu', 'int', 'Number of GPUs to use for a single job', self._defaults['ngpu'], val.Number(int, '0POS'))
    self._arg(
        'gpu_options', 'dict', 'Number of GPUs to use for a single job', self._defaults['gpu_options'],
        val.Dictionary(
            key_type=str,
            valid_keys=['mode', 'mps', 'j_exclusive'],
            value_type={'mode': str, 'mps': str, 'j_exclusive': str},
            valid_values={'mode': ['shared', 'exclusive_process'],
                          'mps': ['yes', 'no'],
                          'j_exclusive': ['yes', 'no']}))
    self._arg('ncpu', 'int', 'Number of CPUs to use for a single job', self._defaults['ncpu'], val.Number(int, '0POS'))
    self._arg('memory', 'int', 'Amount of memory per job (MiB)', self._defaults['memory'], val.Number(int, '0POS'))
    self._arg('walltime', 'int', 'Job timeout (hour:min or min)', self._defaults['walltime'], val.Number(int, '0POS'))
    self._arg('resources', 'list', 'Resources of the queue', self._defaults['resources'], val.String(), nargs='*')
    self._cmdDeprecated('environment', 'prerun')
    self._arg('outputstream', 'str', 'Output stream.', 'lsf.%J.out', val.String())
    self._arg('errorstream', 'str', 'Error stream.', 'lsf.%J.err', val.String())
    self._arg('datadir', 'str', 'The path in which to store completed trajectories.', None, val.String())
    # Help text rejoined onto one line (it had been broken mid-literal).
    self._arg('trajext', 'str', 'Extension of trajectory files. This is needed to copy them to datadir.', 'xtc', val.String())
    self._arg(
        'envvars', 'str',
        'Envvars to propagate from submission node to the running node (comma-separated)',
        self._defaults['envvars'], val.String())
    self._arg(
        'prerun', 'list',
        'Shell commands to execute on the running node before the job (e.g. '
        'loading modules)', self._defaults['prerun'], val.String(), nargs='*')

    # Load LSF configuration profile
    lsfconfig = _config['lsf']
    profile = None
    if _configapp is not None:
        if lsfconfig is not None:
            if os.path.isfile(lsfconfig) and lsfconfig.endswith(('.yml', '.yaml')):
                try:
                    with open(lsfconfig, 'r') as f:
                        # safe_load: never execute arbitrary YAML tags from a config file
                        profile = yaml.safe_load(f)
                    logger.info('Loaded LSF configuration YAML file {}'.format(lsfconfig))
                except Exception:  # narrowed from bare except; keep best-effort load
                    logger.warning('Could not load YAML file {}'.format(lsfconfig))
            else:
                logger.warning('{} does not exist or it is not a YAML file.'.format(lsfconfig))
            if profile:
                try:
                    properties = profile[_configapp]
                except KeyError:
                    raise RuntimeError('There is no profile in {} for configuration '
                                       'app {}'.format(lsfconfig, _configapp))
                for p in properties:
                    # NOTE(review): writes bypass __setattr__ validation on purpose
                    self.__dict__[p] = properties[p]
                    logger.info('Setting {} to {}'.format(p, properties[p]))
        else:
            raise RuntimeError('No LSF configuration YAML file defined for the configapp')
    else:
        if lsfconfig is not None:
            logger.warning('LSF configuration YAML file defined without configuration app')

    # Find executables
    self._qsubmit = LsfQueue._find_binary('bsub')
    self._qinfo = LsfQueue._find_binary('bqueues')
    self._qcancel = LsfQueue._find_binary('bkill')
    self._qstatus = LsfQueue._find_binary('bjobs')
def __init__(self):
    """Initialise the protocol interface, registering under the name "tcpsocket"."""
    ProtocolInterface.__init__(self, "tcpsocket")
def __init__(
    self, _configapp=None, _configfile=None, _findExecutables=True, _logger=True
):
    """Register SLURM queue arguments, apply the 'slurm' configuration
    profile and locate the SLURM command-line executables.

    Parameters
    ----------
    _configapp : str, optional
        Application section to read from the configuration file.
    _configfile : str, optional
        Path of the configuration file forwarded to loadConfig.
    _findExecutables : bool
        If True, locate sbatch/sinfo/scancel/squeue/sacct and check the queue.
    _logger : bool
        Forwarded to loadConfig to control logging.
    """
    SimQueue.__init__(self)
    ProtocolInterface.__init__(self)
    self._arg("jobname", "str", "Job name (identifier)", None, val.String())
    self._arg(
        "partition",
        "str",
        "The queue (partition) or list of queues to run on. If list, the one offering "
        "earliest initiation will be used.",
        self._defaults["partition"],
        val.String(),
        nargs="*",
    )
    self._arg("priority", "str", "Job priority", self._defaults["priority"], val.String())
    self._arg("ngpu", "int", "Number of GPUs to use for a single job", self._defaults["ngpu"], val.Number(int, "0POS"))
    self._arg("ncpu", "int", "Number of CPUs to use for a single job", self._defaults["ncpu"], val.Number(int, "POS"))
    self._arg("memory", "int", "Amount of memory per job (MiB)", self._defaults["memory"], val.Number(int, "POS"))
    self._arg(
        "gpumemory",
        "int",
        "Only run on GPUs with at least this much memory. Needs special setup of SLURM. "
        "Check how to define gpu_mem on SLURM.",
        None,
        val.Number(int, "0POS"),
    )
    self._arg("walltime", "int", "Job timeout (minutes)", self._defaults["walltime"], val.Number(int, "POS"))
    self._cmdDeprecated("environment", "envvars")
    self._arg(
        "mailtype",
        "str",
        "When to send emails. Separate options with commas like 'END,FAIL'.",
        None,
        val.String(),
    )
    self._arg("mailuser", "str", "User email address.", None, val.String())
    self._arg("outputstream", "str", "Output stream.", "slurm.%N.%j.out", val.String())
    # BUGFIX: the validator previously sat OUTSIDE the call —
    # `self._arg(...), val.String()` built a throwaway tuple and registered
    # errorstream without a validator. Moved val.String() inside.
    self._arg("errorstream", "str", "Error stream.", "slurm.%N.%j.err", val.String())
    self._arg("datadir", "str", "The path in which to store completed trajectories.", None, val.String())
    self._arg(
        "trajext",
        "str",
        "Extension of trajectory files. This is needed to copy them to datadir.",
        "xtc",
        val.String(),
    )
    # Help text rejoined onto one line (it had been broken mid-literal).
    self._arg(
        "nodelist",
        "list",
        "A list of nodes on which to run every job at the *same time*! Careful! The jobs"
        " will be duplicated!",
        None,
        val.String(),
        nargs="*",
    )
    self._arg(
        "exclude",
        "list",
        "A list of nodes on which *not* to run the jobs. Use this to select nodes on "
        "which to allow the jobs to run on.",
        None,
        val.String(),
        nargs="*",
    )
    self._arg(
        "envvars",
        "str",
        "Envvars to propagate from submission node to the running node (comma-separated)",
        self._defaults["envvars"],
        val.String(),
    )
    self._arg(
        "prerun",
        "list",
        "Shell commands to execute on the running node before the job (e.g. "
        "loading modules)",
        self._defaults["prerun"],
        val.String(),
        nargs="*",
    )
    self._arg("account", "str", "Charge resources used by the jobs to specified account.", None, val.String())
    self._arg("user", "str", "The SLURM user submitting and managing jobs", getpass.getuser(), val.String())

    # Load Slurm configuration profile
    loadConfig(self, "slurm", _configfile, _configapp, _logger)

    # Find executables
    if _findExecutables:
        self._qsubmit = SlurmQueue._find_binary("sbatch")
        self._qinfo = SlurmQueue._find_binary("sinfo")
        self._qcancel = SlurmQueue._find_binary("scancel")
        self._qstatus = SlurmQueue._find_binary("squeue")
        self._qjobinfo = SlurmQueue._find_binary("sacct")
        self._checkQueue()
def __init__(self):
    """Register SLURM queue arguments and locate the SLURM executables."""
    SimQueue.__init__(self)
    ProtocolInterface.__init__(self)
    self._arg('jobname', 'str', 'Job name (identifier)', None, val.String())
    # Default partition is indirected through _defaults['default_partition'].
    self._arg('partition', 'str', 'The queue (partition) to run on',
              self._defaults[self._defaults['default_partition']], val.String())
    self._arg('priority', 'str', 'Job priority', self._defaults['priority'], val.String())
    self._arg('ngpu', 'int', 'Number of GPUs to use for a single job', self._defaults['ngpu'], val.Number(int, '0POS'))
    self._arg('ncpu', 'int', 'Number of CPUs to use for a single job', self._defaults['ncpu'], val.Number(int, 'POS'))
    self._arg('memory', 'int', 'Amount of memory per job (MB)', self._defaults['memory'], val.Number(int, 'POS'))
    self._arg(
        'gpumemory', 'int',
        'Only run on GPUs with at least this much memory. Needs special setup of SLURM. '
        'Check how to define gpu_mem on SLURM.', None, val.Number(int, '0POS'))
    self._arg('walltime', 'int', 'Job timeout (s)', self._defaults['walltime'], val.Number(int, 'POS'))
    self._arg('environment', 'str', 'Envvars to propagate to the job.', self._defaults['environment'], val.String())
    self._arg(
        'mailtype', 'str',
        'When to send emails. Separate options with commas like \'END,FAIL\'.', None, val.String())
    self._arg('mailuser', 'str', 'User email address.', None, val.String())
    self._arg('outputstream', 'str', 'Output stream.', 'slurm.%N.%j.out', val.String())
    # BUGFIX: the validator previously sat OUTSIDE the call —
    # `self._arg(...), val.String()` built a throwaway tuple and registered
    # errorstream without a validator. Moved val.String() inside.
    self._arg('errorstream', 'str', 'Error stream.', 'slurm.%N.%j.err', val.String())  # Maybe change these to job name
    self._arg('datadir', 'str', 'The path in which to store completed trajectories.', None, val.String())
    self._arg(
        'trajext', 'str',
        'Extension of trajectory files. This is needed to copy them to datadir.', 'xtc', val.String())
    self._arg(
        'nodelist', 'list',
        'A list of nodes on which to run every job at the *same time*! Careful! The jobs'
        ' will be duplicated!', None, val.String(), nargs='*')
    # Help text rejoined onto one line (it had been broken mid-literal).
    self._arg(
        'exclude', 'list',
        'A list of nodes on which *not* to run the jobs. Use this to select nodes on '
        'which to allow the jobs to run on.', None, val.String(), nargs='*')

    # Find executables
    self._qsubmit = SlurmQueue._find_binary('sbatch')
    self._qinfo = SlurmQueue._find_binary('sinfo')
    self._qcancel = SlurmQueue._find_binary('scancel')
    self._qstatus = SlurmQueue._find_binary('squeue')
def __init__(self, _configapp=None, _configfile=None, _findExecutables=True, _logger=True):
    # Local import: playmolecule is only needed when this queue is used,
    # so it is imported lazily at construction time.
    from playmolecule import Session, Job
    SimQueue.__init__(self)
    ProtocolInterface.__init__(self)
    self._arg(
        "parentjob",
        "playmolecule.job.Job",
        "Spawn all jobs as children of this job",
        default=None,
        required=False,
        validator=val.Object(Job),
    )
    # The active PMWS session is mandatory — all jobs are submitted through it.
    self._arg(
        "session",
        "playmolecule.session.Session",
        "The current PMWS Session object",
        required=True,
        validator=val.Object(Session),
    )
    self._arg("jobname", "str", "Job name (identifier)", None, val.String())
    self._arg("group", "str", "Group name (identifier)", None, val.String())
    # Per-job resource requirements.
    self._arg(
        "ngpu",
        "int",
        "Number of GPUs",
        default=0,
        validator=val.Number(int, "0POS"),
    )
    self._arg(
        "ncpu",
        "int",
        "Number of CPUs",
        default=1,
        validator=val.Number(int, "0POS"),
    )
    self._arg(
        "memory",
        "int",
        "Amount of memory (MB)",
        default=1000,
        validator=val.Number(int, "POS"),
    )
    self._arg("app", "str", "App name", required=True, validator=val.String())
    self._arg(
        "configname",
        "str",
        "Name of the file containing the individual job configurations yaml or json. Not a filepath, just the name. All submitted folders must contain this file.",
        None,
        val.String(),
    )
    # Where results land and how they are mirrored into datadir.
    self._arg(
        "retrievedir",
        "str",
        "Directory in which to retrieve the results of jobs",
        None,
        val.String(),
    )
    self._arg(
        "datadir",
        "str",
        "Directory in which to copy or symlink the output directory.",
        None,
        val.String(),
    )
    self._arg(
        "symlink",
        "bool",
        "Set to False to copy instead of symlink the directories from the retrievedir to datadir",
        True,
        val.Boolean(),
    )
    self._arg(
        "copy",
        "list",
        "A list of file names or globs for the files to copy or symlink from retrievedir to datadir.",
        ("/", ),
        val.String(),
        nargs="*",
    )
    # Apply the 'playmolecule' section of the configuration file, if present.
    loadConfig(self, "playmolecule", _configfile, _configapp, _logger)
def __init__(self, _configapp=None):
    """Register SLURM queue arguments, optionally apply a per-app YAML
    profile, and locate the SLURM command-line executables.

    Parameters
    ----------
    _configapp : str, optional
        Name of the application section to read from the Slurm
        configuration YAML file referenced by ``_config['slurm']``.
    """
    SimQueue.__init__(self)
    ProtocolInterface.__init__(self)
    self._arg('jobname', 'str', 'Job name (identifier)', None, val.String())
    self._arg('partition', 'str', 'The queue (partition) to run on', self._defaults['partition'], val.String())
    self._arg('priority', 'str', 'Job priority', self._defaults['priority'], val.String())
    self._arg('ngpu', 'int', 'Number of GPUs to use for a single job', self._defaults['ngpu'], val.Number(int, '0POS'))
    self._arg('ncpu', 'int', 'Number of CPUs to use for a single job', self._defaults['ncpu'], val.Number(int, 'POS'))
    self._arg('memory', 'int', 'Amount of memory per job (MB)', self._defaults['memory'], val.Number(int, 'POS'))
    self._arg('gpumemory', 'int',
              'Only run on GPUs with at least this much memory. Needs special setup of SLURM. '
              'Check how to define gpu_mem on SLURM.', None, val.Number(int, '0POS'))
    self._arg('walltime', 'int', 'Job timeout (s)', self._defaults['walltime'], val.Number(int, 'POS'))
    self._arg('environment', 'str', 'Envvars to propagate to the job.', self._defaults['environment'], val.String())
    self._arg('mailtype', 'str',
              'When to send emails. Separate options with commas like \'END,FAIL\'.', None, val.String())
    self._arg('mailuser', 'str', 'User email address.', None, val.String())
    self._arg('outputstream', 'str', 'Output stream.', 'slurm.%N.%j.out', val.String())
    # BUGFIX: the validator previously sat OUTSIDE the call —
    # `self._arg(...), val.String()` built a throwaway tuple and registered
    # errorstream without a validator. Moved val.String() inside.
    self._arg('errorstream', 'str', 'Error stream.', 'slurm.%N.%j.err', val.String())  # Maybe change these to job name
    self._arg('datadir', 'str', 'The path in which to store completed trajectories.', None, val.String())
    self._arg('trajext', 'str', 'Extension of trajectory files. This is needed to copy them to datadir.', 'xtc', val.String())
    self._arg('nodelist', 'list',
              'A list of nodes on which to run every job at the *same time*! Careful! The jobs'
              ' will be duplicated!', None, val.String(), nargs='*')
    # Help text rejoined onto one line (it had been broken mid-literal).
    self._arg('exclude', 'list',
              'A list of nodes on which *not* to run the jobs. Use this to select nodes on '
              'which to allow the jobs to run on.', None, val.String(), nargs='*')

    # Load Slurm configuration profile
    slurmconfig = _config['slurm']
    profile = None
    if _configapp is not None:
        if slurmconfig is not None:
            if os.path.isfile(slurmconfig) and slurmconfig.endswith(('.yml', '.yaml')):
                try:
                    with open(slurmconfig, 'r') as f:
                        # safe_load: never execute arbitrary YAML tags from a config file
                        profile = yaml.safe_load(f)
                    logger.info('Loaded Slurm configuration YAML file {}'.format(slurmconfig))
                except Exception:  # narrowed from bare except; keep best-effort load
                    logger.warning('Could not load YAML file {}'.format(slurmconfig))
            else:
                logger.warning('{} does not exist or it is not a YAML file.'.format(slurmconfig))
            if profile:
                try:
                    properties = profile[_configapp]
                except KeyError:
                    raise RuntimeError('There is no profile in {} for configuration '
                                       'app {}'.format(slurmconfig, _configapp))
                for p in properties:
                    self.__dict__[p] = properties[p]
                    logger.info('Setting {} to {}'.format(p, properties[p]))
        else:
            raise RuntimeError('No Slurm configuration YAML file defined for the configapp')
    else:
        if slurmconfig is not None:
            logger.warning('Slurm configuration YAML file defined without configuration app')

    # Find executables
    self._qsubmit = SlurmQueue._find_binary('sbatch')
    self._qinfo = SlurmQueue._find_binary('sinfo')
    self._qcancel = SlurmQueue._find_binary('scancel')
    self._qstatus = SlurmQueue._find_binary('squeue')
def __init__(self, _configapp=None):
    """Register SLURM queue arguments (incl. envvars/prerun), optionally
    apply a per-app YAML profile, and locate the SLURM executables.

    Parameters
    ----------
    _configapp : str, optional
        Name of the application section to read from the Slurm
        configuration YAML file referenced by ``_config['slurm']``.
    """
    SimQueue.__init__(self)
    ProtocolInterface.__init__(self)
    self._arg('jobname', 'str', 'Job name (identifier)', None, val.String())
    self._arg(
        'partition', 'str',
        'The queue (partition) or list of queues to run on. If list, the one offering '
        'earliest initiation will be used.', self._defaults['partition'], val.String(), nargs='*')
    self._arg('priority', 'str', 'Job priority', self._defaults['priority'], val.String())
    self._arg('ngpu', 'int', 'Number of GPUs to use for a single job', self._defaults['ngpu'], val.Number(int, '0POS'))
    self._arg('ncpu', 'int', 'Number of CPUs to use for a single job', self._defaults['ncpu'], val.Number(int, 'POS'))
    self._arg('memory', 'int', 'Amount of memory per job (MiB)', self._defaults['memory'], val.Number(int, 'POS'))
    self._arg(
        'gpumemory', 'int',
        'Only run on GPUs with at least this much memory. Needs special setup of SLURM. '
        'Check how to define gpu_mem on SLURM.', None, val.Number(int, '0POS'))
    self._arg('walltime', 'int', 'Job timeout (minutes)', self._defaults['walltime'], val.Number(int, 'POS'))
    self._cmdDeprecated('environment', 'envvars')
    self._arg(
        'mailtype', 'str',
        'When to send emails. Separate options with commas like \'END,FAIL\'.', None, val.String())
    self._arg('mailuser', 'str', 'User email address.', None, val.String())
    self._arg('outputstream', 'str', 'Output stream.', 'slurm.%N.%j.out', val.String())
    # BUGFIX: the validator previously sat OUTSIDE the call —
    # `self._arg(...), val.String()` built a throwaway tuple and registered
    # errorstream without a validator. Moved val.String() inside.
    self._arg('errorstream', 'str', 'Error stream.', 'slurm.%N.%j.err', val.String())
    self._arg('datadir', 'str', 'The path in which to store completed trajectories.', None, val.String())
    self._arg(
        'trajext', 'str',
        'Extension of trajectory files. This is needed to copy them to datadir.', 'xtc', val.String())
    self._arg(
        'nodelist', 'list',
        'A list of nodes on which to run every job at the *same time*! Careful! The jobs'
        ' will be duplicated!', None, val.String(), nargs='*')
    # Help text rejoined onto one line (it had been broken mid-literal).
    self._arg(
        'exclude', 'list',
        'A list of nodes on which *not* to run the jobs. Use this to select nodes on '
        'which to allow the jobs to run on.', None, val.String(), nargs='*')
    self._arg(
        'envvars', 'str',
        'Envvars to propagate from submission node to the running node (comma-separated)',
        self._defaults['envvars'], val.String())
    self._arg(
        'prerun', 'list',
        'Shell commands to execute on the running node before the job (e.g. '
        'loading modules)', self._defaults['prerun'], val.String(), nargs='*')
    self._arg('account', 'str', 'Charge resources used by the jobs to specified account.', None, val.String())

    # Load Slurm configuration profile
    slurmconfig = _config['slurm']
    profile = None
    if _configapp is not None:
        if slurmconfig is not None:
            if os.path.isfile(slurmconfig) and slurmconfig.endswith(('.yml', '.yaml')):
                try:
                    with open(slurmconfig, 'r') as f:
                        # safe_load: never execute arbitrary YAML tags from a config file
                        profile = yaml.safe_load(f)
                    logger.info('Loaded Slurm configuration YAML file {}'.format(slurmconfig))
                except Exception:  # narrowed from bare except; keep best-effort load
                    logger.warning('Could not load YAML file {}'.format(slurmconfig))
            else:
                logger.warning('{} does not exist or it is not a YAML file.'.format(slurmconfig))
            if profile:
                try:
                    properties = profile[_configapp]
                except KeyError:
                    raise RuntimeError('There is no profile in {} for configuration '
                                       'app {}'.format(slurmconfig, _configapp))
                for p in properties:
                    # setattr (not __dict__) so ProtocolInterface validation applies
                    setattr(self, p, properties[p])
                    logger.info('Setting {} to {}'.format(p, properties[p]))
        else:
            raise RuntimeError('No Slurm configuration YAML file defined for the configapp')
    else:
        if slurmconfig is not None:
            logger.warning('Slurm configuration YAML file defined without configuration app')

    # Find executables
    self._qsubmit = SlurmQueue._find_binary('sbatch')
    self._qinfo = SlurmQueue._find_binary('sinfo')
    self._qcancel = SlurmQueue._find_binary('scancel')
    self._qstatus = SlurmQueue._find_binary('squeue')
def __init__(self, _configapp=None):
    """Register LSF queue arguments (incl. GPU options), optionally apply
    a per-app YAML profile, and locate the LSF executables.

    Parameters
    ----------
    _configapp : str, optional
        Name of the application section to read from the LSF
        configuration YAML file referenced by ``_config['lsf']``.
    """
    SimQueue.__init__(self)
    ProtocolInterface.__init__(self)
    self._arg('version', 'int', 'LSF major version', self._defaults['version'], valid_values=[9, 10])
    self._arg('jobname', 'str', 'Job name (identifier)', None, val.String())
    self._arg('queue', 'list',
              'The queue or list of queues to run on. If list, it attempts to submit the job to '
              'the first queue listed', self._defaults['queue'], val.String(), nargs='*')
    self._arg('app', 'str', 'The application profile', self._defaults['app'], val.String())
    self._arg('ngpu', 'int', 'Number of GPUs to use for a single job', self._defaults['ngpu'], val.Number(int, '0POS'))
    self._arg('gpu_options', 'dict', 'Number of GPUs to use for a single job', self._defaults['gpu_options'],
              val.Dictionary(key_type=str,
                             valid_keys=['mode', 'mps', 'j_exclusive'],
                             value_type={'mode': str, 'mps': str, 'j_exclusive': str},
                             valid_values={'mode': ['shared', 'exclusive_process'],
                                           'mps': ['yes', 'no'],
                                           'j_exclusive': ['yes', 'no']}))
    self._arg('ncpu', 'int', 'Number of CPUs to use for a single job', self._defaults['ncpu'], val.Number(int, '0POS'))
    self._arg('memory', 'int', 'Amount of memory per job (MiB)', self._defaults['memory'], val.Number(int, '0POS'))
    self._arg('walltime', 'int', 'Job timeout (hour:min or min)', self._defaults['walltime'], val.Number(int, '0POS'))
    self._arg('resources', 'list', 'Resources of the queue', self._defaults['resources'], val.String(), nargs='*')
    self._cmdDeprecated('environment', 'prerun')
    self._arg('outputstream', 'str', 'Output stream.', 'lsf.%J.out', val.String())
    self._arg('errorstream', 'str', 'Error stream.', 'lsf.%J.err', val.String())
    self._arg('datadir', 'str', 'The path in which to store completed trajectories.', None, val.String())
    # Help text rejoined onto one line (it had been broken mid-literal).
    self._arg('trajext', 'str', 'Extension of trajectory files. This is needed to copy them to datadir.', 'xtc', val.String())
    self._arg('envvars', 'str',
              'Envvars to propagate from submission node to the running node (comma-separated)',
              self._defaults['envvars'], val.String())
    self._arg('prerun', 'list',
              'Shell commands to execute on the running node before the job (e.g. '
              'loading modules)', self._defaults['prerun'], val.String(), nargs='*')

    # Load LSF configuration profile
    lsfconfig = _config['lsf']
    profile = None
    if _configapp is not None:
        if lsfconfig is not None:
            if os.path.isfile(lsfconfig) and lsfconfig.endswith(('.yml', '.yaml')):
                try:
                    with open(lsfconfig, 'r') as f:
                        # safe_load: never execute arbitrary YAML tags from a config file
                        profile = yaml.safe_load(f)
                    logger.info('Loaded LSF configuration YAML file {}'.format(lsfconfig))
                except Exception:  # narrowed from bare except; keep best-effort load
                    logger.warning('Could not load YAML file {}'.format(lsfconfig))
            else:
                logger.warning('{} does not exist or it is not a YAML file.'.format(lsfconfig))
            if profile:
                try:
                    properties = profile[_configapp]
                except KeyError:
                    raise RuntimeError('There is no profile in {} for configuration '
                                       'app {}'.format(lsfconfig, _configapp))
                for p in properties:
                    self.__dict__[p] = properties[p]
                    logger.info('Setting {} to {}'.format(p, properties[p]))
        else:
            raise RuntimeError('No LSF configuration YAML file defined for the configapp')
    else:
        if lsfconfig is not None:
            logger.warning('LSF configuration YAML file defined without configuration app')

    # Find executables
    self._qsubmit = LsfQueue._find_binary('bsub')
    self._qinfo = LsfQueue._find_binary('bqueues')
    self._qcancel = LsfQueue._find_binary('bkill')
    self._qstatus = LsfQueue._find_binary('bjobs')
def __init__(self, _configapp=None, _configfile=None, _findExecutables=True, _logger=True):
    """Register LSF queue arguments, apply the 'lsf' configuration
    profile and locate the LSF command-line executables.

    Parameters
    ----------
    _configapp : str, optional
        Application section to read from the configuration file.
    _configfile : str, optional
        Path of the configuration file forwarded to loadConfig.
    _findExecutables : bool
        If True, locate bsub/bqueues/bkill/bjobs.
    _logger : bool
        Forwarded to loadConfig to control logging.
    """
    SimQueue.__init__(self)
    ProtocolInterface.__init__(self)
    self._arg(
        "version",
        "int",
        "LSF major version",
        self._defaults["version"],
        valid_values=[9, 10],
    )
    self._arg("jobname", "str", "Job name (identifier)", None, val.String())
    self._arg(
        "queue",
        "list",
        "The queue or list of queues to run on. If list, it attempts to submit the job to "
        "the first queue listed",
        self._defaults["queue"],
        val.String(),
        nargs="*",
    )
    self._arg("app", "str", "The application profile", self._defaults["app"], val.String())
    self._arg(
        "ngpu",
        "int",
        "Number of GPUs to use for a single job",
        self._defaults["ngpu"],
        val.Number(int, "0POS"),
    )
    self._arg(
        "gpu_options",
        "dict",
        "Number of GPUs to use for a single job",
        self._defaults["gpu_options"],
        val.Dictionary(
            key_type=str,
            valid_keys=["mode", "mps", "j_exclusive"],
            value_type={"mode": str, "mps": str, "j_exclusive": str},
            valid_values={
                "mode": ["shared", "exclusive_process"],
                "mps": ["yes", "no"],
                "j_exclusive": ["yes", "no"],
            },
        ),
    )
    self._arg(
        "ncpu",
        "int",
        "Number of CPUs to use for a single job",
        self._defaults["ncpu"],
        val.Number(int, "0POS"),
    )
    # FIX: help text said "(KB)" — every sibling queue documents this
    # value as MiB, so the label was misleading.
    self._arg(
        "memory",
        "int",
        "Amount of memory per job (MiB)",
        self._defaults["memory"],
        val.Number(int, "0POS"),
    )
    self._arg(
        "walltime",
        "int",
        "Job timeout (hour:min or min)",
        self._defaults["walltime"],
        val.Number(int, "0POS"),
    )
    self._arg(
        "resources",
        "list",
        "Resources of the queue",
        self._defaults["resources"],
        val.String(),
        nargs="*",
    )
    self._cmdDeprecated("environment", "prerun")
    self._arg("outputstream", "str", "Output stream.", "lsf.%J.out", val.String())
    self._arg("errorstream", "str", "Error stream.", "lsf.%J.err", val.String())
    self._arg(
        "datadir",
        "str",
        "The path in which to store completed trajectories.",
        None,
        val.String(),
    )
    # Help text rejoined onto one line (it had been broken mid-literal).
    self._arg(
        "trajext",
        "str",
        "Extension of trajectory files. This is needed to copy them to datadir.",
        "xtc",
        val.String(),
    )
    self._arg(
        "envvars",
        "str",
        "Envvars to propagate from submission node to the running node (comma-separated)",
        self._defaults["envvars"],
        val.String(),
    )
    self._arg(
        "prerun",
        "list",
        "Shell commands to execute on the running node before the job (e.g. "
        "loading modules)",
        self._defaults["prerun"],
        val.String(),
        nargs="*",
    )

    # Load LSF configuration profile
    loadConfig(self, "lsf", _configfile, _configapp, _logger)

    # Find executables
    if _findExecutables:
        self._qsubmit = LsfQueue._find_binary("bsub")
        self._qinfo = LsfQueue._find_binary("bqueues")
        self._qcancel = LsfQueue._find_binary("bkill")
        self._qstatus = LsfQueue._find_binary("bjobs")
def __init__(self, _configapp=None, _configfile=None, _findExecutables=True, _logger=True):
    """Register SGE queue arguments, apply the 'sge' configuration
    profile and locate the SGE command-line executables.

    Parameters
    ----------
    _configapp : str, optional
        Application section to read from the configuration file.
    _configfile : str, optional
        Path of the configuration file forwarded to loadConfig.
    _findExecutables : bool
        If True, locate qsub/qhost/qdel/qstat and check the queue.
    _logger : bool
        Forwarded to loadConfig to control logging.
    """
    SimQueue.__init__(self)
    ProtocolInterface.__init__(self)
    self._arg("jobname", "str", "Job name (identifier)", None, val.String())
    self._arg(
        "queue",
        "list",
        "The queue or list of queues to run on. If list, it attempts to submit the job to "
        "the first queue listed",
        self._defaults["queue"],
        val.String(),
        nargs="*",
    )
    self._arg(
        "ngpu",
        "int",
        "Number of GPUs to use for a single job",
        self._defaults["ngpu"],
        val.Number(int, "0POS"),
    )
    self._arg(
        "ncpu",
        "int",
        "Number of CPUs to use for a single job",
        self._defaults["ncpu"],
        val.Number(int, "0POS"),
    )
    self._arg(
        "memory",
        "int",
        "Amount of memory per job (MiB)",
        self._defaults["memory"],
        val.Number(int, "0POS"),
    )
    self._arg(
        "walltime",
        "int",
        "Job timeout (hour:min or min)",
        self._defaults["walltime"],
        val.Number(int, "0POS"),
    )
    self._arg("pe", "str", "SGE Parallel Environment", self._defaults["pe"], val.String())
    self._arg(
        "resources",
        "list",
        "Resources of the queue",
        self._defaults["resources"],
        val.String(),
        nargs="*",
    )
    self._cmdDeprecated("environment", "prerun")
    # $REQUEST / JID / TASKID are expanded by SGE itself.
    self._arg("outputstream", "str", "Output stream.", "$REQUEST.oJID[.TASKID]", val.String())
    self._arg("errorstream", "str", "Error stream.", "$REQUEST.eJID[.TASKID]", val.String())
    self._arg(
        "datadir",
        "str",
        "The path in which to store completed trajectories.",
        None,
        val.String(),
    )
    self._arg(
        "trajext",
        "str",
        "Extension of trajectory files. This is needed to copy them to datadir.",
        "xtc",
        val.String(),
    )
    self._arg(
        "envvars",
        "str",
        "Envvars to propagate from submission node to the running node (comma-separated)",
        self._defaults["envvars"],
        val.String(),
    )
    # Help text rejoined onto one line (it had been broken mid-literal).
    self._arg(
        "prerun",
        "list",
        "Shell commands to execute on the running node before the job (e.g. "
        "loading modules)",
        self._defaults["prerun"],
        val.String(),
        nargs="*",
    )

    # Load SGE configuration profile
    loadConfig(self, "sge", _configfile, _configapp, _logger)

    # Find executables
    if _findExecutables:
        self._qsubmit = SgeQueue._find_binary("qsub")
        self._qinfo = SgeQueue._find_binary("qhost")
        self._qcancel = SgeQueue._find_binary("qdel")
        self._qstatus = SgeQueue._find_binary("qstat")
        self._checkQueue()