class RunManager(object): def __init__(self, event_list, event, config, filemanager): self.config = config self.account = config['global'].get('account', '') self.event_list = event_list self.filemanager = filemanager self.dryrun = True if config['global']['dryrun'] == True else False self.debug = True if config['global']['debug'] == True else False self._resource_path = config['global']['resource_path'] """ A list of cases, dictionaries structured as: case (str): the full case name jobs (list): a list of job.Jobs short_name (str): the short name of the case """ self.cases = list() self.running_jobs = list() self.kill_event = event self._job_total = 0 self._job_complete = 0 self.slurm = Slurm() max_jobs = config['global']['max_jobs'] self.max_running_jobs = max_jobs if max_jobs else self.slurm.get_node_number( ) * 6 while self.max_running_jobs == 0: sleep(1) msg = 'Unable to communication with scontrol, checking again' print_line(msg, event_list) logging.error(msg) self.max_running_jobs = self.slurm.get_node_number() * 6 def check_max_running_jobs(self): """ Checks if the maximum number of jobs are running Returns True if the max or more are running, false otherwise """ try: job_info = self.slurm.queue() except: return True else: running_jobs = 0 for job in job_info: if job['STATE'] in ['R', 'PD']: running_jobs += 1 if running_jobs >= self.max_running_jobs: return True return False def add_pp_type_to_cases(self, freqs, job_type, start, end, case, run_type=None):
def test_batch(self):
    """
    Submit a batch script through Slurm.batch and verify the job shows up

    Checks that batch returns a truthy int job id, that showjob reports
    the job in one of the allowed states, and that the job is listed by
    queue in state 'PD' or 'R'. The job is cancelled afterwards, even when
    one of the checks fails, so a failing test does not leave it queued.
    """
    # print('\n') gives the same output as the print statement on python 2
    # and is also valid syntax on python 3
    print('\n')
    print_message(
        '---- Starting Test: {} ----'.format(inspect.stack()[0][3]), 'ok')
    slurm = Slurm()
    command = os.path.join('tests', 'test_slurm_batch.sh')
    job_id = slurm.batch(command, '-n 1 -N 1')
    self.assertTrue(job_id)
    self.assertIsInstance(job_id, int)
    try:
        info = slurm.showjob(job_id)
        # NOTE(review): Slurm documents the finished state as COMPLETED,
        # confirm that 'COMPLETE' is what showjob actually returns
        allowed_states = ['PENDING', 'RUNNING', 'COMPLETE', 'COMPLETING']
        self.assertIn(info['JobState'], allowed_states)

        info = slurm.queue()
        in_queue = False
        for item in info:
            if int(item['JOBID']) == job_id:
                in_queue = True
                self.assertIn(item['STATE'], ['PD', 'R'])
                break
        self.assertTrue(in_queue)
    finally:
        # Always remove the submitted job from the queue
        slurm.cancel(job_id)