def launch_jobs(self, task_list):
    # Check cluster
    cur_job_count = cluster.count_jobs()
    if cur_job_count == -1:
        print('ERROR:cannot get count of jobs from cluster')
        return

    print(str(cur_job_count)+' jobs currently in queue')

    # Launch until we reach cluster limit or no jobs left to launch
    while cur_job_count < self.queue_limit and len(task_list) > 0:
        cur_task = task_list.pop()

        # Confirm task is still ready to run
        if cur_task.get_status() != task.NEED_TO_RUN:
            continue

        print(' +Launching job:'+cur_task.assessor_label+', currently ' +
              str(cur_job_count)+' jobs in cluster queue')
        success = cur_task.launch(self.root_job_dir, self.job_email,
                                  self.job_email_options)
        if not success:
            print('ERROR:failed to launch job')

        cur_job_count = cluster.count_jobs()
        if cur_job_count == -1:
            print('ERROR:cannot get count of jobs from cluster')
            return
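# For context, cluster.count_jobs() is expected to return the number of jobs
# currently queued, or -1 when the queue cannot be reached. A minimal sketch
# of that contract, assuming a PBS-style `qselect` backend (an assumption for
# illustration; the real implementation may use a different command):
import getpass
import subprocess

def count_jobs():
    """Return the number of jobs queued for the current user, or -1 on error.

    Hypothetical sketch assuming a PBS-style scheduler; the backend command
    is an assumption, not taken from the code above.
    """
    cmd = ['qselect', '-u', getpass.getuser()]  # prints one job id per line
    try:
        output = subprocess.check_output(cmd)
        return len(output.splitlines())
    except (subprocess.CalledProcessError, OSError):
        return -1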
def launch_tasks(self, task_list, writeonly=False, pbsdir=None):
    """ Launch tasks from the passed list until the queue is full or
        the list is empty

    :param task_list: list of tasks to launch
    :param writeonly: write the job files without submitting them
    :param pbsdir: folder to store the pbs files
    :return: None
    """
    # Check number of jobs on cluster
    cur_job_count = cluster.count_jobs()
    if cur_job_count == -1:
        LOGGER.error('cannot get count of jobs from cluster')
        return

    LOGGER.info(str(cur_job_count)+' jobs currently in queue')

    # Launch until we reach cluster limit or no jobs left to launch
    # (writeonly mode ignores the queue limit since nothing is submitted)
    while (cur_job_count < self.queue_limit or writeonly) and len(task_list) > 0:
        cur_task = task_list.pop()

        # No need to re-check the task status here: the assessors have
        # already been filtered as NEED_TO_RUN before this point.
        # if cur_task.get_status() != task.NEED_TO_RUN:
        #     continue

        if writeonly:
            mes_format = ' +Writing PBS file for job:{label}, currently {count} jobs in cluster queue'
        else:
            mes_format = ' +Launching job:{label}, currently {count} jobs in cluster queue'
        LOGGER.info(mes_format.format(label=cur_task.assessor_label,
                                      count=str(cur_job_count)))

        success = cur_task.launch(self.root_job_dir, self.job_email,
                                  self.job_email_options, self.xnat_host,
                                  writeonly, pbsdir)
        if not success:
            LOGGER.error('failed to launch job')
            raise cluster.ClusterLaunchException()

        cur_job_count = cluster.count_jobs()
        if cur_job_count == -1:
            LOGGER.error('cannot get count of jobs from cluster')
            raise cluster.ClusterCountJobsException()
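# The ClusterLaunchException / ClusterCountJobsException types raised above
# are assumed to live in the cluster module. A minimal sketch of how such
# exception classes could be defined (an assumption; not the actual source):
class ClusterLaunchException(Exception):
    """Raised when a job cannot be submitted to the cluster queue."""

class ClusterCountJobsException(Exception):
    """Raised when the number of queued jobs cannot be determined."""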
def launch_tasks(self, task_list, writeonly=False, pbsdir=None):
    """ Launch tasks from the passed list until the queue is full or
        the list is empty

    :param task_list: list of tasks to launch
    :param writeonly: write the job files without submitting them
    :param pbsdir: folder to store the pbs files
    :return: None
    """
    # Check number of jobs on cluster
    cur_job_count = cluster.count_jobs()
    if cur_job_count == -1:
        LOGGER.error('cannot get count of jobs from cluster')
        return

    LOGGER.info(str(cur_job_count) + ' jobs currently in queue')

    # Launch until we reach cluster limit or no jobs left to launch
    # (writeonly mode ignores the queue limit since nothing is submitted)
    while (cur_job_count < self.queue_limit or writeonly) and len(task_list) > 0:
        cur_task = task_list.pop()

        # No need to re-check the task status here: the assessors have
        # already been filtered as NEED_TO_RUN before this point.
        # if cur_task.get_status() != task.NEED_TO_RUN:
        #     continue

        if writeonly:
            mes_format = ' +Writing PBS file for job:{label}, currently {count} jobs in cluster queue'
        else:
            mes_format = ' +Launching job:{label}, currently {count} jobs in cluster queue'
        LOGGER.info(mes_format.format(label=cur_task.assessor_label,
                                      count=str(cur_job_count)))

        try:
            # diskq launcher types presumably build the job from the local
            # disk queue, so launch() takes no arguments in that mode
            if self.launcher_type in ['diskq-cluster', 'diskq-combined']:
                success = cur_task.launch()
            else:
                success = cur_task.launch(self.root_job_dir, self.job_email,
                                          self.job_email_options,
                                          self.xnat_host, writeonly, pbsdir)
        except Exception as E:
            LOGGER.critical('Caught exception launching job %s'
                            % cur_task.assessor_label)
            # str(E) rather than E.message: exceptions have no .message
            # attribute in Python 3
            LOGGER.critical('Exception class %s caught with message %s'
                            % (E.__class__, str(E)))
            success = False

        if not success:
            LOGGER.error('failed to launch job')
            raise cluster.ClusterLaunchException()

        cur_job_count = cluster.count_jobs()
        if cur_job_count == -1:
            LOGGER.error('cannot get count of jobs from cluster')
            raise cluster.ClusterCountJobsException()
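# Hypothetical usage sketch: `launcher` stands for an instance of the class
# defining launch_tasks, and `ready_tasks` for a pre-filtered list of
# NEED_TO_RUN task objects -- neither name comes from the code above.
try:
    # Dry run: write the PBS scripts to pbsdir without submitting anything,
    # useful for inspecting the generated job files before a real launch.
    launcher.launch_tasks(list(ready_tasks), writeonly=True, pbsdir='/tmp/pbs')
except cluster.ClusterLaunchException:
    LOGGER.error('stopping: a job failed to launch')
except cluster.ClusterCountJobsException:
    LOGGER.error('stopping: lost contact with the cluster queue')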