def dispatch(filename):
    """Block until *filename* is visible on the filesystem, then load and run the job.

    Polls once per second; FileSystem.file_exists_new_shell presumably forces a
    fresh view of a shared/networked filesystem — TODO confirm.
    """
    while not FileSystem.file_exists_new_shell(filename):
        time.sleep(1)
    # File has appeared: unpickle the job object and execute it.
    loaded_job = Serialization.deserialize_object(filename)
    loaded_job.compute()
def test_serialize_and_deserialize(self):
    """Round-trip a small list through serialization and check it survives intact."""
    target_file = "temp.bin"
    original = [1, 2, 3]
    Serialization.serialize_object_overwrite(original, target_file)
    restored = Serialization.deserialize_object(target_file)
    self.assertEqual(original, restored)
def _get_max_wait_time_exceed_jobs(self):
    """Return names of submitted jobs whose allowed walltime has elapsed.

    Each entry of self.submitted_jobs is (job_name, submit_time, ...); the
    walltime limit lives on the serialized job object itself, so every job
    is reloaded from disk to read it.
    """
    exceeded = []
    now = time.time()
    for name, submit_time, _ in self.submitted_jobs:
        # Load the pickled job to obtain its walltime attribute.
        job = Serialization.deserialize_object(self.get_job_filename(name))
        if abs(now - submit_time) > job.walltime:
            exceeded.append(name)
    return exceeded
def _wait_until_n_unfinished(self, desired_num_unfinished):
    """
    Iteratively checks all non-finished jobs and updates whether they are
    finished. Blocks until there are less or exactly desired_num_unfinished
    unfinished jobs in the queue. Messages a "waiting for" info message for
    the oldest job in the queue.
    """
    # Persist the full job list first so results can be reconstructed later.
    self.save_all_job_list()

    last_printed = self._get_oldest_job_in_queue()
    logger.info("Waiting for %s and %d other jobs"
                % (last_printed, self._get_num_unfinished_jobs() - 1))

    while self._get_num_unfinished_jobs() > desired_num_unfinished:
        # Only log again when the oldest outstanding job changes.
        oldest = self._get_oldest_job_in_queue()
        if oldest != last_printed:
            last_printed = oldest
            logger.info("Waiting for %s and %d other jobs"
                        % (last_printed, self._get_num_unfinished_jobs() - 1))

        # Drop finished jobs; slice-assign to mutate the existing list object
        # in place (same check order as a manual delete loop).
        self.submitted_jobs[:] = [entry for entry in self.submitted_jobs
                                  if not self._check_job_done(entry[0])]

        # Optionally re-submit jobs that blew past their walltime.
        if self.batch_parameters.resubmit_on_timeout:
            for job_name in self._get_max_wait_time_exceed_jobs():
                # Reload the job to report its walltime limit in the log.
                job = Serialization.deserialize_object(
                    self.get_job_filename(job_name))
                logger.info("%s exceeded maximum waiting time of %dh"
                            % (job_name, job.walltime))
                self._resubmit(job_name)

        time.sleep(self.check_interval)
def _wait_until_n_unfinished(self, desired_num_unfinished): """ Iteratively checks all non-finished jobs and updates whether they are finished. Blocks until there are less or exactly desired_num_unfinished unfinished jobs in the queue. Messages a "waiting for" info message for the oldest job in the queue. """ # save all job list to file for reconstructing results later self.save_all_job_list() last_printed = self._get_oldest_job_in_queue() logger.info("Waiting for %s and %d other jobs" % (last_printed, self._get_num_unfinished_jobs() - 1)) while self._get_num_unfinished_jobs() > desired_num_unfinished: oldest = self._get_oldest_job_in_queue() if oldest != last_printed: last_printed = oldest logger.info("Waiting for %s and %d other jobs" % (last_printed, self._get_num_unfinished_jobs() - 1)) # delete all finished jobs from internal list i = 0 while i < len(self.submitted_jobs): job_name = self.submitted_jobs[i][0] if self._check_job_done(job_name): del self.submitted_jobs[i] # dont change i as it is now the index of the next element else: i += 1 # check for re-submissions if self.batch_parameters.resubmit_on_timeout: for job_name in self._get_max_wait_time_exceed_jobs(): # load job ressources job_filename = self.get_job_filename(job_name) job = Serialization.deserialize_object(job_filename) logger.info("%s exceeded maximum waiting time of %dh" % (job_name, job.walltime)) self._resubmit(job_name) time.sleep(self.check_interval)
def _resubmit(self, job_name):
    """Re-submit a job from disk under a freshly generated job name.

    Removes the old name from both bookkeeping lists (silently ignoring a
    missing entry), reloads the serialized job, and submits it again.
    """
    new_job_name = self.create_job_name()
    logger.info("Re-submitting under name %s" % new_job_name)

    # Drop the old name from the unfinished-jobs list (entries are tuples
    # whose first element is the job name); remove at most one match.
    for idx, entry in enumerate(self.submitted_jobs):
        if entry[0] == job_name:
            del self.submitted_jobs[idx]
            break

    # Drop the old name from the complete job list, again at most once.
    for idx, name in enumerate(self.all_jobs):
        if name == job_name:
            del self.all_jobs[idx]
            break

    # Reload the pickled job and submit it under the new name.
    wrapped_job = Serialization.deserialize_object(
        self.get_job_filename(job_name))
    self.submit_wrapped_pbs_job(wrapped_job, new_job_name)
def _resubmit(self, job_name):
    """Re-submit a job from disk under a freshly generated job name.

    Removes job_name from both bookkeeping lists (at most one occurrence
    each, silently ignoring a missing entry), reloads the serialized job,
    and submits it again as a new wrapped PBS job.
    """
    new_job_name = self.create_job_name()
    logger.info("Re-submitting under name %s" % new_job_name)
    # remove from unfinished jobs list
    for i in range(len(self.submitted_jobs)):
        if self.submitted_jobs[i][0] == job_name:
            del self.submitted_jobs[i]
            break
    # remove from all jobs list
    # BUG FIX: original iterated range(len(self.all)) — `self.all` does not
    # exist; the loop indexes and deletes from self.all_jobs (cf. the sibling
    # copy of this method), so the length must come from self.all_jobs too.
    for i in range(len(self.all_jobs)):
        if self.all_jobs[i] == job_name:
            del self.all_jobs[i]
            break
    # load job from disc and re-submit under new name
    job_filename = self.get_job_filename(job_name)
    wrapped_job = Serialization.deserialize_object(job_filename)
    self.submit_wrapped_pbs_job(wrapped_job, new_job_name)