def test_serialize_and_deserialize(self):
    filename = "temp.bin"
    obj = [1, 2, 3]
    Serialization.serialize_object_overwrite(obj, filename)
    obj2 = Serialization.deserialize_object(filename)
    self.assertEqual(obj, obj2)
def test_serialize_object_file_exists(self):
    filename = "temp.bin"
    obj = [1, 2, 3]
    with self.assertRaises(OSError):
        # the first call creates the file; the second must refuse to
        # overwrite it and raise OSError
        Serialization.serialize_object(obj, filename)
        Serialization.serialize_object(obj, filename)
    self.assertTrue(os.path.exists(filename))
    os.remove(filename)
def test_serialize_object_file_not_exists(self):
    filename = "temp.bin"
    obj = [1, 2, 3]
    try:
        os.remove(filename)
    except OSError:
        pass
    Serialization.serialize_object(obj, filename)
    self.assertTrue(os.path.exists(filename))
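# The three tests above pin down the Serialization contract:
# serialize_object refuses to overwrite an existing file (raising
# OSError), serialize_object_overwrite replaces it silently, and
# deserialize_object restores the original object. A minimal
# pickle-based sketch that satisfies this contract (an illustrative
# assumption, not the project's actual implementation):
import pickle


class Serialization(object):
    @staticmethod
    def serialize_object(obj, filename):
        # "xb" fails with FileExistsError (an OSError subclass)
        # if the target already exists
        with open(filename, "xb") as f:
            pickle.dump(obj, f)

    @staticmethod
    def serialize_object_overwrite(obj, filename):
        # "wb" truncates and overwrites unconditionally
        with open(filename, "wb") as f:
            pickle.dump(obj, f)

    @staticmethod
    def deserialize_object(filename):
        with open(filename, "rb") as f:
            return pickle.load(f)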
def submit_wrapped_pbs_job(self, wrapped_job, job_name):
    job_folder = self.get_job_foldername(job_name)

    # try to create the folder if it does not yet exist
    job_filename = self.get_job_filename(job_name)
    logger.info("Creating job with file %s" % job_filename)
    try:
        makedirs(job_folder)
    except OSError:
        pass

    Serialization.serialize_object(wrapped_job, job_filename)

    # allow the queue to process things
    time.sleep(self.submission_delay)

    dispatcher_string = self._get_dispatcher_string(job_filename)

    # get computing resource constraints from the job
    walltime, memory, nodes = wrapped_job.get_walltime_mem_nodes()
    job_string = self.create_batch_script(job_name, dispatcher_string,
                                          walltime, memory, nodes)

    # put the custom parameter string in front if it exists, but not as
    # the first line, to avoid clashing with the #!/bin/bash shebang
    if self.batch_parameters.parameter_prefix != "":
        lines = job_string.split(os.linesep)
        job_string = os.linesep.join(
            [lines[0], self.batch_parameters.parameter_prefix] + lines[1:])

    f = open(job_folder + os.sep +
             BatchClusterComputationEngine.batch_script_filename, "w")
    f.write(job_string)
    f.close()

    job_id = self.submit_to_batch_system(job_string)

    if job_id == "":
        raise RuntimeError("Could not parse job_id. "
                           "Something went wrong with the job submission")

    f = open(job_folder + os.sep +
             BatchClusterComputationEngine.job_id_filename, "w")
    f.write(job_id + os.linesep)
    f.close()

    if not isinstance(wrapped_job, FireAndForgetJob):
        # track submitted (and unfinished) jobs and their start time
        self._insert_job_time_sorted(job_name, job_id)
def dispatch(filename):
    # wait until the file system says that the file exists
    while not FileSystem.file_exists_new_shell(filename):
        time.sleep(1)

    job = Serialization.deserialize_object(filename)
    job.compute()
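# dispatch polls via FileSystem.file_exists_new_shell rather than
# os.path.exists, presumably so each check runs in a fresh process and
# is not fooled by stale attribute caches on network file systems. A
# sketch of such a helper under that assumption (hypothetical, not the
# project's actual code):
import shlex
import subprocess


class FileSystem(object):
    @staticmethod
    def file_exists_new_shell(filename):
        # spawn a new shell and let `test -e` report existence;
        # exit status 0 means the file is visible to the fresh process
        return subprocess.call("test -e %s" % shlex.quote(filename),
                               shell=True) == 0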
def _get_max_wait_time_exceed_jobs(self):
    names = []
    current_time = time.time()
    for job_name, job_time, _ in self.submitted_jobs:
        # load job resources
        job_filename = self.get_job_filename(job_name)
        job = Serialization.deserialize_object(job_filename)
        if abs(current_time - job_time) > job.walltime:
            names += [job_name]
    return names
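# The loop above unpacks self.submitted_jobs into
# (job_name, submit_time, job_id) triples, and the waiting logic below
# treats index 0 as the oldest job, so the list must stay sorted by
# submission time. A sketch of the _insert_job_time_sorted helper that
# submit_wrapped_pbs_job calls, under those assumptions (hypothetical):
import bisect
import time


def _insert_job_time_sorted(self, job_name, job_id):
    # keep (job_name, submit_time, job_id) triples ordered by
    # submit_time; bisect finds the insertion point in O(log n), which
    # stays correct even if re-submitted jobs arrive out of order
    submit_time = time.time()
    submit_times = [entry[1] for entry in self.submitted_jobs]
    index = bisect.bisect(submit_times, submit_time)
    self.submitted_jobs.insert(index, (job_name, submit_time, job_id))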
def _wait_until_n_unfinished(self, desired_num_unfinished):
    """
    Iteratively checks all unfinished jobs and updates whether they are
    finished. Blocks until at most desired_num_unfinished unfinished
    jobs remain in the queue. Logs a "waiting for" info message for the
    oldest job in the queue.
    """
    # save the list of all jobs to file for reconstructing results later
    self.save_all_job_list()

    last_printed = self._get_oldest_job_in_queue()
    logger.info("Waiting for %s and %d other jobs" %
                (last_printed, self._get_num_unfinished_jobs() - 1))

    while self._get_num_unfinished_jobs() > desired_num_unfinished:
        oldest = self._get_oldest_job_in_queue()
        if oldest != last_printed:
            last_printed = oldest
            logger.info("Waiting for %s and %d other jobs" %
                        (last_printed, self._get_num_unfinished_jobs() - 1))

        # delete all finished jobs from the internal list
        i = 0
        while i < len(self.submitted_jobs):
            job_name = self.submitted_jobs[i][0]
            if self._check_job_done(job_name):
                del self.submitted_jobs[i]
                # don't increment i: it now indexes the next element
            else:
                i += 1

        # check for re-submissions
        if self.batch_parameters.resubmit_on_timeout:
            for job_name in self._get_max_wait_time_exceed_jobs():
                # load job resources
                job_filename = self.get_job_filename(job_name)
                job = Serialization.deserialize_object(job_filename)
                logger.info("%s exceeded maximum waiting time of %dh" %
                            (job_name, job.walltime))
                self._resubmit(job_name)

        time.sleep(self.check_interval)
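# _check_job_done is not shown here; a sketch under the assumption that
# a finished job signals completion by writing a result file into its
# job folder (the name "result.bin" is hypothetical, and the real check
# may query the batch system instead):
def _check_job_done(self, job_name):
    result_filename = (self.get_job_foldername(job_name) +
                       os.sep + "result.bin")
    return FileSystem.file_exists_new_shell(result_filename)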
def _resubmit(self, job_name):
    new_job_name = self.create_job_name()
    logger.info("Re-submitting under name %s" % new_job_name)

    # remove from the unfinished-jobs list
    for i in range(len(self.submitted_jobs)):
        if self.submitted_jobs[i][0] == job_name:
            del self.submitted_jobs[i]
            break

    # remove from the all-jobs list
    for i in range(len(self.all_jobs)):
        if self.all_jobs[i] == job_name:
            del self.all_jobs[i]
            break

    # load job from disk and re-submit under the new name
    job_filename = self.get_job_filename(job_name)
    wrapped_job = Serialization.deserialize_object(job_filename)
    self.submit_wrapped_pbs_job(wrapped_job, new_job_name)