# now we add an input file that is common to all jobs
batchJob.addInputFile(inputFile1Url)
batchJob.addInputFile(inputFile2Url)
batchJob.addInputFile(inputFile3Url)

# we don't want to submit to tpac because it doesn't work
#multiPartJob.setSitesToExclude(["uq", "hpsc", "auckland", "canterbury"])

try:
    print "Creating jobs on the backend and staging files..."
    batchJob.prepareAndCreateJobs(True)
except JobsException, error:
    for job in error.getFailures().keySet():
        print "Job: " + job.getJobname() + ", Error: " + error.getFailures().get(job).getLocalizedMessage()
    sys.exit()

# this is not really needed
print "Job distribution:"
for subLoc in batchJob.getOptimizationResult().keySet():
    print subLoc + " : " + batchJob.getOptimizationResult().get(subLoc)

print "Submitting jobs..."
batchJob.submit(True)

print 'Submission finished.'
print 'Name of submitted batchjob: ' + batchJobName

# don't forget to exit properly. this cleans up possibly still existing threads/executors
sys.exit()
# this can be fine-tuned by excluding or including sites. another option would be
# to specify the submission location for every single job and to set "False" below
# (this would make job submission faster, since jobs don't need to be
# re-distributed/moved on the backend).
try:
    batch_job.prepareAndCreateJobs(True)
except JobsException, error:
    for job in error.getFailures().keySet():
        print "Job: " + job.getJobname() + ", Error: " + error.getFailures().get(job).getLocalizedMessage()
    sys.exit()

print "Job distribution:"
for subLoc in batch_job.getOptimizationResult().keySet():
    print subLoc + " : " + batch_job.getOptimizationResult().get(subLoc)

print "Submitting jobs..."
batch_job.submit()

restarted = False

# now we wait for all jobs to finish. Actually, we probably should test whether
# the jobs were successful as well...
while not batch_job.isFinished(True):
    # printing some stats
    print batch_job.getProgress()
    # restart failed jobs every time. to only resubmit failed jobs, we have to
    # remove the waiting-jobs resubmission that is set by default
    failedpolicy = DefaultResubmitPolicy()
    batch_job.restart(failedpolicy, True)
    # restart once after the job submission has finished, to optimize the job
    # distribution to queues where the jobs actually run
    if not restarted:
        restarted = True
        # the original excerpt breaks off at this point; one possible
        # completion of this branch is sketched below
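The excerpt above is cut off inside the "if not restarted:" branch. A minimal sketch of what the one-off optimizing restart could look like, reusing only the restart call that already appears in the loop; the exact policy the original script configured here is not recoverable, so treat the body as an assumption:

    if not restarted:
        restarted = True
        print 'optimizing job distribution...'
        # assumption: reuse the failed-jobs policy for the one-off restart; the
        # original may have set up a different SubmitPolicy at this point
        batch_job.restart(failedpolicy, True)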
try:
    # assumption: the call guarded by this try block is prepareAndCreateJobs,
    # as in the other snippets; the original excerpt starts at the except clause
    batch_jobs.prepareAndCreateJobs(True)
except JobsException, error:
    print "HALT: Exception submitting jobs from BatchJobObject " + batch_jobs.getJobname() + "!"
    for job in error.getFailures().keySet():
        print "Job: " + job.getJobname() + ", Error: " + error.getFailures().get(job).getLocalizedMessage()
    sys.exit(1)
except BackendException, error:
    print "HALT: Exception from grisu backend " + backend + "!"
    print error.getLocalizedMessage()
    print "========================"
    time.sleep(3)
    error.printStackTrace()
    sys.exit(1)

time.sleep(3)

print "INFO: Submitting jobs in batch " + batch_jobs.getJobname()
batch_jobs.submit()
restarted = False

print "INFO: Waiting for batch " + batch_jobs.getJobname() + " to finish"
while not batch_jobs.isFinished(True):
    print "\rWAITING: Running " + str(job_count) + " jobs:",
    print " Waiting [" + str(batch_jobs.getNumberOfWaitingJobs()) + "]",
    print " Active [" + str(batch_jobs.getNumberOfRunningJobs()) + "]",
    print " Successful [" + str(batch_jobs.getNumberOfSuccessfulJobs()) + "]",
    print " Failed [" + str(batch_jobs.getNumberOfFailedJobs()) + "]",
    time.sleep(3)

# Refresh status one last time (the original excerpt is truncated here; the
# remaining counters mirror the loop body above)
print "\rWAITING: Running " + str(job_count) + " jobs:",
print " Waiting [" + str(batch_jobs.getNumberOfWaitingJobs()) + "]",
print " Active [" + str(batch_jobs.getNumberOfRunningJobs()) + "]",
print " Successful [" + str(batch_jobs.getNumberOfSuccessfulJobs()) + "]",
print " Failed [" + str(batch_jobs.getNumberOfFailedJobs()) + "]"
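Once isFinished(True) comes back true, it can be worth inspecting each job's final state rather than relying on the aggregate counters alone. A minimal sketch, reusing the per-job accessors (getJobs(), getStatusString()) that appear in the result-handling snippet below:

    # after the batch has finished, report every job's final status
    for job in batch_jobs.getJobs():
        print "Job: " + job.getJobname() + ", Status: " + job.getStatusString(False)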
# by specifying "True" we tell the backend to automatically distribute the jobs to all available submission locations # this can be finetuned by exluding or including sites. another option would be to specifying the submission location # for every single job and setting "False" below (this would make job submission faster since jobs don't need to be re-distributed/moved on the backend). batch_job.prepareAndCreateJobs(True) except (JobsException), error: for job in error.getFailures().keySet(): print "Job: "+job.getJobname()+", Error: "+error.getFailures().get(job).getLocalizedMessage() sys.exit() print "Job distribution:" print batch_job.getOptimizationResult() print "Submitting jobs..." batch_job.submit() # now we wait for all jobs to be finished, checking for updates every 10 seconds. in real life we would set a much higher check intervall since we don't want to overload # the backend and also it's not really necessary batch_job.waitForJobToFinish(10) print "BatchJob "+batch_job.getJobname()+" finished." # finally, everything is ready. We could do a lot more here, but you get the idea... for job in batch_job.getJobs(): print "Job: "+job.getJobname()+", Status: "+job.getStatusString(False) print "Submitted to: "+job.getJobProperty(Constants.SUBMISSION_SITE_KEY) print print "Stdout: " print job.getStdOutContent()
print 'jobname on backend: ' + batch_job_name

path_to_inputfile = batch_job.pathToInputFiles() + inputfilename

for i in range(1, gen_jobs + 1):
    job = JobObject(si)
    job.setEmail_address(email)
    job.setEmail_on_job_finish(True)
    job.setCommandline('R --no-readline --no-restore --no-save -f ' + path_to_inputfile)
    batch_job.addJob(job)

batch_job.addInputFile('/home/markus/Desktop/R/' + inputfilename)
batch_job.setDefaultNoCpus(1)
batch_job.setDefaultWalltimeInSeconds(walltime)

print 'preparing jobs on backend...'
batch_job.prepareAndCreateJobs(redistribute)

if redistribute:
    print 'job distribution:'
    print batch_job.getOptimizationResult()

print 'submitting jobs to grid...'
batch_job.submit(True)
print 'submission finished...'
batch_jobs.getJobname() + "!") for job in error.getFailures().keySet(): print "Job: " + job.getJobname() + ", Error: " + error.getFailures( ).get(job).getLocalizedMessage() sys.exit(1) except (BackendException), error: print("HALT: Exception from grisu backend " + backend + "!") print(error.getLocalizedMessage()) print("========================") time.sleep(3) error.printStackTrace() sys.exit(1) time.sleep(3) print "INFO: Submitting jobs in batch " + batch_jobs.getJobname() batch_jobs.submit() restarted = False print "INFO: Waiting for batch " + batch_jobs.getJobname() + " to finish" while not batch_jobs.isFinished(True): print "\rWAITING: Running " + str(job_count) + " jobs:", print " Waiting [" + str(batch_jobs.getNumberOfWaitingJobs()) + "]", print " Active [" + str(batch_jobs.getNumberOfRunningJobs()) + "]", print " Successful [" + str(batch_jobs.getNumberOfSuccessfulJobs()) + "]", print " Failed [" + str(batch_jobs.getNumberOfFailedJobs()) + "]", time.sleep(3) # Refresh status one last time print "\rWAITING: Running " + str(job_count) + " jobs:", print " Waiting [" + str(batch_jobs.getNumberOfWaitingJobs()) + "]",