batch_job_name = batch_job.getJobname() print 'jobname on backend: '+batch_job_name path_to_inputfile = batch_job.pathToInputFiles()+inputfilename for i in range(1,gen_jobs+1): job = JobObject(si) job.setEmail_address(email) job.setEmail_on_job_finish(True) job.setCommandline('R --no-readline --no-restore --no-save -f '+path_to_inputfile) batch_job.addJob(job) batch_job.addInputFile('/home/markus/Desktop/R/'+inputfilename) batch_job.setDefaultNoCpus(1) batch_job.setDefaultWalltimeInSeconds(walltime) print 'preparing jobs on backend...' batch_job.prepareAndCreateJobs(redistribute) if redistribute: print 'job distribution:' print batch_job.getOptimizationResult() print 'submitting jobs to grid...' batch_job.submit(True)
# this is just to demonstrate how to restart a failed job later on job.setCommandline('cat ' + inputFile3relPath) else: job.setCommandline('cat ' + inputFile1relPath + ' ' + inputFile2relPath) job.setWalltimeInSeconds(60) # adding the job to the multijob batchJob.addJob(job) # this should be set because it's used for the matchmaking/metascheduling batchJob.setDefaultNoCpus(1) batchJob.setDefaultWalltimeInSeconds(60) # now we add an input file that is common to all jobs batchJob.addInputFile(inputFile1Url) batchJob.addInputFile(inputFile2Url) batchJob.addInputFile(inputFile3Url) # we don't want to submit to tpac because it doesn't work #multiPartJob.setSitesToExclude(["uq", "hpsc", "auckland", "canterbury"]); try: print "Creating jobs on the backend and staging files..." batchJob.prepareAndCreateJobs(True) except (JobsException), error: for job in error.getFailures().keySet(): print "Job: " + job.getJobname() + ", Error: " + error.getFailures( ).get(job).getLocalizedMessage() sys.exit()
for i in range(0, numberOfJobs): # create the single job job = JobObject(si) # better to set the application to use explicitely because in that case we don't need to use mds (faster) job.setCommandline('cat ' + pathToInputFiles + 'commonFile.txt ' + 'singleJobFile.txt') # adding a job-specific input file job.addInputFileUrl("/home/markus/tmp/singleJobFile.txt") # adding the job to the multijob batch_job.addJob(job) # now we are adding a file that can be used by all of the child jobs. it needs to be referenced via the pathToInputFiles() method shown above batch_job.addInputFile('/home/markus/tmp/commonJobFile.txt') batch_job.setDefaultNoCpus(1) batch_job.setDefaultWalltimeInSeconds(60) try: print "Creating jobs on the backend and staging files..." # by specifying "True" we tell the backend to automatically distribute the jobs to all available submission locations # this can be finetuned by exluding or including sites. another option would be to specifying the submission location # for every single job and setting "False" below (this would make job submission faster since jobs don't need to be re-distributed/moved on the backend). batch_job.prepareAndCreateJobs(True) except (JobsException), error: for job in error.getFailures().keySet(): print "Job: " + job.getJobname() + ", Error: " + error.getFailures( ).get(job).getLocalizedMessage() sys.exit()
print "INFO: Creating a Batch Job Object called " + batch_job_name batch_jobs = BatchJobObject(service_interface, batch_job_name, group, application, version) batch_jobs.setConcurrentInputFileUploadThreads(5) # Set the number of concurrent uploads batch_jobs.setConcurrentJobCreationThreads(5) # Set the number of concurrent jobs batch_jobs.setDefaultNoCpus(1) # Set the number of CPUs required batch_jobs.setDefaultWalltimeInSeconds(300) # Set the maximum walltime to 5 minutes exclude_sites = list() exclude_sites.append("AUT") batch_jobs.setLocationsToExclude(exclude_sites) # Create a blacklist of sites to exclude # Currently the AUT location is not behaving, so always exclude it print "INFO: Adding common files to Batch Job Object " + batch_job_name batch_jobs.addInputFile(os.path.join(current_dir, dictionary_path)) batch_jobs.addInputFile(os.path.join(current_dir, "countbacon.py")) print "INFO: Defining jobs from input directory" job_count = 0 for file_name in os.listdir(input_path): print "INFO: Defining job for " + file_name job_name = base_job_name + "-" + file_name job = JobObject(service_interface) job.setJobname(job_name) job.setApplication("python") # Set the application being run job.setApplicationVersion("2.4") # Set the application version, note this is an exact match job.addInputFileUrl(os.path.join(current_dir, input_path, file_name)) job.setCommandline("python ../countbacon.py ../" + dictionary_path + " " + file_name) print "INFO: " + job.getJobname() + " defined" batch_jobs.addJob(job)
# now we can calculate the relative path (from every job directory) to the common input file folder pathToInputFiles = batch_job.pathToInputFiles() for i in range(0, numberOfJobs): # create the single job job = JobObject(si) # better to set the application to use explicitely because in that case we don't need to use mds (faster) job.setCommandline('cat ' + pathToInputFiles+'commonJobFile.txt ' + 'singleJobFile.txt') # adding a job-specific input file job.addInputFileUrl("/home/markus/tmp/singleJobFile.txt") # adding the job to the multijob batch_job.addJob(job) # now we are adding a file that can be used by all of the child jobs. it needs to be referenced via the pathToInputFiles() method shown above batch_job.addInputFile('/home/markus/tmp/commonJobFile.txt') batch_job.setDefaultNoCpus(1); batch_job.setDefaultWalltimeInSeconds(60); batch_job.setLocationsToExclude(["gt5test:ng1.canterbury.ac.nz"]) try: print "Creating jobs on the backend and staging files..." # by specifying "True" we tell the backend to automatically distribute the jobs to all available submission locations # this can be finetuned by exluding or including sites. another option would be to specifying the submission location # for every single job and setting "False" below (this would make job submission faster since jobs don't need to be re-distributed/moved on the backend). batch_job.prepareAndCreateJobs(True) except (JobsException), error: for job in error.getFailures().keySet(): print "Job: "+job.getJobname()+", Error: "+error.getFailures().get(job).getLocalizedMessage()
batch_job_name = batch_job.getJobname() print 'jobname on backend: ' + batch_job_name path_to_inputfile = batch_job.pathToInputFiles() + inputfilename for i in range(1, gen_jobs + 1): job = JobObject(si) job.setEmail_address(email) job.setEmail_on_job_finish(True) job.setCommandline('R --no-readline --no-restore --no-save -f ' + path_to_inputfile) batch_job.addJob(job) batch_job.addInputFile('/home/markus/Desktop/R/' + inputfilename) batch_job.setDefaultNoCpus(1) batch_job.setDefaultWalltimeInSeconds(walltime) print 'preparing jobs on backend...' batch_job.prepareAndCreateJobs(redistribute) if redistribute: print 'job distribution:' print batch_job.getOptimizationResult() print 'submitting jobs to grid...' batch_job.submit(True) print 'submission finished...'
if i == 3 or i == 13: # this is just to demonstrate how to restart a failed job later on job.setCommandline('cat '+inputFile3relPath) else: job.setCommandline('cat '+ inputFile1relPath + ' ' + inputFile2relPath) job.setWalltimeInSeconds(60) # adding the job to the multijob batchJob.addJob(job) # this should be set because it's used for the matchmaking/metascheduling batchJob.setDefaultNoCpus(1); batchJob.setDefaultWalltimeInSeconds(60); # now we add an input file that is common to all jobs batchJob.addInputFile(inputFile1Url); batchJob.addInputFile(inputFile2Url); batchJob.addInputFile(inputFile3Url); # we don't want to submit to tpac because it doesn't work #multiPartJob.setSitesToExclude(["uq", "hpsc", "auckland", "canterbury"]); try: print "Creating jobs on the backend and staging files..." batchJob.prepareAndCreateJobs(True) except (JobsException), error: for job in error.getFailures().keySet(): print "Job: "+job.getJobname()+", Error: "+error.getFailures().get(job).getLocalizedMessage() sys.exit() # this is not really needed
batch_jobs = BatchJobObject(service_interface, batch_job_name, group, application, version) batch_jobs.setConcurrentInputFileUploadThreads( 5) # Set the number of concurrent uploads batch_jobs.setConcurrentJobCreationThreads( 5) # Set the number of concurrent jobs batch_jobs.setDefaultNoCpus(1) # Set the number of CPUs required batch_jobs.setDefaultWalltimeInSeconds(300) # Set the maximum walltime to 5 minutes batch_jobs.setLocationsToExclude(["AUT" ]) # Create a blacklist of sites to exclude # Currently the AUT location is not behaving, so always exclude it print "INFO: Adding common files to Batch Job Object " + batch_job_name batch_jobs.addInputFile(os.path.join(current_dir, dictionary_path)) batch_jobs.addInputFile(os.path.join(current_dir, "countbacon.py")) print "INFO: Defining jobs from input directory" job_count = 0 for file_name in os.listdir(input_path): print "INFO: Defining job for " + file_name job_name = base_job_name + "-" + file_name job = JobObject(service_interface) job.setJobname(job_name) job.setApplication("python") # Set the application being run job.setApplicationVersion( "2.4") # Set the application version, note this is an exact match job.addInputFileUrl(os.path.join(current_dir, input_path, file_name)) job.setCommandline("python ../countbacon.py ../" + dictionary_path + " " + file_name)