"++remote-shell", "ssh", "++nodelist", "/root/machinefile", "+p2", "/usr/local/NAMD_2.7b1_Linux-x86/namd2", "/root/run/NPT.conf", ] # jd.working_directory = "/root/run/" jd.output = "stdout.txt" jd.error = "stderr.txt" jobs = [] for i in range(0, NUMBER_JOBS): print "Start job no.: " + str(i) sj = bigjob_cloud.subjob(bigjob=bj) sj.submit_job(jd) jobs.append(sj) # busy wait for completion while 1: try: number_done = 0 for i in jobs: state = str(i.get_state()) print "job: " + str(i) + " state: " + str(state) if state == "Failed" or state == "Done": number_done = number_done + 1 if number_done == len(jobs): break
# !!Adjust!! -- site-specific working directory and output files for `jd`.
jd.working_directory = "/home/luckow/run/"
jd.output, jd.error = "stdout_tg.txt", "stderr_tg.txt"

# Single parenthesised argument: prints exactly what the Python 2 print
# statement would print for the same expression.
print("**************** Start SubJob Submission ************** ")
subjob_start = time.time()  # wall-clock timestamp taken before any submission

# One list of submitted sub-jobs per backend. Only the Nimbus and EC2 lists
# are filled in this section; the other two are just initialised here.
jobs_ec2, jobs_nimbus, jobs_tg, jobs_condor = [], [], [], []
number_started_jobs = 0
total_number_jobs = (
    NUMBER_JOBS_NIMBUS
    + NUMBER_JOBS_EC2
    + NUMBER_JOBS_GRID
    + NUMBER_JOBS_CONDOR
)

# Submit the Nimbus sub-jobs. The shared description `jd_nimbus` is re-used
# for every sub-job; only its output/error file names get the running,
# 1-based job number appended before each submission.
for i in range(NUMBER_JOBS_NIMBUS):
    job_no = str(number_started_jobs + 1)
    print("Start job: " + job_no + " on Nimbus")
    sj_nimbus = bigjob_cloud.subjob(bigjob=bj_nimbus)
    jd_nimbus.output = "stdout_nimbus.txt." + job_no
    jd_nimbus.error = "stderr_nimbus.txt." + job_no
    sj_nimbus.submit_job(jd_nimbus)
    jobs_nimbus.append(sj_nimbus)
    number_started_jobs += 1

# Submit the EC2 sub-jobs the same way. The job number keeps counting on
# from the Nimbus loop, so file-name suffixes stay unique across backends.
for i in range(NUMBER_JOBS_EC2):
    job_no = str(number_started_jobs + 1)
    print("Start job no.: " + job_no + " on EC2")
    sj_ec2 = bigjob_cloud.subjob(bigjob=bj_ec2)
    jd_ec2.output = "stdout_ec2.txt." + job_no
    jd_ec2.error = "stderr_ec2.txt." + job_no
    sj_ec2.submit_job(jd_ec2)
    jobs_ec2.append(sj_ec2)
    number_started_jobs += 1
cloud_bigjobs.append(bj_cloud) # reset intervall timer intervall_timer=time.time() print "Pilot Job/BigJob URL: " + bj_tg.pilot_url + " State: " + str(bj_tg.get_state_detail()) + " Time since launch: " + str(time.time()-start) for i in cloud_bigjobs: print "Pilot Job/BigJob URL: " + i.pilot_url + " State: " + str(i.get_state_detail()) + " Time since launch: " + str(time.time()-start) if number_started_jobs < NUMBER_JOBS: for bj in cloud_bigjobs: if bj!=None and str(bj.get_state_detail())=="Running": print "Cloud: Free nodes: " + str(bj.get_free_nodes()) if int(bj.get_free_nodes()) >= int(jd_nimbus.number_of_processes): print " Start job: " +str(number_started_jobs + 1) + " on " + str(jd_nimbus.number_of_processes) sj_nimbus = bigjob_cloud.subjob(bigjob=bj) jd_nimbus.output = "stdout_nimbus.txt."+str(number_started_jobs+1) jd_nimbus.error = "stderr_nimbus.txt."+str(number_started_jobs+1) sj_nimbus.submit_job(jd_nimbus) jobs_nimbus.append(sj_nimbus) number_started_jobs = number_started_jobs + 1 if str(bj_tg.get_state_detail())=="Running": print "TG: Free nodes: " + str(bj_tg.get_free_nodes()) if int(bj_tg.get_free_nodes()) >= int(jd.number_of_processes): print "Start job no.: " +str(number_started_jobs + 1) sj_tg = bigjob.subjob(advert_host) jd.output = "stdout_tg.txt."+str(number_started_jobs+1) jd.error = "stderr_tg.txt."+str(number_started_jobs+1) sj_tg.submit_job(bj_tg.pilot_url, jd) jobs_tg.append(sj_tg)