sj_ec2.submit_job(jd_ec2) jobs_ec2.append(sj_ec2) number_started_jobs = number_started_jobs + 1 for i in range(0, NUMBER_JOBS_GRID): print "Start job no.: " +str(number_started_jobs + 1) + " on Grid" sj_tg = bigjob.subjob(advert_host) jd.output = "stdout_tg.txt."+str(number_started_jobs+1) jd.error = "stderr_tg.txt."+str(number_started_jobs+1) sj_tg.submit_job(bj_tg.pilot_url, jd) jobs_tg.append(sj_tg) number_started_jobs = number_started_jobs + 1 for i in range(0, NUMBER_JOBS_CONDOR): print "Start job no.: " +str(number_started_jobs + 1) + " on Condor" sj_condor = bigjob_condor.subjob(bigjob=bj_condor) jd_condor.output = "stdout_condor.txt."+str(number_started_jobs+1) jd_condor.error = "stderr_condor.txt."+str(number_started_jobs+1) sj_condor.submit_job(jd_condor) jobs_condor.append(sj_condor) number_started_jobs = number_started_jobs + 1 """ Monitor Progress """ while True: try: print "*******************************************************************************************************" print "Pilot Job/BigJob URL: " + bj_ec2.pilot_url + " State: " + str(bj_ec2.get_state_detail()) + " Time since launch: " + str(time.time()-start) print "Pilot Job/BigJob URL: " + bj_nimbus.pilot_url + " State: " + str(bj_nimbus.get_state_detail()) + " Time since launch: " + str(time.time()-start) print "Pilot Job/BigJob URL: " + bj_tg.pilot_url + " State: " + str(bj_tg.get_state_detail()) + " Time since launch: " + str(time.time()-start) print "Pilot Job/BigJob URL: " + bj_condor.pilot_url + " State: " + str(bj_condor.get_state_detail()) + " Time since launch: " + str(time.time()-start)
jd.arguments = ["NPT.conf"] jd.working_directory = "/work/lukas/saga/bigjob/data" jd.output = "namd.$(CLUSTER).$(PROCESS).$(NODE).out" jd.error = "namd.$(CLUSTER).$(PROCESS).$(NODE).err" attr = open(CONDOR_BIN + "/condor_attr", "w") attr.write("universe = parallel\n") attr.write("machine_count = 2\n") # number of nodes (not cores) attr.write("+WantParallelSchedulingGroups = True\n") # to avoid running on nodes from different clusters attr.close() jobs = [] for i in range (0, NUMBER_JOBS): print "Start job no.: " + str(i) print time.ctime() sj = bigjob_condor.subjob(bigjob=bj) sj.submit_job(jd) jobs.append(sj) # busy wait for completion sj = jobs.pop() while 1: try: state = str(sj.get_state()) print "state: " + state if state=="Failed" or state=="Done" or state=="Canceled" or state=="Suspended": if len(jobs) > 0: sj = jobs.pop() continue else: break