    resource_list[i].append({"resource_url": resource_url[i],
                             "walltime": walltime,
                             "number_nodes": str(number_nodes),
                             "cores_per_node": cores_per_node[i],
                             "allocation": allocation[i],
                             "queue": queue[i],
                             "bigjob_agent": bigjob_agent[i],
                             "userproxy": resource_proxy[i],
                             "working_directory": work_dir[i]})
    logger.info("resource_url " + resource_url[i])
    logger.info("affinity%s" % (i))
    print "Create manyjob service "
    # create one manyjob service per resource; to be replaced by the bfast affinity implementation
    mjs.append(many_job.many_job_service(resource_list[i], None))

# file transfer step; check whether the prepare_shortreads parameter is set in the job-conf file
### transfer the reference genome index files
if not (source_refgenome == "NONE"):
    for i in range(0, len(resources_used)):
        file_stage("file://%s" % (source_refgenome), ft_name[i] + bfast_ref_genome_dir[i])

# transfer the raw shortread files only to the first resource, since preparing
# the short reads on a single resource is sufficient
if not (source_raw_reads == "NONE"):
    file_stage("file://" + source_shortreads, ft_name[0] + bfast_reads_dir[0])

if (prepare_shortreads == "true"):
    prep_reads_starttime = time.time()
    # run the prepare-read-files step
    def run(self):
        # create a logfile
        LOG_FILENAME = self.job_conf["log_filename"]
        print LOG_FILENAME
        self.logger = logging.getLogger("dare_multijob")
        hdlr = logging.FileHandler(LOG_FILENAME)
        formatter = logging.Formatter("%(asctime)s %(levelname)s %(message)s")
        hdlr.setFormatter(formatter)
        self.logger.addHandler(hdlr)
        self.logger.setLevel(logging.INFO)

        # first info in the log file
        self.logger.info("Job id is " + self.job_conf["jobid"])
        self.logger.info("RESOURCES used are " + self.job_conf["num_resources"])

        try:
            # get the resource info and start the BigJob agents
            resource_list = []
            self.mjs = []
            for i in range(0, int(self.job_conf["num_resources"])):
                resource_list.append([])
                resource_list[i].append(dict_section(self.config, "resource_" + str(i)))
                # create one manyjob service per resource
                print "Create manyjob service "
                self.mjs.append(many_job.many_job_service(resource_list[i], None))

            total_number_of_jobs = 0

            ### run the steps
            wus_count = 0
            for STEP in range(0, int(self.job_conf["num_steps"])):
                starttime = time.time()

                # step started: update status
                if self.load_update_env == "true":
                    jobmodel_helper.update_job_detail_status(self.job_conf["jobid"], "In step " + str(STEP + 1))

                step_wus = self.job_conf["step_" + str(STEP)].split(",")
                if ("step_" + str(STEP)) not in self.job_conf["ft_steps"].split(","):
                    ### submit each of the step's work units to BigJob
                    for wu_count in range(0, len(step_wus)):
                        wu = dict_section(self.config, step_wus[wu_count].strip())
                        wus_count = wus_count + 1
                        self.submit_wu(wu)
                    self.wait_for_wus(wus_count)
                else:
                    # file transfer step
                    # time.sleep(10)
                    for wu_count in range(0, len(step_wus)):
                        fs = dict_section(self.config, step_wus[wu_count].strip())
                        self.submit_fs(fs)

                runtime = time.time() - starttime
                self.logger.info("STEP" + str(STEP) + " Runtime: " + str(runtime))

            # all jobs done: update status
            if self.load_update_env == "true":
                jobmodel_helper.update_job_detail_status(self.job_conf["jobid"], "")
                jobmodel_helper.update_job_status(self.job_conf["jobid"], 4)

            for i in range(0, int(self.job_conf["num_resources"])):
                self.mjs[i].cancel()
        except:
            traceback.print_exc(file=sys.stdout)
            try:
                for i in range(0, int(self.job_conf["num_resources"])):
                    self.mjs[i].cancel()
            except:
                sys.exit()
# submit via the manyjob (mj) abstraction: nine identical resource entries
resource_list = (
    {"gram_url": "gram://qb1.loni.org/jobmanager-pbs", "number_cores": "128", "allocation": "loni_jha_big", "queue": "workq", "re_agent": "$(HOME)/src/REMDgManager/bigjob/advert_launcher.sh"},
    {"gram_url": "gram://qb1.loni.org/jobmanager-pbs", "number_cores": "128", "allocation": "loni_jha_big", "queue": "workq", "re_agent": "$(HOME)/src/REMDgManager/bigjob/advert_launcher.sh"},
    {"gram_url": "gram://qb1.loni.org/jobmanager-pbs", "number_cores": "128", "allocation": "loni_jha_big", "queue": "workq", "re_agent": "$(HOME)/src/REMDgManager/bigjob/advert_launcher.sh"},
    {"gram_url": "gram://qb1.loni.org/jobmanager-pbs", "number_cores": "128", "allocation": "loni_jha_big", "queue": "workq", "re_agent": "$(HOME)/src/REMDgManager/bigjob/advert_launcher.sh"},
    {"gram_url": "gram://qb1.loni.org/jobmanager-pbs", "number_cores": "128", "allocation": "loni_jha_big", "queue": "workq", "re_agent": "$(HOME)/src/REMDgManager/bigjob/advert_launcher.sh"},
    {"gram_url": "gram://qb1.loni.org/jobmanager-pbs", "number_cores": "128", "allocation": "loni_jha_big", "queue": "workq", "re_agent": "$(HOME)/src/REMDgManager/bigjob/advert_launcher.sh"},
    {"gram_url": "gram://qb1.loni.org/jobmanager-pbs", "number_cores": "128", "allocation": "loni_jha_big", "queue": "workq", "re_agent": "$(HOME)/src/REMDgManager/bigjob/advert_launcher.sh"},
    {"gram_url": "gram://qb1.loni.org/jobmanager-pbs", "number_cores": "128", "allocation": "loni_jha_big", "queue": "workq", "re_agent": "$(HOME)/src/REMDgManager/bigjob/advert_launcher.sh"},
    {"gram_url": "gram://qb1.loni.org/jobmanager-pbs", "number_cores": "128", "allocation": "loni_jha_big", "queue": "workq", "re_agent": "$(HOME)/src/REMDgManager/bigjob/advert_launcher.sh"}
)
# resource_list = []
# resource_list.append({"gram_url": "gram://qb1.loni.org/jobmanager-pbs", "number_cores": "16", "allocation": "loni_jha_big", "queue": "workq", "re_agent": "$(HOME)/src/REMDgManager/bigjob/advert_launcher.sh"})

print "Create manyjob service "
mjs = many_job.many_job_service(resource_list, None)

jobs = []
job_start_times = {}
job_states = {}
cwd = os.getcwd()

for i in range(0, NUMBER_JOBS):
    # create job description
    jd = saga.job.description()
    jd.executable = "/bin/date"
    jd.number_of_processes = "1"
    jd.spmd_variation = "single"
    jd.arguments = [""]
    jd.working_directory = "/work/luckow"
    jd.output = "/work/luckow/output/stdout-" + str(i) + ".txt"
    jd.error = "/work/luckow/output/stderr-" + str(i) + ".txt"
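    # --- not in the original excerpt: a minimal sketch of the usual continuation ---
    # The excerpt ends before the description is submitted. This sketch assumes
    # the BigJob many_job API (create_job()/run()/get_state()) that the other
    # excerpts in this collection rely on.
    subjob = mjs.create_job(jd)
    subjob.run()
    jobs.append(subjob)
    job_start_times[subjob] = time.time()
    job_states[subjob] = subjob.get_state()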
# submit via the manyjob (mj) abstraction
resource_list = []
i = 0
mjs = []
for i in range(0, len(machines_used)):
    print machine_proxy[i]
    resource_list.append({"gram_url": gram_url[i],
                          "walltime": "80",
                          "number_cores": str(int(16) * 2),
                          "cores_per_node": cores_per_node[i],
                          "allocation": allocation[i],
                          "queue": queue[i],
                          "re_agent": re_agent[i],
                          "userproxy": machine_proxy[i],
                          "working_directory": work_dir[i],
                          "affinity": "affinity%s" % (i)})
    logger.info("gram_url" + gram_url[i])
    logger.info("affinity%s" % (i))

print "Create manyjob service "
mjs = many_job.many_job_service(resource_list, "advert.cct.lsu.edu")
"""
prep_reads_starttime = time.time()

### run the prepare-read-files step
sub_jobs_submit("new", "4", "8", "/bin/date", "2")  ## dummy job for testing
# sub_jobs_submit("reads", "1", jd_executable_solid2fastq, str(bfast_num_cores))

prep_reads_runtime = time.time() - prep_reads_starttime
logger.info("prepare reads Runtime: " + str(prep_reads_runtime))
"gram_url": "gram://qb1.loni.org/jobmanager-pbs", "number_cores": "8", "allocation": "loni_cybertools", "queue": "workq", "re_agent": "/home/yye00/ICAC/bigjob/advert_launcher.sh" }) #resource_list = [] #resource_list.append({"gram_url" : "gram://qb1.loni.org/jobmanager-pbs", "number_cores" : "16", "allocation" : "loni_cybtertools", "queue" : "workq", "re_agent": "$(HOME)/src/REMDgManager/bigjob/advert_launcher.sh"}) print "Create manyjob service " mjs = many_job.many_job_service(resource_list, None) jobs = [] job_start_times = {} job_states = {} cwd = os.getcwd() for i in range(0, NUMBER_JOBS): # create job description jd = saga.job.description() jd.executable = "/bin/date" jd.number_of_processes = "1" jd.spmd_variation = "single" jd.arguments = [""] jd.working_directory = "/work/yye00" jd.output = "/work/yye00/output/stdout-" + str(i) + ".txt" jd.error = "/work/yye00/output/stderr-" + str(i) + ".txt"
    def run(self):
        # create a logfile
        LOG_FILENAME = self.job_conf["log_filename"]
        print(LOG_FILENAME)
        self.logger = logging.getLogger('dare_multijob')
        hdlr = logging.FileHandler(LOG_FILENAME)
        formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        hdlr.setFormatter(formatter)
        self.logger.addHandler(hdlr)
        self.logger.setLevel(logging.INFO)

        # first info in the log file
        self.logger.info("Job id is " + self.job_conf["jobid"])
        self.logger.info("RESOURCES used are " + self.job_conf["num_resources"])

        try:
            # get the resource info and start the BigJob agents
            resource_list = []
            self.mjs = []
            for i in range(0, int(self.job_conf["num_resources"])):
                resource_list.append([])
                resource_list[i].append(
                    dict_section(self.config, "resource_" + str(i)))
                # create one manyjob service per resource
                print("Create manyjob service ")
                self.mjs.append(
                    many_job.many_job_service(resource_list[i], None))

            total_number_of_jobs = 0

            ### run the steps
            wus_count = 0
            for STEP in range(0, int(self.job_conf["num_steps"])):
                starttime = time.time()

                # step started: update status
                if (self.load_update_env == "true"):
                    jobmodel_helper.update_job_detail_status(
                        self.job_conf["jobid"], "In step " + str(STEP + 1))

                step_wus = self.job_conf["step_" + str(STEP)].split(',')
                if ("step_" + str(STEP)) not in self.job_conf["ft_steps"].split(','):
                    ### submit each of the step's work units to BigJob
                    for wu_count in range(0, len(step_wus)):
                        wu = dict_section(self.config, step_wus[wu_count].strip())
                        wus_count = wus_count + 1
                        self.submit_wu(wu)
                    self.wait_for_wus(wus_count)
                else:
                    # file transfer step
                    # time.sleep(10)
                    for wu_count in range(0, len(step_wus)):
                        fs = dict_section(self.config, step_wus[wu_count].strip())
                        self.submit_fs(fs)

                runtime = time.time() - starttime
                self.logger.info("STEP" + str(STEP) + " Runtime: " + str(runtime))

            # all jobs done: update status
            if (self.load_update_env == "true"):
                jobmodel_helper.update_job_detail_status(
                    self.job_conf["jobid"], "")
                jobmodel_helper.update_job_status(self.job_conf["jobid"], 4)

            for i in range(0, int(self.job_conf["num_resources"])):
                self.mjs[i].cancel()
        except:
            traceback.print_exc(file=sys.stdout)
            try:
                for i in range(0, int(self.job_conf["num_resources"])):
                    self.mjs[i].cancel()
            except:
                sys.exit()
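    # --- not in the original excerpt: a hypothetical helper sketch ---
    # submit_wu() and wait_for_wus() are referenced above but not shown. This
    # sketch assumes the work-unit dict returned by dict_section() carries
    # "executable", "arguments", "number_of_processes" and an optional
    # "affinity" index selecting one of the many_job services; all of these key
    # names, and the saga.job.description()/create_job() calls, are assumptions
    # based on the other excerpts, not confirmed by this one.
    def submit_wu(self, wu):
        jd = saga.job.description()
        jd.executable = wu["executable"]
        jd.arguments = wu.get("arguments", "").split()
        jd.number_of_processes = wu.get("number_of_processes", "1")
        if "working_directory" in wu:
            jd.working_directory = wu["working_directory"]
        # pick the manyjob service for the resource this work unit is bound to
        target_mjs = self.mjs[int(wu.get("affinity", 0))]
        subjob = target_mjs.create_job(jd)
        subjob.run()
        self.logger.info("Submitted work unit " + wu["executable"])
        return subjob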
        crjc = int(resources_job_count[i])
        cnnc = int(namd_jobs_size[i])
        # round the requested core count up to a full node (cppn cores per node)
        k = 0
        if (cnnc * crjc % cppn != 0):
            k = 1
        coress = cppn * (cnnc * crjc / cppn + k)
        print "namd_jobs_size:", namd_jobs_size[i], "resources_job_count:", resources_job_count[i]
        resource_list[i].append({"gram_url": gram_url[i],
                                 "walltime": walltime,
                                 "number_cores": str(coress),
                                 "cores_per_node": cores_per_node[i],
                                 "allocation": allocation[i],
                                 "queue": queue[i],
                                 "re_agent": re_agent[i],
                                 "userproxy": resource_proxy[i],
                                 "working_directory": work_dir[i]})
        logger.info("gram_url " + gram_url[i])
        logger.info("affinity%s" % (i))
        print "Create manyjob service "
        mjs.append(many_job.many_job_service(resource_list[i], None))
"""

### transfer the needed files
if not (reads_refgnome == "NONE"):
    for i in range(0, len(resources_used)):
        globus_file_stage("file://" + source_shortreads, ft_name[i] + bfast_reads_dir[i])

### transfer the shortread files, four per resource
p = 1
if not (source_shortreads == "NONE"):
    for i in range(0, len(resources_used)):
        for k in range(p, p + 4):
            cloud_file_stage(source_shortreads + "readss.%s.fastq" % (k), ft_name[i] + bfast_reads_dir[i])
        p = p + 4
"working_directory": (os.getcwd() + "/agent"), "walltime":3600 } resource_list = [] #resource_list.append( {"resource_url" : "gram://qb1.loni.org/jobmanager-pbs", "number_nodes" : "64", "allocation" : "<your allocation>", "queue" : "workq", "bigjob_agent": (os.getcwd() + "/bigjob_agent_launcher.sh")}) # "working_directory": (os.getcwd() + "/agent"), "walltime":10 }) resource_list.append(resource_dictionary) #resource_list.append( {"resource_url" : "pbspro://localhost/", "number_nodes" : "2", "processes_per_node":"4", "allocation" : "loni_jhabig12", "queue" : None, "bigjob_agent": (BIGJOB_HOME + "/bigjob_agent_launcher.sh"), # "working_directory": (os.getcwd() + "/agent"), "walltime":3600 }) #Flags for controlling dynamic BigJob add_additional_resources=False remove_additional_resources=False print "Create manyjob service " mjs = many_job.many_job_service(resource_list, "localhost") jobs = [] job_start_times = {} job_states = {} cwd = os.getcwd() for i in range(0, NUMBER_JOBS): # create job description jd = saga.job.description() jd.executable = "/bin/date" jd.number_of_processes = "1" jd.spmd_variation = "single" jd.arguments = [""] jd.working_directory = "/tmp" jd.output = "/tmp/stdout-" + str(i) + ".txt" jd.error = "/tmp/stderr-" + str(i) + ".txt"