def monitor_timestep(jobs):
    """
    Poll the grid until every job in ``jobs`` is finished and verified.

    On each pass, every job that has not yet been recorded as finished is
    queried with ``mig.job_finished``. A ``migerror.MigInterfaceError`` is
    logged and the job is treated as "not finished yet", so it is simply
    asked about again on the next pass. When a job reports finished, its
    results are fetched with ``download_result`` and checked with
    ``verify_job_output``; verified output files are then handed to
    ``clean_up_mig_home``.

    jobs -- list of job dicts; each must have a "job_id" key.

    Returns 0 once all jobs are finished and verified, or 1 as soon as a
    finished job fails verification.
    """
    finished_jobs = []

    while True:
        for job in jobs:
            if job in finished_jobs:
                continue

            try:
                job_done = mig.job_finished(job["job_id"])
            except migerror.MigInterfaceError as err:  # "as" form: valid on Python 2.6+ and 3
                log(str(err))
                job_done = False

            if not job_done:
                continue

            output_files = download_result(job)
            if not verify_job_output(output_files, job):
                # A finished job without its expected results aborts the
                # whole timestep.
                return 1

            finished_jobs.append(job)
            clean_up_mig_home(output_files)

        update(status="waiting for jobs", state=STATE_RUNNING)

        if len(jobs) == len(finished_jobs):
            update(status="iteration finished", state=STATE_RUNNING)
            return 0  # success

        # Wait before polling again.
        time.sleep(config.POLLING_INTERVAL)
# NOTE(review): a second definition of verify_job_output appears later in this
# file; at import time the later definition replaces this one.
def verify_job_output(output, job):
    """
    Check that every file listed in job["result_files"] is present in output.

    output -- container of the file names that were obtained for the job.
    job    -- job dict; must have a "result_files" key.

    Returns True when nothing is missing. On the first missing file a
    message is logged and False is returned.
    """
    for wanted in job["result_files"]:
        if wanted in output:
            continue
        log("Could not get result file %s for job %s" % (wanted, str(job)))
        return False
    return True
# NOTE(review): a second definition of main_solver appears later in this file;
# at import time the later definition replaces this one.
def main_solver(matlab_sh, matlab_bin, files, number_of_jobs, first_timestep, last_timestep):
    """
    The main solver: start and monitor grid jobs for every timestep.

    Timesteps are processed in descending order, from ``first_timestep``
    down to and including ``last_timestep``. For each timestep the jobs are
    submitted with ``submit_jobs``, recorded in the solver data (reloaded
    with ``load_solver_data`` and written back with ``save_solver_data``),
    monitored with ``monitor_timestep`` and finally passed to
    ``postprocess``.

    Returns 0 when all timesteps completed, or 1 as soon as
    ``monitor_timestep`` reports an error for a timestep.
    """
    # NOTE(review): proc_name, start_timestep and grid_enabled are neither
    # parameters nor locals; presumably module-level globals -- confirm.
    # In particular, check whether start_timestep was meant to be the
    # first_timestep parameter.
    solver_data = {
        "timesteps": [],
        "pid": os.getpid(),
        "name": proc_name,
        "start_timestep": start_timestep,
        "grid_enabled": grid_enabled,
    }

    os.mkdir(config.results_dir_name)  # for storing old results
    save_solver_data(proc_name, solver_data)

    for t in range(first_timestep, last_timestep - 1, -1):  # descending timesteps
        # Single-argument print() gives the same output under the Python 2
        # print statement and under Python 3. The two spaces reproduce what
        # the former `print "...: ", t` emitted.
        print("starting new time step:  %s" % (t,))

        timestep = str(t)
        grid_jobs = submit_jobs(matlab_sh, matlab_bin, files, number_of_jobs, timestep)
        timestep_data = {"jobs": grid_jobs, "timestep": timestep}

        # Reload the stored solver data before appending this timestep.
        solver_data = load_solver_data(proc_name)
        solver_data["timesteps"].append(timestep_data)
        save_solver_data(proc_name, solver_data)

        update(status="starting iteration", state=STATE_RUNNING)
        log("entering monitor")

        exit_code = monitor_timestep(grid_jobs)  # returns when jobs are done
        if exit_code:
            print("Monitor error.")
            return 1

        postprocess(grid_jobs, timestep)
        log(timestep + " done. starting next")

    clean_up_mig_home(files)
    clean_upload_dir()
    return 0
# NOTE(review): an earlier definition of main_solver precedes this one in the
# file; being the later one, this definition is the one in effect.
def main_solver(matlab_sh, matlab_bin, files, number_of_jobs, first_timestep, last_timestep):
    """
    The main solver: start and monitor grid jobs for every timestep.

    Timesteps run in descending order, from ``first_timestep`` down to and
    including ``last_timestep``. Each timestep goes through: ``submit_jobs``,
    bookkeeping of the submitted jobs in the solver data
    (``load_solver_data`` / ``save_solver_data``), ``monitor_timestep`` and
    ``postprocess``.

    Returns 0 when all timesteps completed, or 1 as soon as
    ``monitor_timestep`` reports an error for a timestep.
    """
    # NOTE(review): proc_name, start_timestep and grid_enabled are neither
    # parameters nor locals; presumably module-level globals -- confirm.
    # In particular, check whether start_timestep was meant to be the
    # first_timestep parameter.
    solver_data = {
        "timesteps": [],
        "pid": os.getpid(),
        "name": proc_name,
        "start_timestep": start_timestep,
        "grid_enabled": grid_enabled,
    }

    os.mkdir(config.results_dir_name)  # for storing old results
    save_solver_data(proc_name, solver_data)

    for t in range(first_timestep, last_timestep - 1, -1):  # descending timesteps
        # Single-argument print() gives the same output under the Python 2
        # print statement and under Python 3. The two spaces reproduce what
        # the former `print "...: ", t` emitted.
        print("starting new time step:  %s" % (t,))

        timestep = str(t)
        grid_jobs = submit_jobs(matlab_sh, matlab_bin, files, number_of_jobs, timestep)
        timestep_data = {"jobs": grid_jobs, "timestep": timestep}

        # Reload the stored solver data before appending this timestep.
        solver_data = load_solver_data(proc_name)
        solver_data["timesteps"].append(timestep_data)
        save_solver_data(proc_name, solver_data)

        update(status="starting iteration", state=STATE_RUNNING)
        log("entering monitor")

        exit_code = monitor_timestep(grid_jobs)  # returns when jobs are done
        if exit_code:
            print("Monitor error.")
            return 1

        postprocess(grid_jobs, timestep)
        log(timestep + " done. starting next")

    clean_up_mig_home(files)
    clean_upload_dir()
    return 0
# NOTE(review): a second definition of postprocess appears later in this file;
# at import time the later definition replaces this one.
def postprocess(jobs, timestep):
    """
    Run this code between iterations. Merges the results using matlab code.

    Runs MATLAB through the shell, calling the function named after
    ``config.postprocessing_code`` (its basename with the last two
    characters removed, presumably the ".m" extension), and logs MATLAB's
    stdout and stderr. Afterwards every file listed in each job's
    "result_files" is moved from the current working directory into
    ``config.results_dir_name`` and renamed to ``t_<timestep>_<file>``.
    A result file that is missing after post processing is logged, not
    raised.

    jobs     -- list of job dicts; each must have a "result_files" key.
    timestep -- timestep label as a string (it is concatenated into the
                new file name).
    """
    # NOTE(review): num_jobs is neither a parameter nor a local; presumably
    # a module-level global -- confirm (len(jobs) may have been intended).
    function_name = os.path.basename(config.postprocessing_code)[:-2]
    cmd = "matlab -nodesktop -nojvm -r '%s(%i);quit();'" % (function_name, num_jobs)

    log("Post processing....")
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(cmd)

    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate()
    log(" ".join([out, err]))

    # Move the files to the results dir to clean up our working directory.
    # The working directory does not change inside the loop, so resolve it
    # (and the destination directory) once.
    working_dir = os.getcwd()
    results_dir = os.path.join(working_dir, config.results_dir_name)

    for job in jobs:
        for result_file in job["result_files"]:
            src = os.path.join(working_dir, result_file)
            dst = os.path.join(results_dir, "t_" + timestep + "_" + result_file)

            if os.path.exists(src):
                shutil.move(src, dst)
            else:
                log("Error: could not find result file %s after post processing." % src)
# NOTE(review): an earlier definition of postprocess precedes this one in the
# file; being the later one, this definition is the one in effect.
def postprocess(jobs, timestep):
    """
    Run this code between iterations. Merges the results using matlab code.

    Runs MATLAB through the shell, calling the function named after
    ``config.postprocessing_code`` (its basename with the last two
    characters removed, presumably the ".m" extension), and logs MATLAB's
    stdout and stderr. Afterwards every file listed in each job's
    "result_files" is moved from the current working directory into
    ``config.results_dir_name`` and renamed to ``t_<timestep>_<file>``.
    A result file that is missing after post processing is logged, not
    raised.

    jobs     -- list of job dicts; each must have a "result_files" key.
    timestep -- timestep label as a string (it is concatenated into the
                new file name).
    """
    # NOTE(review): num_jobs is neither a parameter nor a local; presumably
    # a module-level global -- confirm (len(jobs) may have been intended).
    function_name = os.path.basename(config.postprocessing_code)[:-2]
    cmd = "matlab -nodesktop -nojvm -r '%s(%i);quit();'" % (function_name, num_jobs)

    log("Post processing....")
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(cmd)

    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate()
    log(" ".join([out, err]))

    # Move the files to the results dir to clean up our working directory.
    # The working directory does not change inside the loop, so resolve it
    # (and the destination directory) once.
    working_dir = os.getcwd()
    results_dir = os.path.join(working_dir, config.results_dir_name)

    for job in jobs:
        for result_file in job["result_files"]:
            src = os.path.join(working_dir, result_file)
            dst = os.path.join(results_dir, "t_" + timestep + "_" + result_file)

            if os.path.exists(src):
                shutil.move(src, dst)
            else:
                log("Error: could not find result file %s after post processing." % src)
# NOTE(review): an earlier definition of verify_job_output precedes this one in
# the file; being the later one, this definition is the one in effect.
def verify_job_output(output, job):
    """
    Tell whether all of a job's expected result files were obtained.

    output -- container of the file names that were obtained for the job.
    job    -- job dict; must have a "result_files" key.

    Returns True if each entry of job["result_files"] is found in output.
    Otherwise the first missing entry is logged and False is returned.
    """
    for result_name in job["result_files"]:
        present = result_name in output
        if not present:
            log("Could not get result file %s for job %s" % (result_name, str(job)))
            return False
    return True