def set_priority(self, priority=0):
    """
    Set the priority of the chunk's submitted jobs.
    """
    logging.info('\t---> Setting priority Chunk %d %s %s' %
                 (self.chunk_id, datetime2datewrf(self.start_date),
                  datetime2datewrf(self.end_date)))
    l_jobs = self.job.filter_by(status=Job.Status.SUBMITTED).all()
    if not l_jobs:
        logging.info('\t\tThere are no jobs to set priority on.')
    else:
        for job in l_jobs:
            job.dryrun = self.dryrun
            job.set_priority(priority)
def cancel(self, hard=False):
    """
    Cancel the chunk's active jobs.
    """
    logging.info('\t---> Canceling Chunk %d %s %s' %
                 (self.chunk_id, datetime2datewrf(self.start_date),
                  datetime2datewrf(self.end_date)))
    l_jobs = self.job.filter(
        and_(Job.status != Job.Status.PREPARED,
             Job.status != Job.Status.FINISHED,
             Job.status != Job.Status.FAILED,
             Job.status != Job.Status.CANCEL)).all()
    if not l_jobs:
        logging.info('\t\tThere are no jobs to cancel.')
    else:
        for job in l_jobs:
            job.dryrun = self.dryrun
            job.cancel(hard)
def wps2wrf(namelist_wps, namelist_input, sdate, edate, maxdom,
            chunk_is_restart, timestep_dxfactor='6'):
    nmlw = fn.FortranNamelist(namelist_wps)
    nmli = fn.WrfNamelist(namelist_input)
    nmli.setValue("max_dom", maxdom)
    for var in ["run_days", "run_hours", "run_minutes", "run_seconds"]:
        nmli.setValue(var, 0)
    nmli.setMaxDomValue("start_year", sdate.year)
    nmli.setMaxDomValue("start_month", sdate.month)
    nmli.setMaxDomValue("start_day", sdate.day)
    nmli.setMaxDomValue("start_hour", sdate.hour)
    nmli.setMaxDomValue("end_year", edate.year)
    nmli.setMaxDomValue("end_month", edate.month)
    nmli.setMaxDomValue("end_day", edate.day)
    nmli.setMaxDomValue("end_hour", edate.hour)
    for var in ["parent_grid_ratio", "i_parent_start", "j_parent_start",
                "e_we", "e_sn"]:
        nmli.setValue(var, nmlw.getValue(var))
    nmli.setValue("parent_time_step_ratio", nmlw.getValue("parent_grid_ratio"))
    if exists("met_em.d01.%s.nc" % datetime2datewrf(sdate)):
        # If met_em files are present, real.exe will be run; only then can we
        # obtain the information (num_metgrid_*levels) real.exe needs
        nmli.setValue("num_metgrid_levels", get_num_metgrid_levels())
        nmli.setValue("num_metgrid_soil_levels", get_num_metgrid_soil_levels())
    #
    # Compute the grid spacings. Read them from the met_em files if the
    # projection is lat-lon.
    #
    nmli.setValue("grid_id", list(range(1, maxdom + 1)))
    # Update parent_id in the namelist
    nmli.setValue("parent_id", nmlw.getValue("parent_id"))
    alldx = []
    for idom in range(1, maxdom + 1):
        thisdx = get_latlon_dx(sdate, "d0%i" % idom)
        alldx.append(thisdx)
    nmli.setValue("dx", alldx)
    nmli.setValue("dy", alldx)  # May be an issue for global WRF
    #
    # Compute the time step.
    #
    if timestep_dxfactor.startswith("manual:"):
        nmli.setValue("time_step", int(timestep_dxfactor[7:]))
    elif timestep_dxfactor.startswith("adaptive:"):
        nmli.setValue("use_adaptive_time_step", ".true.", "domains")
    else:
        # timestep_dxfactor may be a numeric expression, hence the eval
        nmli.setValue("time_step",
                      get_time_step(nmli.getValue("dx")[0],
                                    eval(timestep_dxfactor)))
    nmli.setValue("restart", chunk_is_restart)
    #
    # Trim, check, overwrite the file and ... we are done!
    #
    # nmli.trimMaxDom()
    nmli.wrfCheck()
    # nmli.extendMaxDomVariables()
    nmli.overWriteNamelist()
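# Note on timestep_dxfactor: besides "manual:<seconds>" and "adaptive:",
# a plain numeric factor follows the usual WRF rule of thumb
# time_step <= 6 * dx (with dx in km). get_time_step() is defined elsewhere
# in WRF4G; a minimal sketch of that heuristic, assuming dx is given in
# metres (illustration only, not the project's implementation):
#
#     def example_time_step(coarse_dx, factor=6.0):
#         # dx = 30000 m with the default factor 6 -> 180 s
#         return int(factor * coarse_dx / 1000.0)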
def set_restart(self, restart_date):
    """
    Set the realization's restart date.
    """
    try:
        datetime_restart_date = datewrf2datetime(restart_date)
    except Exception:
        raise Exception("ERROR: restart date is malformed")
    else:
        logging.info('---> Setting restart date %s' %
                     datetime2datewrf(datetime_restart_date))
        self.restart = datetime_restart_date
def cycle_chunks(self):
    """
    Create the chunks needed for the realization.
    """
    # Define which calendar is going to be used
    exp_calendar = Calendar(self.cfg['calendar'])
    chunk_id = 1
    chunk_start_date = self.start_date
    while chunk_start_date < self.end_date:
        chunk_end_date = exp_calendar.add(chunk_start_date, self.chunk_size)
        if chunk_end_date > self.end_date:
            chunk_end_date = self.end_date
        # Check whether the chunk is already in the database
        ch = self.check_db(rea_id=self.id,
                           chunk_start_date=chunk_start_date,
                           chunk_end_date=chunk_end_date,
                           chunk_id=chunk_id)
        if not ch:
            logging.info("\t\t---> Chunk %d %s %s" %
                         (chunk_id, datetime2datewrf(chunk_start_date),
                          datetime2datewrf(chunk_end_date)))
            # Create the chunk
            ch = Chunk()
            ch.rea_id = self.id
            ch.start_date = chunk_start_date
            ch.end_date = chunk_end_date
            ch.wps = 0
            ch.chunk_id = chunk_id
            ch.status = Chunk.Status.PREPARED
            # Add the chunk to the realization
            self.chunk.append(ch)
        chunk_start_date = chunk_end_date
        chunk_id = chunk_id + 1
    # Set the number of chunks of the realization
    self.nchunks = chunk_id - 1
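# Worked example (assuming a standard calendar and a 10-day chunk_size):
# a realization from 1990-01-01_00:00:00 to 1990-01-25_00:00:00 is cycled
# into
#   chunk 1: 1990-01-01 -> 1990-01-11
#   chunk 2: 1990-01-11 -> 1990-01-21
#   chunk 3: 1990-01-21 -> 1990-01-25  (clipped to the realization end)
# and self.nchunks is set to 3. Consecutive chunks share their boundary
# date, so each chunk restarts from the end date of the previous one.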
def get_latlon_dx(start_date, dom):
    # Try to get dx from the met_em or wrfinput files. This is only
    # required for lat-lon grids; otherwise dx is available in the
    # namelist.wps file
    dxfile = None
    file_name = "met_em.%s.%s.nc" % (dom, datetime2datewrf(start_date))
    if exists(file_name):
        dxfile = file_name
    file_name = "wrfinput_%s" % dom
    if exists(file_name):
        dxfile = file_name
    if dxfile:
        shcmd = ("ncdump -h %s | grep 'DX =' | sed -e 's/^\t//' | "
                 "tr '=;' ' ' | awk '{printf \"%%f\", $2}'" % dxfile)
        rval = round(float(os.popen(shcmd).read().strip()), 4)
    else:
        raise Exception('get_latlon_dx: no met_em or wrfinput file found')
    return rval
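# The ncdump pipeline above reduces a global-attribute header line such as
# ':DX = 20000.f ;' to '20000.000000'. A sketch of the same lookup done
# natively (assumes the netCDF4 package, which WRF4G does not otherwise
# require on the worker node):
#
#     from netCDF4 import Dataset
#
#     def example_get_dx(file_name):
#         with Dataset(file_name) as nc:
#             return round(float(nc.getncattr('DX')), 4)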
def run(self, index, rerun=False, priority=0):
    """
    Run a chunk, i.e. submit a DRM4G job.
    """
    # Submit a GridWay job and save its data in the Job table
    gw_job = GWJob()
    # Create the template
    rea_name = self.realization.name
    exp_name = self.realization.experiment.name
    exp_path = join(WRF4G_DIR, 'var', 'submission', exp_name)
    rea_path = join(exp_path, rea_name)
    wrf4g_package = join(exp_path, "WRF4G.tar.gz")
    if not exists(wrf4g_package):
        raise Exception("'%s' file does not exist" % wrf4g_package)
    # Files to add to the input sandbox
    inputsandbox = "file://%s," % wrf4g_package
    inputsandbox += "file://%s/db.conf," % exp_path
    inputsandbox += "file://%s/experiment.wrf4g," % exp_path
    inputsandbox += "file://%s/realization.json," % rea_path
    inputsandbox += "file://%s/namelist.input" % rea_path
    # Add the input file if it exists
    input_files = join(exp_path, 'wrf4g_files.tar.gz')
    if exists(input_files):
        inputsandbox += ",file://%s" % input_files
    # Files to add to the output sandbox
    outputsandbox = "log_%d_${JOB_ID}.tar.gz, events.pkl" % self.chunk_id
    arguments = '%s %s %d %s %s %d' % (
        exp_name, rea_name, self.chunk_id,
        datetime2datewrf(self.start_date), datetime2datewrf(self.end_date),
        1 if rerun else 0)
    # Create the job template
    file_template = gw_job.create_template(
        name=rea_name,
        directory=rea_path,
        arguments=arguments,
        np=int(self.realization.cfg.get('np', '1')),
        req=self.realization.cfg.get('requirements', ''),
        environ=self.realization.cfg.get('environment', ''),
        inputsandbox=inputsandbox,
        outputsandbox=outputsandbox)
    # Submit the template
    job = Job()  # create a Job object
    time.sleep(0.1)  # short pause between consecutive submissions
    if index == 0:
        # The first chunk of the realization has no dependencies
        job.gw_job = gw_job.submit(priority=priority,
                                   file_template=file_template)
    else:
        # If the chunk is not the first of the realization, gwsubmit
        # receives the gw_job of the preceding chunk as a dependency
        chunk_before_id = self.chunk_id - 1
        chunk_before = self.realization.chunk.\
            filter(Chunk.chunk_id == chunk_before_id).one()
        job_before = chunk_before.job.order_by(Job.id)[-1]
        id_job_before = job_before.id
        gw_job_before = job_before.gw_job
        job.gw_job = gw_job.submit(dep=gw_job_before, priority=priority,
                                   file_template=file_template)
    job.chunk_id = self.chunk_id
    job.run(rerun)
    self.job.append(job)
    # Update the chunk status
    self.status = Chunk.Status.SUBMITTED
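# For illustration (made-up names and dates): for experiment 'exp01',
# realization 'rea01' and chunk 2 of a rerun, the wrapper arguments built
# above read 'exp01 rea01 2 1990-01-11_00:00:00 1990-01-21_00:00:00 1'.
# Submitting with dep=gw_job_before should make DRM4G hold each chunk until
# the previous chunk's job has completed, serializing the chunks.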
def get_restart(self):
    """
    Log the restart date.
    """
    logging.info(datetime2datewrf(self.restart))
def run(self, first_chunk_run=None, last_chunk_run=None, rerun=False,
        priority=0):
    """
    Run the chunks of the realization from first_chunk_run to
    last_chunk_run. If no range is given, run every chunk that has not
    finished yet, resuming from the last restart point.
    """
    first_chunk_run = int(first_chunk_run) if first_chunk_run else None
    last_chunk_run = int(last_chunk_run) if last_chunk_run else None
    # Check the status of the realization
    if self.status == Realization.Status.FINISHED and not rerun:
        logging.warning("\tRealization '%s' already finished." % self.name)
    elif (self.status == Realization.Status.SUBMITTED or
          self.status == Realization.Status.RUNNING) and not rerun:
        logging.warning("\tRealization '%s' has been submitted." % self.name)
    elif first_chunk_run and first_chunk_run < 0:
        logging.error("\tERROR: The first chunk to run, '%d', is invalid." %
                      first_chunk_run)
    elif last_chunk_run and last_chunk_run < 0:
        logging.error("\tERROR: The last chunk to run, '%d', is invalid." %
                      last_chunk_run)
    elif (last_chunk_run and first_chunk_run) and \
            last_chunk_run < first_chunk_run:
        logging.error("\tERROR: The last chunk to run is lower than "
                      "the first one.")
    elif last_chunk_run and last_chunk_run > self.nchunks:
        logging.error("\tERROR: The last chunk does not exist.")
    elif first_chunk_run and first_chunk_run > self.nchunks:
        logging.error("\tERROR: The first chunk does not exist.")
    else:
        # Search for the first chunk to run
        if rerun and first_chunk_run:
            ch = self.chunk.filter(Chunk.chunk_id == first_chunk_run).one()
            self.restart = ch.start_date
            self.current_date = ch.start_date
            self.current_chunk = first_chunk_run
        elif rerun and not first_chunk_run:
            self.restart = None
            first_chunk_run = self.current_chunk = 1
        else:
            if not self.restart:
                # Run every chunk of the realization
                first_chunk_run = 1
            else:
                # Search for the chunk with start_date <= restart <= end_date
                try:
                    first_chunk = self.chunk.filter(
                        and_(Chunk.start_date <= self.restart,
                             Chunk.end_date >= self.restart)).all()[-1]
                except Exception:
                    raise Exception('There are no chunks to run.')
                else:
                    if first_chunk_run and \
                            first_chunk.chunk_id != first_chunk_run:
                        raise Exception('Use the option --rerun.')
                    else:
                        first_chunk_run = self.current_chunk = \
                            first_chunk.chunk_id
        # Search for the last chunk to run; if none is given,
        # run up to the last chunk of the realization
        if not last_chunk_run:
            last_chunk_run = self.nchunks
        # Search for the chunks to run
        l_chunks = self.chunk.filter(
            and_(Chunk.chunk_id >= first_chunk_run,
                 Chunk.chunk_id <= last_chunk_run)).all()
        # Run the chunks
        for index, chunk in enumerate(l_chunks):
            # Print the chunk data
            logging.info('\t---> Submitting Chunk %d %s %s' %
                         (chunk.chunk_id,
                          datetime2datewrf(chunk.start_date),
                          datetime2datewrf(chunk.end_date)))
            if not self.dryrun:
                chunk.run(index, rerun, priority)
        if not self.dryrun:
            # Update the realization status
            self.status = Realization.Status.SUBMITTED
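# Worked example: with chunk 1 covering 01-01 -> 01-11 and chunk 2 covering
# 01-11 -> 01-21, a stored restart of 1990-01-15 satisfies
# start_date <= restart <= end_date only for chunk 2, so the realization
# resumes there. A restart that falls exactly on a chunk boundary matches
# both chunks, and [-1] picks the later one (assuming the query returns
# chunks in ascending chunk_id order).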
def file_name_wrf(self):
    return self.file_name + datetime2datewrf(self.date)
def date_wrf(self):
    return datetime2datewrf(self.date)
def launch_wrapper(params):
    """
    Prepare and launch the job wrapper.
    """
    ##
    # Create log directory
    ##
    try:
        os.makedirs(params.log_path)
    except Exception:
        raise JobError("Error creating the directory "
                       "'%s' on the worker node" % params.log_path,
                       Job.CodeError.LOG_PATH)
    ##
    # Logging configuration
    ##
    logging.basicConfig(format='%(asctime)s %(message)s',
                        filename=params.log_file, level=params.log_level)
    ##
    # Show information about paths
    ##
    logging.info('Information about directories')
    # Show root path
    logging.info('Root path = %s' % params.root_path)
    # Show local path
    logging.info('Run path = %s' % params.local_path)
    ##
    # A .lock file prevents DRM4G from removing root_path
    # when clean_after_run is 'no'
    ##
    if params.clean_after_run == 'no':
        logging.info("Creating a .lock file")
        f = open(join(params.root_path, '.lock'), 'w')
        f.close()
    ##
    # Get database session
    ##
    job_db = JobDB(params.job_id)
    try:
        ##
        # Check if this job should run
        ##
        if job_db.get_job_status() == Job.Status.CANCEL:
            raise JobError("This job should not run",
                           Job.CodeError.JOB_SHOULD_NOT_RUN)
        job_db.set_job_status(Job.Status.RUNNING)
        ##
        # Create a remote tree directory for the realization
        ##
        logging.info("Creating remote tree directory under '%s'" %
                     params.output_path)
        job_db.set_job_status(Job.Status.CREATE_OUTPUT_PATH)
        for remote_path in [params.output_path,
                            params.exp_output_path,
                            params.rea_output_path,
                            params.out_rea_output_path,
                            params.rst_rea_output_path,
                            params.real_rea_output_path,
                            params.log_rea_output_path]:
            vcp_dir = VCPURL(remote_path)
            if not vcp_dir.exists():
                logging.info("Creating remote directory '%s'" % remote_path)
                vcp_dir.mkdir()
        ##
        # Copy the configuration files to the output path
        ##
        logging.info("Copying configuration files to '%s'" %
                     params.output_path)
        for conf_file in ["db.conf", "experiment.wrf4g",
                          "realization.json", "namelist.input"]:
            orig = join(params.root_path, conf_file)
            dest = join(params.rea_output_path, conf_file)
            try:
                copy_file(orig, dest)
            except Exception:
                logging.warning("Error copying file '%s' to '%s'" %
                                (orig, dest))
        ##
        # Set PATH and LD_LIBRARY_PATH
        ##
        logging.info('Setting PATH and LD_LIBRARY_PATH variables')
        root_bin_path = join(params.root_path, 'bin')
        PATH = '%s:%s' % (root_bin_path, os.environ.get('PATH'))
        logging.info("PATH=%s" % PATH)
        os.environ['PATH'] = PATH
        LD_LIBRARY_PATH = '%s:%s:%s' % (join(params.root_path, 'lib'),
                                        join(params.root_path, 'lib64'),
                                        os.environ.get('LD_LIBRARY_PATH'))
        logging.info("LD_LIBRARY_PATH=%s" % LD_LIBRARY_PATH)
        os.environ['LD_LIBRARY_PATH'] = LD_LIBRARY_PATH
        PYTHONPATH = '%s:%s' % (join(params.root_path, 'lib', 'python'),
                                os.environ.get('PYTHONPATH'))
        logging.info("PYTHONPATH=%s" % PYTHONPATH)
        os.environ['PYTHONPATH'] = PYTHONPATH
        if 'wrf_all_in_one' in params.app:
            OPAL_PREFIX = params.root_path
            logging.info("OPAL_PREFIX=%s" % OPAL_PREFIX)
            os.environ['OPAL_PREFIX'] = OPAL_PREFIX
        ##
        # Configure the app
        ##
        logging.info('Configuring the app')
        job_db.set_job_status(Job.Status.CONF_APP)
        archives_path = join(params.root_path, 'archives')
        logging.info("Creating '%s' directory" % archives_path)
        os.makedirs(archives_path)
        for app in params.app.split('\n'):
            app_tag, app_type, app_value = app.split('|', 2)
            if 'bundle' in app_type:
                orig = app_value.strip()
                dest = join(archives_path, basename(app_value.strip()))
                try:
                    logging.info("Trying to copy '%s'" % orig)
                    copy_file(orig, dest)
                except Exception:
                    raise JobError("'%s' could not be copied" % orig,
                                   Job.CodeError.COPY_APP)
                else:
                    logging.info("Unpacking '%s' to '%s'" %
                                 (dest, params.root_path))
                    extract(dest, to_path=params.root_path)
            elif 'command' in app_type:
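                # Each entry of params.app has the form 'tag|type|value'
                # (split on '|' above): a 'bundle' entry names an archive to
                # copy and unpack, while a 'command' entry, handled here, is
                # a shell snippet whose resulting environment is captured by
                # appending '&& env'.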
                logging.info('Configuring source script for %s' % app_tag)
                app_cmd = "{ %s; } && env" % app_value.strip()
                code, output = exec_cmd(app_cmd)
                if code:
                    logging.info(output)
                    raise JobError("Error executing source script for %s" %
                                   app_tag, Job.CodeError.SOURCE_SCRIPT)
                for line in output.splitlines():
                    if "=" in line and "(" not in line:
                        try:
                            key, value = line.split("=", 1)
                        except Exception:
                            pass
                        else:
                            logging.debug("%s=%s" % (key, value))
                            os.environ[key] = value
            else:
                raise JobError("App type does not exist",
                               Job.CodeError.APP_ERROR)
        wrf4g_files = join(params.root_path, 'wrf4g_files.tar.gz')
        if isfile(wrf4g_files):
            logging.info("Unpacking '%s'" % wrf4g_files)
            extract(wrf4g_files, to_path=params.root_path)
        ##
        # Clean the archives directory
        ##
        shutil.rmtree(archives_path)
        ##
        # Make the bin files executable
        ##
        logging.info('Making bin files executable')
        for exe_file in os.listdir(root_bin_path):
            os.chmod(join(root_bin_path, exe_file), stat.S_IRWXU)
        if 'wrf_all_in_one' in params.app:
            os.chmod(join(params.root_path, 'WPS', 'ungrib', 'ungrib.exe'),
                     stat.S_IRWXU)
            os.chmod(join(params.root_path, 'WPS', 'metgrid', 'metgrid.exe'),
                     stat.S_IRWXU)
            os.chmod(join(params.root_path, 'WRFV3', 'run', 'real.exe'),
                     stat.S_IRWXU)
            os.chmod(join(params.root_path, 'WRFV3', 'run', 'wrf.exe'),
                     stat.S_IRWXU)
        ##
        # Prepare the parallel environment. This is a little bit tricky:
        # WPS and WRFV3 have to be copied to the local path of every
        # worker node.
        ##
        if (params.parallel_real == 'yes' or params.parallel_wrf == 'yes') and \
                (params.local_path != params.root_path):
            logging.info("Wiping the directory '%s' on all worker nodes" %
                         params.local_path)
            code, output = exec_cmd("%s rm -rf %s" %
                                    (params.parallel_run_pernode,
                                     params.local_path))
            if code:
                logging.info(output)
                raise JobError("Error wiping the directory '%s' on "
                               "worker nodes" % params.local_path,
                               Job.CodeError.LOCAL_PATH)
            code, output = exec_cmd("%s mkdir -p %s" %
                                    (params.parallel_run_pernode,
                                     params.local_path))
            if code:
                logging.info(output)
                raise JobError("Error creating directory on all worker nodes",
                               Job.CodeError.COPY_FILE)
            for directory in ['WPS', 'WRFV3']:
                exec_cmd("%s cp -r %s %s" % (params.parallel_run_pernode,
                                             join(params.root_path, directory),
                                             params.local_path))
                if not exists(join(params.local_path, directory)):
                    raise JobError("Error copying '%s' directory to all "
                                   "worker nodes" % directory,
                                   Job.CodeError.COPY_FILE)
        ##
        # Binaries for execution
        ##
        ungrib_exe = metgrid_exe = real_exe = wrf_exe = None
        if 'wrf_all_in_one' in params.app:
            ungrib_exe = join(params.wps_path, 'ungrib', 'ungrib.exe')
            metgrid_exe = join(params.wps_path, 'metgrid', 'metgrid.exe')
            real_exe = join(params.wrf_run_path, 'real.exe')
            wrf_exe = join(params.wrf_run_path, 'wrf.exe')
        else:
            ungrib_exe = which('ungrib.exe')
            metgrid_exe = which('metgrid.exe')
            real_exe = which('real.exe')
            wrf_exe = which('wrf.exe')
        if not ungrib_exe or not metgrid_exe or not real_exe or not wrf_exe:
            raise JobError("Error finding WRF binaries", Job.CodeError.BINARY)
        ##
        # Obtain information about the WN
        ##
        logging.info('Obtaining information about the worker node')
        # Host info
        logging.info('Host name = %s' % get_hostname())
        # OS info
        logging.info('Linux release = %s' % os_release())
        # CPU info
        model_name, number_of_cpus = cpu_info()
        logging.info('CPU (model) = %s' % model_name)
        logging.info('CPU (processors) = %d' % number_of_cpus)
        # Memory info
        logging.info('RAM Memory = %s MB' % mem_info())
        # Disk space check
        logging.info('DiskSpace of %s = %d GB' %
                     (params.root_path, disk_space_check(params.root_path)))
        ##
        # Check the restart date
        ##
        logging.info('Checking restart date')
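        # Decision summary for the restart date (added for clarity):
        #   - no stored date, or an explicit rerun -> start from chunk_sdate
        #   - chunk_sdate <= rdate < chunk_edate   -> restart WRF from rdate
        #   - rdate == chunk_edate, or outside the chunk -> abort with
        #     RESTART_MISMATCH, since the chunk has nothing left to run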
        rdate = job_db.get_restart_date()
        if not rdate or params.rerun:
            logging.info("Restart date will be '%s'" % params.chunk_sdate)
            if params.nchunk > 1:
                chunk_rerun = ".T."
            else:
                chunk_rerun = ".F."
        elif params.chunk_sdate <= rdate < params.chunk_edate:
            logging.info("Restart date will be '%s'" % rdate)
            params.chunk_rdate = rdate
            chunk_rerun = ".T."
        elif rdate == params.chunk_edate:
            raise JobError("Restart file is the end date",
                           Job.CodeError.RESTART_MISMATCH)
        else:
            raise JobError("There is a mismatch in the restart date",
                           Job.CodeError.RESTART_MISMATCH)
        if chunk_rerun == ".T.":
            pattern = "wrfrst*" + datetime2dateiso(params.chunk_rdate) + '*'
            files_downloaded = 0
            for file_name in VCPURL(params.rst_rea_output_path).ls(pattern):
                # Files follow the pattern: wrfrst_d01_19900101T000000Z.nc
                orig = join(params.rst_rea_output_path, file_name)
                dest = join(params.wrf_run_path,
                            WRFFile(file_name).file_name_wrf())
                try:
                    logging.info("Downloading file '%s'" % file_name)
                    copy_file(orig, dest)
                except Exception:
                    raise JobError("'%s' could not be copied" % file_name,
                                   Job.CodeError.COPY_RST_FILE)
                files_downloaded += 1
            if not files_downloaded:
                raise JobError("No restart file has been downloaded",
                               Job.CodeError.COPY_RST_FILE)
            job_db.set_job_status(Job.Status.DOWN_RESTART)
        ##
        # Either WPS runs, or the boundaries and initial conditions
        # are already available
        ##
        # Copy namelist.input to wrf_run_path
        shutil.copyfile(join(params.root_path, 'namelist.input'),
                        params.namelist_input)
        if job_db.has_wps():
            logging.info("The boundaries and initial conditions are available")
            orig = join(params.domain_path, basename(params.namelist_wps))
            dest = params.namelist_wps
            try:
                logging.info("Downloading file 'namelist.wps'")
                copy_file(orig, dest)
            except Exception:
                raise JobError("'namelist.wps' could not be copied",
                               Job.CodeError.COPY_FILE)
            wps2wrf(params.namelist_wps, params.namelist_input,
                    params.chunk_rdate, params.chunk_edate, params.max_dom,
                    chunk_rerun, params.timestep_dxfactor)
            job_db.set_job_status(Job.Status.DOWN_WPS)
            pattern = "wrf[lbif]*_d\d\d_" + \
                      datetime2dateiso(params.chunk_rdate) + "*"
            for file_name in VCPURL(params.real_rea_output_path).ls(pattern):
                orig = join(params.real_rea_output_path, file_name)
                # From wrflowinp_d08_ we remove the trailing '_'
                dest = join(params.wrf_run_path,
                            WRFFile(file_name).file_name[:-1])
                try:
                    logging.info("Downloading file '%s'" % file_name)
                    copy_file(orig, dest)
                except Exception:
                    raise JobError("'%s' could not be copied" % file_name,
                                   Job.CodeError.COPY_REAL_FILE)
        else:
            logging.info("The boundaries and initial conditions are "
                         "not available")
            # Change to the WPS path
            os.chdir(params.wps_path)
            ##
            # Get the geo_em files and namelist.wps
            ##
            logging.info("Downloading geo_em files and namelist.wps")
            for file_name in VCPURL(params.domain_path).ls('*'):
                if '.nc' in file_name or 'namelist' in file_name:
                    orig = join(params.domain_path, file_name)
                    dest = join(params.wps_path, file_name)
                    try:
                        logging.info("Downloading file '%s'" % file_name)
                        copy_file(orig, dest)
                    except Exception:
                        raise JobError("'%s' could not be copied" % file_name,
                                       Job.CodeError.COPY_BOUND)
            job_db.set_job_status(Job.Status.DOWN_BOUND)
            ##
            # Modify the namelist
            ##
            logging.info("Modifying namelist.wps")
            try:
                nmlw = fn.FortranNamelist(params.namelist_wps)
                nmlw.setValue("max_dom", params.max_dom)
                nmlw.setValue("start_date", params.max_dom *
                              [datetime2datewrf(params.chunk_sdate)])
                nmlw.setValue("end_date", params.max_dom *
                              [datetime2datewrf(params.chunk_edate)])
                nmlw.setValue("interval_seconds", params.extdata_interval)
                nmlw.overWriteNamelist()
            except Exception as err:
                raise JobError("Error modifying namelist: %s" % err,
                               Job.CodeError.NAMELIST_FAILED)
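            # extdata_vtable, preprocessor and extdata_path are parallel
            # comma-separated lists: the loop below zips them so that the
            # i-th Vtable is used with the i-th preprocessor and external
            # data path. For example (made-up values), extdata_vtable='ERA5,SST'
            # with preprocessor='default,default' drives two ungrib passes,
            # one per external data source.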
            ##
            # Preprocessor and ungrib
            ##
            logging.info("Running preprocessors and ungrib")
            for i, (vt, pp, epath) in enumerate(zip(
                    params.extdata_vtable.replace(' ', '').split(','),
                    params.preprocessor.replace(' ', '').split(','),
                    params.extdata_path.replace(' ', '').split(','))):
                try:
                    nmlw = fn.FortranNamelist(params.namelist_wps)
                    nmlw.setValue("prefix", vt, "ungrib")
                    nmlw.overWriteNamelist()
                except Exception as err:
                    raise JobError("Error modifying namelist: %s" % err,
                                   Job.CodeError.NAMELIST_FAILED)
                vtable = join(params.wps_path, 'Vtable')
                if isfile(vtable):
                    os.remove(vtable)
                # Create a symbolic link to the Vtable
                os.symlink(join(params.wps_path, 'ungrib', 'Variable_Tables',
                                'Vtable.%s' % vt), vtable)
                ##
                # Execute the preprocessor
                ##
                logging.info("Running preprocessor.%s" % pp)
                if not which("preprocessor.%s" % pp):
                    raise JobError("Preprocessor '%s' does not exist" % pp,
                                   Job.CodeError.PREPROCESSOR_FAILED)
                optargs = ""
                for arg in params.preprocessor_optargs.values():
                    optargs = optargs + " " + arg.split(',')[i]
                preprocessor_log = join(params.log_path,
                                        'preprocessor.%s.log' % pp)
                code, output = exec_cmd(
                    "preprocessor.%s %s %s %s %s &> %s" %
                    (pp, datetime2datewrf(params.chunk_rdate),
                     datetime2datewrf(params.chunk_edate),
                     epath, optargs, preprocessor_log))
                if code:
                    logging.info(output)
                    raise JobError("Preprocessor '%s' has failed" % pp,
                                   Job.CodeError.PREPROCESSOR_FAILED)
                ##
                # Link the grib files as GRIBFILE.AAA, GRIBFILE.AAB, ...
                ##
                grb_data_path = join(params.wps_path, 'grbData')
                for grib_file in glob.glob(join(params.wps_path, 'GRIBFILE.*')):
                    os.remove(grib_file)
                grib_files = glob.glob(join(grb_data_path, '*'))
                suffixes = list(map(''.join, itertools.product(
                    string.ascii_uppercase, repeat=3)))
                if len(grib_files) > len(suffixes):
                    raise JobError("Ran out of grib file suffixes",
                                   Job.CodeError.LINK_GRIB_FAILED)
                for grib_file_to_link, suffix in zip(grib_files, suffixes):
                    try:
                        os.symlink(grib_file_to_link,
                                   join(params.wps_path, "GRIBFILE." + suffix))
                    except OSError:
                        raise JobError("Error linking grib files",
                                       Job.CodeError.LINK_GRIB_FAILED)
                ##
                # Run ungrib
                ##
                logging.info("Running ungrib")
                job_db.set_job_status(Job.Status.UNGRIB)
                ungrib_log = join(params.log_path, 'ungrib_%s.log' % vt)
                code, output = exec_cmd("%s > %s" % (ungrib_exe, ungrib_log))
                if code or 'Successful completion' not in \
                        open(ungrib_log, 'r').read():
                    logging.info(output)
                    raise JobError("'%s' has failed" % ungrib_exe,
                                   Job.CodeError.UNGRIB_FAILED)
                else:
                    logging.info("ungrib has successfully finished")
            ##
            # Update namelist.wps for metgrid
            ##
            logging.info("Updating namelist for metgrid")
            try:
                nmlw = fn.FortranNamelist(params.namelist_wps)
                nmlw.setValue("fg_name",
                              params.extdata_vtable.replace(' ', '').split(','),
                              "metgrid")
                if params.constants_name:
                    nmlw.setValue("constants_name",
                                  params.constants_name.replace(' ', '').split(','),
                                  "metgrid")
                for var_to_del in ['opt_output_from_metgrid_path',
                                   'opt_output_from_geogrid_path',
                                   'opt_metgrid_tbl_path',
                                   'opt_geogrid_tbl_path']:
                    nmlw.delVariable(var_to_del)
                nmlw.overWriteNamelist()
            except Exception as err:
                raise JobError("Error modifying namelist: %s" % err,
                               Job.CodeError.NAMELIST_FAILED)
            ##
            # Run metgrid
            ##
            logging.info("Running metgrid")
            job_db.set_job_status(Job.Status.METGRID)
            metgrid_log = join(params.log_path, 'metgrid.log')
            code, output = exec_cmd("%s > %s" % (metgrid_exe, metgrid_log))
            if code or 'Successful completion' not in \
                    open(metgrid_log, 'r').read():
                logging.info(output)
                raise JobError("'%s' has failed" % metgrid_exe,
                               Job.CodeError.METGRID_FAILED)
            else:
                logging.info("metgrid has successfully finished")
            ##
            # Run real
            ##
            # Change to the WRF run path
            os.chdir(params.wrf_run_path)
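            # real.exe consumes the met_em files produced by metgrid and
            # generates the initial and boundary condition files
            # (wrfinput_d0?, wrfbdy_d0? and, when SST update is enabled,
            # wrflowinp_d0?) that wrf.exe needs, which is why it only runs
            # on this WPS branch.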
            # Create symbolic links to the met_em files so real can read them
            met_files = glob.glob(join(params.wps_path, 'met_em.d*'))
            for met_file in met_files:
                os.symlink(met_file,
                           join(params.wrf_run_path, basename(met_file)))
            fix_ptop(params.namelist_input)
            wps2wrf(params.namelist_wps, params.namelist_input,
                    params.chunk_rdate, params.chunk_edate, params.max_dom,
                    chunk_rerun, params.timestep_dxfactor)
            if (params.parallel_real == 'yes' or params.parallel_wrf == 'yes') and \
                    (params.local_path != params.root_path):
                logging.info("Copying namelist file to all WNs")
                bk_namelist = join(params.root_path, 'namelist.input.bk')
                shutil.copyfile(params.namelist_input, bk_namelist)
                code, output = exec_cmd("%s cp %s %s" %
                                        (params.parallel_run_pernode,
                                         bk_namelist, params.namelist_input))
                if code:
                    logging.info(output)
                    raise JobError("Error copying namelist to all WNs",
                                   Job.CodeError.COPY_FILE)
            logging.info("Running real")
            job_db.set_job_status(Job.Status.REAL)
            if params.parallel_real == 'yes':
                real_log = join(params.wrf_run_path, 'rsl.out.0000')
                cmd = "%s wrf_launcher.sh %s" % (params.parallel_run, real_exe)
                code, output = exec_cmd(cmd)
                if isfile(real_log):
                    real_rsl_path = join(params.log_path, 'rsl_real')
                    os.mkdir(real_rsl_path)
                    rsl_files = glob.glob(join(params.wrf_run_path, 'rsl.*'))
                    for rsl_file in rsl_files:
                        shutil.copyfile(rsl_file,
                                        join(real_rsl_path, basename(rsl_file)))
            else:
                real_log = join(params.log_path, 'real.log')
                code, output = exec_cmd("wrf_launcher.sh %s > %s" %
                                        (real_exe, real_log))
            if code or 'SUCCESS COMPLETE' not in open(real_log, 'r').read():
                logging.info(output)
                raise JobError("'%s' has failed" % real_exe,
                               Job.CodeError.REAL_FAILED)
            else:
                logging.info("real has successfully finished")
            ##
            # Check whether the WPS files have to be stored
            ##
            if params.save_wps == 'yes':
                logging.info("Saving the WPS files")
                job_db.set_job_status(Job.Status.UPLOAD_WPS)
                # Since the files come from WPS, the date is added to their
                # names. Three files per domain have to be uploaded:
                # wrfinput_d0?, wrfbdy_d0? and wrflowinp_d0?. For example,
                # the command
                #   $ upload_file wps 1990-01-01_00:00:00
                # will create files in the repository with the format
                # wrfinput_d01_19900101T000000Z
                suffix = "_" + datetime2dateiso(params.chunk_rdate) + ".nc"
                for wps_file in VCPURL(params.wps_path).ls("wrf[lbif]*_d\d\d"):
                    orig = wps_file
                    dest = join(params.real_rea_output_path,
                                basename(wps_file) + suffix)
                    try:
                        logging.info("Uploading '%s' file" % orig)
                        os.chmod(orig, stat.S_IRUSR | stat.S_IWUSR |
                                 stat.S_IRGRP | stat.S_IWGRP | stat.S_IROTH)
                        copy_file(orig, dest)
                    except Exception:
                        raise JobError("'%s' could not be copied" % orig,
                                       Job.CodeError.COPY_UPLOAD_WPS)
                job_db.set_wps()
        # Change to the WRF run path
        os.chdir(params.wrf_run_path)
        ##
        # Start a thread to monitor wrf
        ##
        if params.parallel_wrf == 'yes':
            log_wrf = join(params.wrf_run_path, 'rsl.out.0000')
        else:
            log_wrf = join(params.log_path, 'wrf.log')
        worker = threading.Thread(target=wrf_monitor,
                                  args=(job_db, log_wrf, params))
        worker.daemon = True
        worker.start()
        ##
        # Wipe the WPS path
        ##
        if params.clean_after_run == 'yes':
            logging.info("Wiping '%s' directory" % params.wps_path)
            try:
                shutil.rmtree(params.wps_path)
            except Exception:
                logging.info("Error wiping '%s' directory" % params.wps_path)
        ##
        # Run wrf
        ##
        logging.info("Running wrf")
        job_db.set_job_status(Job.Status.WRF)
        if params.parallel_wrf == 'yes':
            cmd = "%s wrf_launcher.sh %s" % (params.parallel_run, wrf_exe)
            code, output = exec_cmd(cmd)
            if isfile(log_wrf):
                wrf_rsl_path = join(params.log_path, 'rsl_wrf')
                os.mkdir(wrf_rsl_path)
                rsl_files = glob.glob(join(params.wrf_run_path, 'rsl.*'))
                for rsl_file in rsl_files:
                    shutil.copyfile(rsl_file,
                                    join(wrf_rsl_path, basename(rsl_file)))
        else:
            code, output = exec_cmd("wrf_launcher.sh %s > %s" %
                                    (wrf_exe, log_wrf))
        if code or 'SUCCESS COMPLETE' not in open(log_wrf, 'r').read():
            logging.info(output)
            raise JobError("'%s' has failed" % wrf_exe,
                           Job.CodeError.WRF_FAILED)
        else:
            logging.info("wrf has successfully finished")
        ##
        # Update the current date
        ##
        current_date = get_current_date(log_wrf)
        if not current_date:
            current_date = params.chunk_rdate
        job_db.set_current_date(current_date)
        ##
        # Save all files
        ##
        clean_wrf_files(job_db, params, clean_all=True)
        ##
        # Wipe after run
        ##
        if (params.parallel_real == 'yes' or params.parallel_wrf == 'yes') and \
                (params.local_path != params.root_path) and \
                (params.clean_after_run == 'yes'):
            logging.info("Wiping the directory '%s' on all worker nodes" %
                         params.local_path)
            code, output = exec_cmd("%s rm -rf %s" %
                                    (params.parallel_run_pernode,
                                     params.local_path))
            if code:
                logging.info(output)
                logging.error("Error wiping the directory '%s' on "
                              "worker nodes" % params.local_path)
        ##
        # Update the status
        ##
        job_db.set_job_status(Job.Status.FINISHED)
        exit_code = 0
    except JobError as err:
        logging.error(err.msg)
        job_db.set_job_status(Job.Status.FAILED)
        exit_code = err.exit_code
    except Exception:
        logging.error("Unexpected error", exc_info=1)
        job_db.set_job_status(Job.Status.FAILED)
        exit_code = 255
    finally:
        ##
        # Create a log bundle
        ##
        os.chdir(params.root_path)
        log_name = "log_%d_%d" % (params.nchunk, params.job_id)
        log_tar = log_name + '.tar.gz'
        tar = None
        try:
            logging.info("Creating tar file for logs")
            tar = tarfile.open(log_tar, "w:gz")
            tar.add('log', arcname=log_name)
        finally:
            if tar:
                tar.close()
        # Copy it to the repository
        orig = join(params.root_path, log_tar)
        dest = join(params.log_rea_output_path, log_tar)
        copy_file(orig, dest)
        ##
        # Close the connection with the database
        ##
        job_db.set_exit_code(exit_code)
        job_db.close(params.root_path)
    sys.exit(exit_code)
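# Note: the log bundle name built in the finally block, 'log_%d_%d' %
# (params.nchunk, params.job_id), matches the 'log_%d_${JOB_ID}.tar.gz'
# pattern that Chunk.run() declares in the job's output sandbox, which is
# presumably how DRM4G retrieves the logs when the job finishes.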
def clean_wrf_files(job_db, params, clean_all=False):
    """
    Postprocess wrfout files and copy files to the output path.
    """
    with lock:
        for patt in params.files_to_save:
            all_files_patt = glob.glob(join(params.wrf_run_path, patt + '*'))
            if not clean_all:
                if len(all_files_patt) >= (2 * params.max_dom):
                    # Process only the oldest files; WRF may still be
                    # writing to the newest ones
                    all_files_patt.sort(key=os.path.getmtime)
                    files = all_files_patt[:params.max_dom]
                else:
                    continue
            else:
                files = all_files_patt
            for file_path in files:
                logging.info("Checking '%s' file" % file_path)
                file_name = basename(file_path)
                end_date_file = None
                if file_name == "wrfrst_d01_" + \
                        datetime2datewrf(params.chunk_rdate):
                    # Skip the initial restart file
                    logging.info("Skipping initial restart file %s" %
                                 file_name)
                    continue
                elif "wrfout" in file_name and params.postprocessor:
                    code, output = exec_cmd("ncdump -v Times %s" % file_name)
                    if "WRF4G_postprocessor" in output:
                        logging.info("'%s' was already postprocessed" %
                                     file_name)
                        continue
                    try:
                        mo = re.findall(
                            "(\d{4}-\d{2}-\d{2}_\d{2}:\d{2}:\d{2})", output)
                        end_date_file = mo[-1]
                    except Exception as err:
                        logging.error("ERROR: Calculating "
                                      "wrfout_name_end_date %s" % err)
                        end_date_file = None
                    ##
                    # Execute the postprocessor
                    ##
                    logging.info("Running postprocessor.%s" %
                                 params.postprocessor)
                    if not which("postprocessor.%s" % params.postprocessor):
                        raise JobError("Postprocessor '%s' does not exist" %
                                       params.postprocessor,
                                       Job.CodeError.POSTPROCESSOR_FAILED)
                    post_log = join(params.log_path, 'postprocessor.%s.log' %
                                    params.postprocessor)
                    code, output = exec_cmd(
                        "postprocessor.%s %s &>> %s" %
                        (params.postprocessor, file_name, post_log))
                    if code:
                        logging.info(output)
                        raise JobError("Error processing '%s' file" %
                                       file_name,
                                       Job.CodeError.POSTPROCESSOR_FAILED)
                    # Mark the file as postprocessed by adding a global
                    # attribute
                    exec_cmd('ncatted -O -a WRF4G_postprocessor,global,o,c,'
                             '"%s" %s' % (params.postprocessor, file_path))
                elif "wrfrst" in file_name and "d01" in file_name:
                    restart_date = WRFFile(file_name).date_datetime()
                    logging.info("Setting restart date to '%s'" %
                                 restart_date)
                    job_db.set_restart_date(restart_date)
                ##
                # Upload "wrfout", "wrfrst", "wrfzout", "wrfz2out",
                # "wrfrain", "wrfxtrm" and "wrf24hc" files
                ##
                if patt != "wrfrst" and params.wrfout_name_end_date == 'yes' \
                        and end_date_file:
                    dest_file = WRFFile(file_name,
                                        end_date_file).file_name_out_iso()
                else:
                    dest_file = WRFFile(file_name).file_name_iso()
                if patt == "wrfrst":
                    dest = join(params.rst_rea_output_path, dest_file)
                else:
                    dest = join(params.out_rea_output_path, dest_file)
                logging.info("Uploading file '%s'" % file_path)
                os.chmod(file_path, stat.S_IRUSR | stat.S_IWUSR |
                         stat.S_IRGRP | stat.S_IWGRP | stat.S_IROTH)
                try:
                    copy_file(file_path, dest)
                except Exception:
                    logging.error("'%s' could not be copied" % file_path)
                    time.sleep(10)
                    logging.info("Uploading file '%s' again" % file_path)
                    try:
                        copy_file(file_path, dest)
                    except Exception:
                        raise JobError("'%s' could not be copied" % file_path,
                                       Job.CodeError.COPY_OUTPUT_FILE)
                try:
                    os.remove(file_path)
                except OSError:
                    pass
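# Worked example: with max_dom = 2 and 'wrfout' among files_to_save, nothing
# is uploaded until at least 4 wrfout files exist in wrf_run_path; then the
# 2 oldest (by mtime) are postprocessed, uploaded and removed, while the
# newest max_dom files are left in place (WRF may still be writing to them).
# With clean_all=True, called after wrf finishes, every matching file is
# flushed.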