def get_metadata(fname):
    """Parse metadata from the file."""
    res = None
    for section in CONFIG.sections():
        try:
            parser = Parser(CONFIG.get(section, "pattern"))
        except NoOptionError:
            continue
        if not parser.validate(fname):
            continue
        res = parser.parse(fname)
        res.update(dict(CONFIG.items(section)))

        for key in ["watcher", "pattern", "timeliness", "regions"]:
            res.pop(key, None)

        res = trigger.fix_start_end_time(res)

        if ("sensor" in res) and ("," in res["sensor"]):
            res["sensor"] = res["sensor"].split(",")

    if res is None:
        # No config section pattern matched the file name.
        return None

    res["uri"] = fname
    res["filename"] = os.path.basename(fname)
    return res
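# A minimal sketch of the trollsift mechanics get_metadata() relies on: each
# config section provides a "pattern" option, and the first pattern that
# validates the file name is parsed into a metadata dict.  The pattern and
# file name below are hypothetical examples, not from the original config.
from trollsift import Parser

pattern = "hrpt_{platform_name}_{start_time:%Y%m%d_%H%M%S}.l1b"
fname = "hrpt_noaa19_20230101_120000.l1b"

parser = Parser(pattern)
assert parser.validate(fname)   # the name matches the pattern ...
res = parser.parse(fname)
# ... and parsing yields the named fields, e.g.
# res == {'platform_name': 'noaa19',
#         'start_time': datetime.datetime(2023, 1, 1, 12, 0)}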
def get_metadata(fname):
    """Parse metadata from the file."""
    res = None
    for section in CONFIG.sections():
        try:
            parser = Parser(CONFIG.get(section, "pattern"))
        except NoOptionError:
            continue
        if not parser.validate(fname):
            continue
        res = parser.parse(fname)
        res.update(dict(CONFIG.items(section)))

        for key in ["watcher", "pattern", "timeliness", "regions"]:
            res.pop(key, None)

        if "duration" in res and "end_time" not in res:
            res["end_time"] = (res["start_time"] +
                               timedelta(seconds=int(res["duration"])))
        if "start_date" in res:
            res["start_time"] = datetime.combine(res["start_date"].date(),
                                                 res["start_time"].time())
            if "end_date" not in res:
                res["end_date"] = res["start_date"]
            del res["start_date"]
        if "end_date" in res:
            res["end_time"] = datetime.combine(res["end_date"].date(),
                                               res["end_time"].time())
            del res["end_date"]

        # Roll the end time past midnight if it ended up before the start.
        while res["start_time"] > res["end_time"]:
            res["end_time"] += timedelta(days=1)

        if "duration" in res:
            del res["duration"]

        if ("sensor" in res) and ("," in res["sensor"]):
            res["sensor"] = res["sensor"].split(",")

    if res is None:
        # No config section pattern matched the file name.
        return None

    res["uri"] = fname
    res["filename"] = os.path.basename(fname)
    return res
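# Worked example of the start/end-time normalisation above.  File name
# patterns often carry a full date only for the start and a bare time for
# the end, so after datetime.combine() the end can look earlier than the
# start; the while-loop rolls it past midnight.  Values are illustrative.
from datetime import datetime, timedelta

res = {"start_time": datetime(2023, 1, 1, 23, 50),
       "end_time": datetime(1900, 1, 1, 0, 5)}
res["end_time"] = datetime.combine(res["start_time"].date(),
                                   res["end_time"].time())
# end_time is now 2023-01-01 00:05, i.e. before start_time ...
while res["start_time"] > res["end_time"]:
    res["end_time"] += timedelta(days=1)
# ... and after the roll-over it is on the next day.
assert res["end_time"] == datetime(2023, 1, 2, 0, 5)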
def get_metadata(fname):
    """Parse metadata from the file name, skipping the "default" config section."""
    res = None
    for section in config.sections():
        if section == "default":
            continue
        try:
            parser = Parser(config.get(section, "pattern"))
        except NoOptionError:
            continue
        if not parser.validate(fname):
            continue
        res = parser.parse(fname)
        res.update(dict(config.items(section)))

        for key in ["watcher", "pattern", "timeliness"]:
            res.pop(key, None)

        if "duration" in res and "end_time" not in res:
            res["end_time"] = (res["start_time"] +
                               timedelta(seconds=int(res["duration"])))
        if "start_date" in res:
            res["start_time"] = datetime.combine(res["start_date"].date(),
                                                 res["start_time"].time())
            if "end_date" not in res:
                res["end_date"] = res["start_date"]
            del res["start_date"]
        if "end_date" in res:
            res["end_time"] = datetime.combine(res["end_date"].date(),
                                               res["end_time"].time())
            del res["end_date"]

        # Roll the end time past midnight if it ended up before the start.
        while res["start_time"] > res["end_time"]:
            res["end_time"] += timedelta(days=1)

        if "duration" in res:
            del res["duration"]

    if res is None:
        # No config section pattern matched the file name.
        return None

    res["uri"] = fname
    res["filename"] = os.path.basename(fname)
    return res
def update_nwp(params):
    LOG.info("METNO update nwp")

    tempfile.tempdir = params['options']['nwp_outdir']

    ecmwf_path = params['options']['ecmwf_path']
    if not os.path.exists(ecmwf_path):
        ecmwf_path = ecmwf_path.replace("storeB", "storeA")
        LOG.warning("Need to replace storeB with storeA for ecmwf_path: {}".format(str(ecmwf_path)))

    filelist = glob(os.path.join(ecmwf_path, params['options']['ecmwf_prefix'] + "*"))
    if len(filelist) == 0:
        LOG.info("Found no input files! dir = " +
                 str(os.path.join(ecmwf_path, params['options']['ecmwf_prefix'] + "*")))
        return

    from trollsift import Parser, compose
    filelist.sort()
    for filename in filelist:
        if params['options']['ecmwf_file_name_sift'] is not None:
            try:
                parser = Parser(params['options']['ecmwf_file_name_sift'])
            except NoOptionError as noe:
                LOG.error("NoOptionError {}".format(noe))
                continue
            if not parser.validate(os.path.basename(filename)):
                LOG.error("Parser validate on filename: {} failed.".format(filename))
                continue
            res = parser.parse("{}".format(os.path.basename(filename)))

            time_now = datetime.utcnow()
            if 'analysis_time' in res:
                if res['analysis_time'].year == 1900:
                    # This is tricky: the file name is missing the year, so it
                    # must be guessed from the current year and month together
                    # with the month of the analysis time taken from the file
                    # name.  If the month now is 1 (January) and the analysis
                    # month is 12, the time has passed New Year, but the NWP
                    # analysis time is the previous year.
                    if time_now.month == 1 and res['analysis_time'].month == 12:
                        analysis_year = time_now.year - 1
                    else:
                        analysis_year = time_now.year
                    res['analysis_time'] = res['analysis_time'].replace(year=analysis_year)
            else:
                LOG.error("Can not parse analysis_time in file name. Check config and filename timestamp")
                # Without an analysis time nothing more can be done for this file.
                continue

            if 'forecast_time' in res:
                if res['forecast_time'].year == 1900:
                    # See above for explanation
                    if res['analysis_time'].month == 12 and res['forecast_time'].month == 1:
                        forecast_year = res['analysis_time'].year + 1
                    else:
                        forecast_year = res['analysis_time'].year
                    res['forecast_time'] = res['forecast_time'].replace(year=forecast_year)
            else:
                LOG.error("Can not parse forecast_time in file name. Check config and filename timestamp")
                continue

            forecast_time = res['forecast_time']
            analysis_time = res['analysis_time']
            step_delta = forecast_time - analysis_time
            step = "{:03d}H{:02d}M".format(int(step_delta.days * 24 + step_delta.seconds // 3600), 0)
        else:
            LOG.error("No sift pattern given. Can not parse input NWP files")
            continue

        if analysis_time < params['starttime']:
            # LOG.debug("skip analysis time {} older than search time {}".format(analysis_time, params['starttime']))
            continue
        if int(step[:3]) not in params['nlengths']:
            # LOG.debug("Skip step {}, not in {}".format(int(step[:3]), params['nlengths']))
            continue

        output_parameters = {}
        output_parameters['analysis_time'] = analysis_time
        output_parameters['step_hour'] = int(step_delta.days * 24 + step_delta.seconds // 3600)
        output_parameters['step_min'] = 0
        try:
            if not os.path.exists(params['options']['nwp_outdir']):
                os.makedirs(params['options']['nwp_outdir'])
        except OSError as e:
            LOG.error("Failed to create directory: %s", e)
        result_file = ""
        try:
            result_file = os.path.join(
                params['options']['nwp_outdir'],
                compose(params['options']['nwp_output'], output_parameters))
            _result_file = os.path.join(
                params['options']['nwp_outdir'],
                compose("." + params['options']['nwp_output'], output_parameters))
            _result_file_lock = os.path.join(
                params['options']['nwp_outdir'],
                compose("." + params['options']['nwp_output'] + ".lock", output_parameters))
        except Exception as e:
            LOG.error("Joining outdir with output for nwp failed with: {}".format(e))
            continue

        LOG.info("Result file: {}".format(result_file))
        if os.path.exists(result_file):
            LOG.info("File: " + str(result_file) + " already there...")
            continue

        import fcntl
        import errno
        import time
        rfl = open(_result_file_lock, 'w+')
        # Do some locking: take an exclusive, non-blocking lock and retry
        # until it is granted.
        while True:
            try:
                fcntl.flock(rfl, fcntl.LOCK_EX | fcntl.LOCK_NB)
                LOG.debug("Got lock for NWP outfile: {}".format(result_file))
                break
            except IOError as e:
                if e.errno != errno.EAGAIN:
                    raise
                else:
                    LOG.debug("Waiting for lock ... {}".format(result_file))
                    time.sleep(1)

        if os.path.exists(result_file):
            LOG.info("File: " + str(result_file) + " already there...")
            # Need to release the lock
            fcntl.flock(rfl, fcntl.LOCK_UN)
            rfl.close()
            continue

        fout = open(_result_file, 'wb')
        try:
            # Do the static fields.
            # Note: field not in the filename variable, but a configured
            # filename for static fields.
            static_filename = params['options']['ecmwf_static_surface']
            if not os.path.exists(static_filename):
                static_filename = static_filename.replace("storeB", "storeA")
                LOG.warning("Need to replace storeB with storeA")

            index_vals = []
            index_keys = ['paramId', 'level']
            LOG.debug("Start building index")
            LOG.debug("Handling file: %s", filename)
            iid = ecc.codes_index_new_from_file(filename, index_keys)
            filename_n1s = filename.replace('N2D', 'N1S')
            LOG.debug("Add to index %s", filename_n1s)
            ecc.codes_index_add_file(iid, filename_n1s)
            LOG.debug("Add to index %s", static_filename)
            ecc.codes_index_add_file(iid, static_filename)
            LOG.debug("Done index")
            for key in index_keys:
                key_vals = ecc.codes_index_get(iid, key)
                key_vals = tuple(x for x in key_vals if x != 'undef')
                index_vals.append(key_vals)

            for prod in product(*index_vals):
                for i in range(len(index_keys)):
                    ecc.codes_index_select(iid, index_keys[i], prod[i])
                while True:
                    gid = ecc.codes_new_from_index(iid)
                    if gid is None:
                        break
                    param = ecc.codes_get(gid, index_keys[0])
                    parameters = [172, 129, 235, 167, 168, 137,
                                  130, 131, 132, 133, 134, 157]
                    if param in parameters:
                        LOG.debug("Doing param: %d", param)
                        copy_needed_field(gid, fout)
                    ecc.codes_release(gid)
            ecc.codes_index_release(iid)
            fout.close()
            os.rename(_result_file, result_file)
        except WrongLengthError as wle:
            LOG.error("Something wrong with the data: %s", wle)
            raise

        # In the end release the lock
        fcntl.flock(rfl, fcntl.LOCK_UN)
        rfl.close()
        os.remove(_result_file_lock)
    return
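# Worked example of the year-guessing above, for file names whose timestamps
# lack a year (the parser then defaults to 1900).  Around New Year the naive
# "use the current year" answer is wrong, hence the month comparison.  This
# is a stand-alone sketch with illustrative values, not original code.
from datetime import datetime

def guess_analysis_year(analysis_time, time_now):
    # A December analysis seen in January belongs to the previous year.
    if time_now.month == 1 and analysis_time.month == 12:
        return time_now.year - 1
    return time_now.year

assert guess_analysis_year(datetime(1900, 12, 31, 18), datetime(2024, 1, 1, 2)) == 2023
assert guess_analysis_year(datetime(1900, 6, 1, 0), datetime(2024, 6, 1, 12)) == 2024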
def update_nwp(starttime, nlengths):
    """Prepare NWP grib files for PPS.

    Consider only analysis times newer than *starttime*, and only the
    forecast lead times in hours given by the list *nlengths* of integers.
    """
    LOG.info("Path to prepare_nwp config file = %s", str(CONFIG_PATH))
    LOG.info("Prepare_nwp config file = %s", str(CONFIG_FILE))
    LOG.info("Path to nhsf files: %s", str(nhsf_path))
    LOG.info("Path to nhsp files: %s", str(nhsp_path))

    tempfile.tempdir = nwp_outdir
    filelist = glob(os.path.join(nhsf_path, nhsf_prefix + "*"))
    if len(filelist) == 0:
        LOG.info("No input files! dir = %s", str(nhsf_path))
        return

    LOG.debug('NHSF NWP files found = %s', str(filelist))
    nfiles_error = 0
    for filename in filelist:
        if nhsf_file_name_sift is None:
            raise NwpPrepareError()

        try:
            parser = Parser(nhsf_file_name_sift)
        except NoOptionError as noe:
            LOG.error("NoOptionError {}".format(noe))
            continue
        if not parser.validate(os.path.basename(filename)):
            LOG.error("Parser validate on filename: {} failed.".format(filename))
            continue
        LOG.info("{}".format(os.path.basename(filename)))
        res = parser.parse("{}".format(os.path.basename(filename)))
        LOG.info("{}".format(res))

        if 'analysis_time' in res:
            if res['analysis_time'].year == 1900:
                res['analysis_time'] = res['analysis_time'].replace(year=datetime.utcnow().year)
            analysis_time = res['analysis_time']
            timestamp = analysis_time.strftime("%Y%m%d%H%M")
        else:
            raise NwpPrepareError(
                "Can not parse analysis_time in file name. Check config and filename timestamp")

        if 'forecast_time' in res:
            if res['forecast_time'].year == 1900:
                res['forecast_time'] = res['forecast_time'].replace(year=datetime.utcnow().year)
            forecast_time = res['forecast_time']
            step_delta = forecast_time - analysis_time
            # Keep the step as whole hours (int) so it can be compared with
            # the integers in *nlengths* and formatted into the file name.
            forecast_step = int(step_delta.days * 24 + step_delta.seconds // 3600)
            timeinfo = "{:s}{:s}{:s}".format(analysis_time.strftime("%m%d%H%M"),
                                             forecast_time.strftime("%m%d%H%M"),
                                             res['end'])
        else:
            LOG.info("Can not parse forecast_time in file name. Try forecast step...")
            # This needs to be done more solid using the sift pattern! FIXME!
            timeinfo = filename.rsplit("_", 1)[-1]
            # Forecast step in hours:
            if 'forecast_step' in res:
                forecast_step = res['forecast_step']
            else:
                raise NwpPrepareError(
                    'Failed parsing forecast_step in file name. Check config and filename timestamp.')

        LOG.debug("Analysis time and start time: %s %s", str(analysis_time), str(starttime))
        if analysis_time < starttime:
            continue
        if forecast_step not in nlengths:
            LOG.debug("Skip step. Forecast step and nlengths: %s %s",
                      str(forecast_step), str(nlengths))
            continue

        LOG.info("timestamp, step: %s %s", str(timestamp), str(forecast_step))
        result_file = os.path.join(
            nwp_outdir, nwp_output_prefix + timestamp + "+" + '%.3dH00M' % forecast_step)
        if os.path.exists(result_file):
            LOG.info("File: " + str(result_file) + " already there...")
            continue

        tmp_filename = make_temp_filename(suffix="_" + timestamp + "+" +
                                          '%.3dH00M' % forecast_step, dir=nwp_outdir)
        LOG.info("result and tmp files: " + str(result_file) + " " + str(tmp_filename))

        nhsp_file = os.path.join(nhsp_path, nhsp_prefix + timeinfo)
        if not os.path.exists(nhsp_file):
            LOG.warning("Corresponding nhsp-file not there: " + str(nhsp_file))
            continue

        cmd = ("grib_copy -w gridType=regular_ll " + nhsp_file + " " + tmp_filename)
        retv = run_command(cmd)
        LOG.debug("Returncode = " + str(retv))
        if retv != 0:
            LOG.error("Failed doing the grib-copy! Will continue with the next file")
            nfiles_error = nfiles_error + 1
            if nfiles_error > len(filelist) / 2:
                LOG.error("More than half of the Grib files failed upon grib_copy!")
                raise IOError('Failed running grib_copy on many Grib files')
            continue

        if not os.path.exists(nwp_lsmz_filename):
            LOG.error("No static grib file with land-sea mask and " +
                      "topography available. Can't prepare NWP data")
            raise IOError('Failed getting static land-sea mask and topography')

        tmp_result_filename = make_temp_filename()
        cmd = ('cat ' + tmp_filename + " " +
               os.path.join(nhsf_path, nhsf_prefix + timeinfo) + " " +
               nwp_lsmz_filename + " > " + tmp_result_filename)
        LOG.debug("Add topography and land-sea mask to data:")
        LOG.debug("Command = " + str(cmd))
        _start = time.time()
        retv = os.system(cmd)
        _end = time.time()
        LOG.debug("os.system call took: %f seconds", _end - _start)
        LOG.debug("Returncode = " + str(retv))
        if retv != 0:
            LOG.warning("Failed generating nwp file %s ...", result_file)
            if os.path.exists(tmp_result_filename):
                os.remove(tmp_result_filename)
            raise IOError("Failed adding topography and land-sea " +
                          "mask data to grib file")

        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)
        else:
            LOG.warning("tmp file %s gone! Cannot clean it...", tmp_filename)

        if check_nwp_content(tmp_result_filename):
            LOG.info('A check of the NWP file content has been attempted: %s', result_file)
            _start = time.time()
            os.rename(tmp_result_filename, result_file)
            _end = time.time()
            LOG.debug("Rename file %s to %s: This took %f seconds",
                      tmp_result_filename, result_file, _end - _start)
        else:
            LOG.warning("Missing important fields. No nwp file %s written to disk",
                        result_file)
            if os.path.exists(tmp_result_filename):
                os.remove(tmp_result_filename)

    return
def update_nwp(params):
    LOG.info("METNO update nwp")

    result_files = dict()
    tempfile.tempdir = params['options']['nwp_outdir']

    ecmwf_path = params['options']['ecmwf_path']
    if not os.path.exists(ecmwf_path):
        ecmwf_path = ecmwf_path.replace("storeB", "storeA")
        LOG.warning("Need to replace storeB with storeA for ecmwf_path: {}".format(str(ecmwf_path)))

    filelist = glob(os.path.join(ecmwf_path, params['options']['ecmwf_prefix'] + "*"))
    if len(filelist) == 0:
        LOG.info("Found no input files! dir = " +
                 str(os.path.join(ecmwf_path, params['options']['ecmwf_prefix'] + "*")))
        return

    from trollsift import Parser, compose
    filelist.sort()
    for filename in filelist:
        if params['options']['ecmwf_file_name_sift'] is not None:
            try:
                parser = Parser(params['options']['ecmwf_file_name_sift'])
            except NoOptionError as noe:
                LOG.error("NoOptionError {}".format(noe))
                continue
            if not parser.validate(os.path.basename(filename)):
                LOG.error("Parser validate on filename: {} failed.".format(filename))
                continue
            res = parser.parse("{}".format(os.path.basename(filename)))

            # NOTE: Reading dataDate from each file with grib_get and caching
            # the result was tried here, but takes too long to complete.

            time_now = datetime.utcnow()
            if 'analysis_time' in res:
                if res['analysis_time'].year == 1900:
                    # This is tricky: the file name is missing the year, so it
                    # must be guessed from the current year and month together
                    # with the month of the analysis time taken from the file
                    # name.  If the month now is 1 (January) and the analysis
                    # month is 12, the time has passed New Year, but the NWP
                    # analysis time is the previous year.
                    if time_now.month == 1 and res['analysis_time'].month == 12:
                        analysis_year = time_now.year - 1
                    else:
                        analysis_year = time_now.year
                    res['analysis_time'] = res['analysis_time'].replace(year=analysis_year)
            else:
                LOG.error("Can not parse analysis_time in file name. Check config and filename timestamp")
                continue

            if 'forecast_time' in res:
                if res['forecast_time'].year == 1900:
                    # See above for explanation
                    if res['analysis_time'].month == 12 and res['forecast_time'].month == 1:
                        forecast_year = res['analysis_time'].year + 1
                    else:
                        forecast_year = res['analysis_time'].year
                    res['forecast_time'] = res['forecast_time'].replace(year=forecast_year)
            else:
                LOG.error("Can not parse forecast_time in file name. Check config and filename timestamp")
                continue

            forecast_time = res['forecast_time']
            analysis_time = res['analysis_time']
            timestamp = analysis_time.strftime("%Y%m%d%H%M")
            step_delta = forecast_time - analysis_time
            step = "{:03d}H{:02d}M".format(int(step_delta.days * 24 + step_delta.seconds // 3600), 0)
            timeinfo = "{:s}{:s}{:s}".format(analysis_time.strftime("%m%d%H%M"),
                                             forecast_time.strftime("%m%d%H%M"),
                                             res['end'])
        else:
            LOG.error("No sift pattern given. Can not parse input NWP files")
            continue

        if analysis_time < params['starttime']:
            # LOG.debug("skip analysis time {} older than search time {}".format(analysis_time, params['starttime']))
            continue
        if int(step[:3]) not in params['nlengths']:
            # LOG.debug("Skip step {}, not in {}".format(int(step[:3]), params['nlengths']))
            continue

        output_parameters = {}
        output_parameters['analysis_time'] = analysis_time
        output_parameters['step_hour'] = int(step_delta.days * 24 + step_delta.seconds // 3600)
        output_parameters['step_min'] = 0
        try:
            if not os.path.exists(params['options']['nwp_outdir']):
                os.makedirs(params['options']['nwp_outdir'])
        except OSError as e:
            LOG.error("Failed to create directory: %s", e)
        result_file = ""
        try:
            result_file = os.path.join(params['options']['nwp_outdir'],
                                       compose(params['options']['nwp_output'], output_parameters))
            _result_file = os.path.join(params['options']['nwp_outdir'],
                                        compose("." + params['options']['nwp_output'], output_parameters))
            _result_file_lock = os.path.join(params['options']['nwp_outdir'],
                                             compose("." + params['options']['nwp_output'] + ".lock",
                                                     output_parameters))
        except Exception as e:
            LOG.error("Joining outdir with output for nwp failed with: {}".format(e))
            continue

        LOG.info("Result file: {}".format(result_file))
        if os.path.exists(result_file):
            LOG.info("File: " + str(result_file) + " already there...")
            continue

        import fcntl
        import errno
        import time
        rfl = open(_result_file_lock, 'w+')
        # Do some locking: take an exclusive, non-blocking lock and retry
        # until it is granted.
        while True:
            try:
                fcntl.flock(rfl, fcntl.LOCK_EX | fcntl.LOCK_NB)
                LOG.debug("Got lock for NWP outfile: {}".format(result_file))
                break
            except IOError as e:
                if e.errno != errno.EAGAIN:
                    raise
                else:
                    LOG.debug("Waiting for lock ... {}".format(result_file))
                    time.sleep(1)

        if os.path.exists(result_file):
            LOG.info("File: " + str(result_file) + " already there...")
            # Need to release the lock
            fcntl.flock(rfl, fcntl.LOCK_UN)
            rfl.close()
            continue

        # Set up a temporary file to copy grib fields to.  If RAM is
        # available through /run/shm, use it, else use /tmp.  (This belongs
        # to a disused grib-multi based approach; kept for reference.)
        if os.path.exists("/run/shm"):
            __tmpfile = "/run/shm/__tmp"
        else:
            __tmpfile = "/tmp/__tmp"

        # Some parameters can be found from the short name and some from
        # paramId; check the second if the first one is not found.
        parameter_name_list = ["indicatorOfParameter", "paramId"]

        fout = open(_result_file, 'wb')
        try:
            # Do the static fields.
            # Note: field not in the filename variable, but a configured
            # filename for static fields.
            static_filename = params['options']['ecmwf_static_surface']
            if not os.path.exists(static_filename):
                static_filename = static_filename.replace("storeB", "storeA")
                LOG.warning("Need to replace storeB with storeA")

            index_vals = []
            index_keys = ['paramId', 'level']
            LOG.debug("Start building index")
            LOG.debug("Handling file: %s", filename)
            iid = codes_index_new_from_file(filename, index_keys)
            filename_n1s = filename.replace('N2D', 'N1S')
            LOG.debug("Add to index %s", filename_n1s)
            codes_index_add_file(iid, filename_n1s)
            LOG.debug("Add to index %s", static_filename)
            codes_index_add_file(iid, static_filename)
            LOG.debug("Done index")
            for key in index_keys:
                key_vals = codes_index_get(iid, key)
                key_vals = tuple(x for x in key_vals if x != 'undef')
                index_vals.append(key_vals)

            for prod in product(*index_vals):
                for i in range(len(index_keys)):
                    codes_index_select(iid, index_keys[i], prod[i])
                while True:
                    gid = codes_new_from_index(iid)
                    if gid is None:
                        break
                    param = codes_get(gid, index_keys[0])
                    parameters = [172, 129, 235, 167, 168, 137,
                                  130, 131, 132, 133, 134, 157]
                    if param in parameters:
                        LOG.debug("Doing param: %d", param)
                        copy_needed_field(gid, fout)
                    codes_release(gid)
            codes_index_release(iid)
            fout.close()
            os.rename(_result_file, result_file)
        except WrongLengthError as wle:
            LOG.error("Something wrong with the data: %s", wle)
            raise

        # In the end release the lock
        fcntl.flock(rfl, fcntl.LOCK_UN)
        rfl.close()
        os.remove(_result_file_lock)
    return