def get_hrrr_variable(DATE, variable, fxx=0, model='hrrr', field='sfc', removeFile=True, value_only=False, verbose=True, outDIR='./'):
    """
    Use cURL to grab a single variable from a HRRR grib2 file on the
    MesoWest HRRR archive (Pando), falling back to NOMADS/ESRL for dates
    not yet transferred to Pando.

    Input:
        DATE       - datetime(year, month, day, hour) of the HRRR run (UTC).
        variable   - string identifying the grib field, matched against the
                     .idx file lines (see https://api.mesowest.utah.edu/archive/HRRR/).
                     Use the short name plus level, e.g. 'TMP:2 m above ground'.
        fxx        - forecast hour. Default 0 is the analysis hour.
        model      - one of ['hrrr', 'hrrrX', 'hrrrAK'].
        field      - file type the variable lives in: ['sfc', 'prs'].
        removeFile - True removes the downloaded grib2 temp file afterwards.
        value_only - True returns only the values (~.2 s vs ~.75-1 s).
        verbose    - print progress messages.
        outDIR     - directory the temporary grib2 file is written to.

    Returns a dict with 'value' (and, unless value_only, 'lat', 'lon',
    'valid', 'anlys', 'msg'); on any failure returns the same keys set to
    np.nan.
    """
    # Model directory names on the archive differ from the model name.
    # NOTE(review): any other `model` value leaves model_dir unbound and
    # raises NameError when the Pando URL is built -- confirm callers only
    # pass the three values above.
    if model == 'hrrr':
        model_dir = 'oper'
    elif model == 'hrrrX':
        model_dir = 'exp'
    elif model == 'hrrrAK':
        model_dir = 'alaska'

    # Temp file name has to be very unique, else when we use multiprocessing
    # we might accidentally delete files before we are done with them.
    outfile = '%stemp_%04d%02d%02d%02d_f%02d_%s.grib2' % (
        outDIR, DATE.year, DATE.month, DATE.day, DATE.hour, fxx, variable[:3])
    if verbose is True:
        print outfile

    # Only HRRR files for the previous day have been transferred to Pando,
    # so requests for today's date are redirected to NOMADS (operational
    # HRRR) or ESRL (experimental HRRR).
    UTC = datetime.utcnow()  # the current date in UTC
    if DATE < datetime(UTC.year, UTC.month, UTC.day):
        # --- Get HRRR from Pando -----------------------------------------
        if verbose is True:
            print "Oh, good, you requested a date that should be on Pando."
        # URL for the grib2.idx file
        fileidx = 'https://api.mesowest.utah.edu/archive/HRRR/%s/%s/%04d%02d%02d/%s.t%02dz.wrf%sf%02d.grib2.idx' \
            % (model_dir, field, DATE.year, DATE.month, DATE.day, model, DATE.hour, field, fxx)
        # URL for the grib2 file (located on PANDO S3 archive)
        pandofile = 'https://pando-rgw01.chpc.utah.edu/HRRR/%s/%s/%04d%02d%02d/%s.t%02dz.wrf%sf%02d.grib2' \
            % (model_dir, field, DATE.year, DATE.month, DATE.day, model, DATE.hour, field, fxx)
    else:
        # --- Date is today: get operational HRRR from NOMADS -------------
        if model == 'hrrr':
            if verbose is True:
                print "\n-----------------------------------------------------------------------"
                print "!! Hey! You are requesting a date that is not on the Pando archive !!"
                print "!! That's ok, I'll redirect you to the NOMADS server. :) !!"
                print "-----------------------------------------------------------------------\n"
            # URL for the grib2 idx file
            fileidx = 'http://nomads.ncep.noaa.gov/pub/data/nccf/com/hrrr/prod/hrrr.%04d%02d%02d/%s.t%02dz.wrf%sf%02d.grib2.idx' \
                % (DATE.year, DATE.month, DATE.day, model, DATE.hour, field, fxx)
            # URL for the grib2 file (located on NOMADS server)
            pandofile = 'http://nomads.ncep.noaa.gov/pub/data/nccf/com/hrrr/prod/hrrr.%04d%02d%02d/%s.t%02dz.wrf%sf%02d.grib2' \
                % (DATE.year, DATE.month, DATE.day, model, DATE.hour, field, fxx)
        # --- or, get experimental HRRR from ESRL via FTP -----------------
        # Byte-range download is not available here: the full file is
        # fetched, then wgrib2 builds an index to locate the field.
        elif model == 'hrrrX':
            print "\n-----------------------------------------------------------------------"
            print "!! Need to download today's Experimental HRRRfrom ESRL via FTP !!"
            print "!! Have to get the full file, and then will have to sift through each field"
            print "-----------------------------------------------------------------------\n"
            import sys
            sys.path.append('/uufs/chpc.utah.edu/common/home/u0553130/pyBKB_v2/')
            from BB_MesoWest.get_token import get_ESRL_credentials
            user, password = get_ESRL_credentials()

            # ESRL names files YYJJJHH00 + FF00 (julian-day run id + fxx)
            ESRL_file = datetime.strftime(DATE, '%y%j%H00') + '%02d00' % (fxx)

            ftp = FTP('gsdftp.fsl.noaa.gov')
            ftp.login(user, password)
            ftp.cwd('hrrr/conus/wrftwo')

            # What is the initialized hour and forecast?
            hour = ESRL_file[5:7]
            forecast = ESRL_file[9:11]

            # Save the file similar to the standard hrrr file naming
            # convention, except insert an X to mark the experimental version.
            # NOTE(review): this shadows outDIR with a hard-coded './'.
            OUTDIR = './'
            NEWFILE = 'hrrrX.t%sz.wrfsfcf%s.grib2' % (hour, forecast)
            if os.path.isfile(OUTDIR + NEWFILE):
                print "looks like that file already exists", OUTDIR + NEWFILE
            else:
                print "Downloading:", OUTDIR + NEWFILE
                ftp.retrbinary('RETR ' + ESRL_file, open(OUTDIR + NEWFILE, 'wb').write)
            ftp.quit()
            print "Finished Downloading"

            # Build an index of the full file so the requested variable's
            # grib message number can be found.
            os.system('wgrib2 ' + OUTDIR + NEWFILE + ' -t -var -lev -ftime > ' + OUTDIR + NEWFILE + '.idx')
            idxpage = open(OUTDIR + NEWFILE + '.idx')
            lines = idxpage.readlines()

            # Find the grib message number whose .idx line matches `variable`.
            # NOTE(review): if nothing matches, `number` stays unbound and
            # grbs[number] below raises NameError -- confirm intended.
            gcnt = 0
            for g in lines:
                expr = re.compile(variable)
                if expr.search(g):
                    if verbose is True:
                        print 'matched a variable', g
                    parts = g.split(':')
                    number = int(parts[0])
                    if verbose is True:
                        print 'grib field number:', number
                gcnt += 1

            # 3) Get data from the file, using pygrib
            grbs = pygrib.open(OUTDIR + NEWFILE)
            if value_only is True:
                value = grbs[number].values
                # (Remove the temporary file)
                # ?? Is it possible to push the data straight from curl to ??
                # ?? pygrib, without writing/removing a temp file? and     ??
                # ?? would that speed up this process?                     ??
                if removeFile is True:
                    os.system('rm -f %s' % (OUTDIR + NEWFILE))
                    os.system('rm -f %s' % (OUTDIR + NEWFILE + '.idx'))
                return {'value': value}
            else:
                value, lat, lon = grbs[number].data()
                validDATE = grbs[number].validDate
                anlysDATE = grbs[number].analDate
                msg = str(grbs[number])
                # 4) Remove the temporary file
                if removeFile == True:
                    os.system('rm -f %s' % (OUTDIR + NEWFILE))
                    os.system('rm -f %s' % (OUTDIR + NEWFILE + '.idx'))
                # 5) Return some import stuff from the file
                return {'model': model, 'value': value, 'lat': lat, 'lon': lon,
                        'valid': validDATE, 'anlys': anlysDATE, 'msg': msg}

    # --- Pando / NOMADS path: byte-range download with cURL --------------
    # NOTE(review): if model was 'hrrrAK' with today's date, fileidx and
    # pandofile are unbound here; the bare except below then raises again
    # on `print pandofile` -- confirm intended.
    try:
        # 0) Read in the grib2.idx file
        try:
            # Ignore ssl certificate (else urllib2.openurl won't work).
            # Depends on your version of python. See:
            # http://stackoverflow.com/questions/19268548/python-ignore-certicate-validation-urllib2
            ctx = ssl.create_default_context()
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
            idxpage = urllib2.urlopen(fileidx, context=ctx)
        except:
            # Older python: urlopen has no `context` kwarg.
            idxpage = urllib2.urlopen(fileidx)
        lines = idxpage.readlines()

        # 1) Find the byte range for the variable. Need to first find where
        #    the variable is located. Keep a count (gcnt) so we can get the
        #    end byte range from the next line.
        gcnt = 0
        for g in lines:
            expr = re.compile(variable)
            if expr.search(g):
                if verbose is True:
                    print 'matched a variable', g
                parts = g.split(':')
                rangestart = parts[1]
                parts = lines[gcnt + 1].split(':')
                rangeend = int(parts[1]) - 1
                if verbose is True:
                    print 'range:', rangestart, rangeend
                byte_range = str(rangestart) + '-' + str(rangeend)
                # 2) When the byte range is discovered, use cURL to download.
                os.system('curl -s -o %s --range %s %s' % (outfile, byte_range, pandofile))
            gcnt += 1

        # 3) Get data from the file, using pygrib
        grbs = pygrib.open(outfile)
        if value_only is True:
            value = grbs[1].values
            # (Remove the temporary file)
            # ?? Is it possible to push the data straight from curl to ??
            # ?? pygrib, without writing/removing a temp file? and     ??
            # ?? would that speed up this process?                     ??
            if removeFile is True:
                os.system('rm -f %s' % (outfile))
            return {'value': value}
        else:
            value, lat, lon = grbs[1].data()
            validDATE = grbs[1].validDate
            anlysDATE = grbs[1].analDate
            msg = str(grbs[1])
            # 4) Remove the temporary file
            if removeFile == True:
                os.system('rm -f %s' % (outfile))
            # 5) Return some import stuff from the file
            return {'value': value, 'lat': lat, 'lon': lon,
                    'valid': validDATE, 'anlys': anlysDATE, 'msg': msg}
    except:
        # Any failure (bad URL, variable not found, pygrib error) collapses
        # to an all-NaN result so batch callers keep running.
        print " ! Could not get the file:", pandofile
        print " ! Is the variable right?", variable
        print " ! Does the file exist?", fileidx
        return {'value': np.nan, 'lat': np.nan, 'lon': np.nan,
                'valid': np.nan, 'anlys': np.nan, 'msg': np.nan}
# Directory to save the downloads. Create it if it doesn't exist OUTDIR = '/uufs/chpc.utah.edu/common/home/horel-group/archive/%04d%02d%02d/BB_test/models/hrrrAK/' \ % (yesterday.year, yesterday.month, yesterday.day) if not os.path.exists(OUTDIR): os.makedirs(OUTDIR) # Change directory permissions os.chmod(OUTDIR, stat.S_IRWXU | \ stat.S_IRGRP | stat.S_IXGRP | \ stat.S_IROTH | stat.S_IXOTH) # User can read, write, execute # Group can read and execute # Others can read and execute # Credentials for logging into ESRL FTP database user, password = get_ESRL_credentials() # rclone config file config_file = '/scratch/local/mesohorse/.rclone.conf' # meso1 mesohorse user # ---------------------------------------------------------------------------- def create_idx(for_this_file, put_here): """ Create a .idx file and move to horel-group/archive/HRRR """ file_name = for_this_file.split('/')[-1] idx_dir = '/uufs/chpc.utah.edu/common/home/horel-group/archive/' + put_here if not os.path.exists(idx_dir): os.makedirs(idx_dir) idx_name = idx_dir + file_name + '.idx'
def get_grib2(model, model_params, DIR, idx=True): """ Download EXPERIMENTAL HRRR from NOAA ESRL via FTP ftp://gsdftp.fsl.noaa.gov/ Files on the FTP site are only available for the last day. Input: model - [hrrrX, hrrrakX] model_params - A dictionary of the model parameters: {'hours': range(0,24), 'fxx':{'sfc':range(0,18), 'prs':range(0,1)}} DIR - Where should I save the files? idx - Should I download/create an .idx file? """ # Credentials for logging into ESRL FTP database user, password = get_ESRL_credentials() # Models are named slightly different on ESRL than on NOMADS. I want to # preserve my nameing convention for the files downloaded from ESRL. if model == 'hrrrX': ESRL = 'hrrr/conus/' png = True elif model == 'hrrrak': ESRL = 'hrrr_ak/alaska/' png = False # For every requested field type, get a list of the available files requested_fields = [ T for T in model_params['fxx'].keys() if len(model_params['fxx'][T]) > 0 ] for field in requested_fields: ftp = FTP('gsdftp.fsl.noaa.gov') ftp.login(user, password) if field == 'sfc': ftp.cwd('%s/wrftwo' % ESRL) elif field == 'prs': ftp.cwd('%s/wrfprs' % ESRL) elif field == 'nat': ftp.cwd('%s/wrfnat' % ESRL) # Get a list of the files... 
ftp_filenames = ftp.nlst() ftp.quit() ## Filter list of files for files named with only digits ftp_filenames = filter(lambda x: x.isdigit(), ftp_filenames) ## Filter list of files by if the fxx was requested ftp_filenames = filter( lambda x: int(x[-4:-2]) in model_params['fxx'][field], ftp_filenames) ## Extract from each ftp_filenames the run datetime and forecast hour ftp_filenames_EXTRACT = map( lambda x: (datetime.strptime(x[:-4], '%y%j%H%M'), x[-4:-2]), ftp_filenames) ## List the path and new file name for each ftp_filenames_EXTRACT ftp_filenames_NEW = map( lambda x: '%s/%s/%s/%s.t%02dz.wrf%sf%s.grib2' % (model, field, x[ 0].strftime('%Y%m%d'), model, x[0].hour, field, x[1]), ftp_filenames_EXTRACT) ## Filter the ftp file list if the file has not been downloaded not_on_Pando = map( lambda x: not os.path.isfile(DIR + x) or os.path.getsize(DIR + x) < 5 * 10e6, ftp_filenames_NEW) ftp_filenames = [ ftp_filenames[d] for d in range(len(not_on_Pando)) if not_on_Pando[d] ] # Download each file, only if the file name is made of a digits for i in ftp_filenames: # I only have success downloading if I login to the FTP site each # time I download a file. ftp = FTP('gsdftp.fsl.noaa.gov') ftp.login(user, password) if field == 'sfc': ftp.cwd('%s/wrftwo' % ESRL) elif field == 'prs': ftp.cwd('%s/wrfprs' % ESRL) elif field == 'nat': ftp.cwd('%s/wrfnat' % ESRL) print "logged in for", i # What is the file's initialized hour and forecast? DATE = datetime.strptime(i[:-4], '%y%j%H%M') fxx = int(i[-4:-2]) # Where should I put this file? 
PATH = '%s/%s/%s/' % (model, field, DATE.strftime('%Y%m%d')) FILE = '%s.t%02dz.wrf%sf%02d.grib2' % (model, DATE.hour, field, fxx) print DIR + PATH + FILE # If the destination DIR path does not exist, then create it if not os.path.exists(DIR + PATH): os.makedirs(DIR + PATH) print "Downloading:", DIR + PATH + FILE ftp.retrbinary('RETR ' + i, open(DIR + PATH + FILE, 'wb').write) print "Saved:", DIR + PATH + FILE ftp.quit() # Create the .idx file if idx: create_idx(DIR + PATH + FILE)