Beispiel #1
0
def osg_data(runDATE,
             XY_point_pairs,
             variable='GUST:surface',
             fxx=range(19),
             percentile=95):
    '''
    Return a dictionary of HRRR climatology from OSG statistics for a series of points

    Input:
        runDATE        - Python Datetime Object for the model run date interested in
        XY_point_pairs - XY array coordinates (index pairs into the HRRR grid)
                         for the points you want
        variable       - HRRR variable name
        fxx            - Forecasts to include in the output
        percentile     - The percentile (from the file) you want to get

    Return:
        Dictionary with keys 'lat', 'lon', 'percentile', plus one 'fXX' key per
        forecast hour in `fxx` holding an array of the percentile value at
        each requested point.
    '''
    ## First, get the latitude and longitude grid points that correspond to the XY points

    # Load the HRRR latitude and longitude grid
    hLATLON = get_hrrr_latlon()

    # Get the latitude and longitude values for each XY point pair
    ulon = np.array([hLATLON['lon'][m, n] for m, n in XY_point_pairs])
    ulat = np.array([hLATLON['lat'][m, n] for m, n in XY_point_pairs])

    # We want to return these
    return_this = {'lat': ulat, 'lon': ulon, 'percentile': percentile}

    # These do not depend on the forecast hour, so build them once.
    var = variable.replace(':', '_').replace(' ', '_')
    DIR = '/uufs/chpc.utah.edu/common/home/horel-group8/blaylock/HRRR_OSG/hourly30/%s/' % var
    percentile_key = 'p%02d' % percentile

    ## Next, get the statistics valid at each forecast hour for each of the points
    for f in fxx:
        print('\rworking on f%02d' % f, end="")
        # Get the HRRR File
        validDATE = runDATE + timedelta(hours=f)
        # Only month/day/hour are used to pick the file. 2016 is a leap year,
        # presumably chosen so that 29 February is representable.
        DATE = datetime(2016, validDATE.month, validDATE.day, validDATE.hour)
        # NOTE(review): the file name always ends in "_f00" regardless of `f`;
        # the file is selected by valid time only -- confirm this is intended.
        FILE = 'OSG_HRRR_%s_m%02d_d%02d_h%02d_f00.h5' % (var, DATE.month,
                                                         DATE.day, DATE.hour)
        print(DIR + FILE)

        # Use a context manager so each file handle is released before the
        # next file is opened.
        with xarray.open_dataset(DIR + FILE) as OSG:
            grid = OSG[percentile_key].data
            # Get the points from the grid for the transmission lines
            OSG_pth = np.array([grid[m, n] for m, n in XY_point_pairs])

        # Add that to the array we want to return
        return_this['f%02d' % f] = OSG_pth

    print('\rDONE!')
    return return_this
Beispiel #2
0
def load_lats_lons(model):
    """
    Preload the latitude and longitude grid for a model.

    Input:
        model - Model name: 'hrrr', 'hrrrX', or 'hrrrak'.

    Return:
        A two-item list, [lats, lons].

    Raises:
        ValueError if `model` is not one of the recognized names.
    """
    if model in ('hrrr', 'hrrrX'):
        lats, lons = get_hrrr_latlon(DICT=False)
    elif model == 'hrrrak':
        # The Alaska grid is taken from an actual model file; the lat/lon
        # arrays come back alongside the requested variable.
        AK = get_hrrr_variable(datetime(2018, 2, 24, 15),
                               'TMP:2 m',
                               fxx=0,
                               model='hrrrak',
                               verbose=False)
        lats = AK['lat']
        lons = AK['lon']
    else:
        # Previously fell through and failed with an UnboundLocalError.
        raise ValueError(
            "Unknown model %r; expected 'hrrr', 'hrrrX', or 'hrrrak'" %
            (model, ))
    return [lats, lons]
Beispiel #3
0
def RMSD(validDATE, variable, FORECASTS=range(19), verbose=True):
    """
    Root-mean-square-difference for a single forecast time.

    Input:
        validDATE - Datetime object for the valid time,
                    e.g. datetime(2018, 8, 12, 21)  # period of convection
                    over Coal Hollow Fire
        variable  - HRRR variable string, e.g. 'CAPE:surface'
        FORECASTS - Forecast lead times (hours) to compare.
        verbose   - If True, print the shape of the stacked forecast grids.

    Return:
        The dictionary from get_hrrr_latlon() with these keys added:
        'RMSD', 'variable', 'DATE', and 'normalized RMSD by range'.
    """
    # Wind requests (UVGRD) expose the grid under 'SPEED'; everything else
    # uses 'value'.
    key = 'SPEED' if variable.split(':')[0] == 'UVGRD' else 'value'

    # Load all forecast grids valid at this time. Keep them in a plain list:
    # a missing file yields a scalar (shape == ()), and stacking scalars with
    # 2-D grids into one array fails on current NumPy before we can filter.
    values = [get_hrrr_variable(validDATE-timedelta(hours=f), variable,
                                fxx=f, value_only=True, verbose=False)[key]
              for f in FORECASTS]
    # We have to filter out any 'nan' values if a file does not exist
    forecasts = np.array([i for i in values if np.shape(i) != ()])

    if verbose:
        print(np.shape(forecasts))

    # Differences between each pair of forecasts (don't double count)
    n_forecasts = len(forecasts)
    differences_all = np.array([forecasts[i]-forecasts[j]
                                for i in range(n_forecasts)
                                for j in range(i)])

    RMSD_all = np.sqrt(np.mean(differences_all**2, axis=0))

    ## Normalized RMSDs, normalized by range (max-min)
    # NOTE: Ranges can't be zero or will get a divide by zero error
    # Get some percentiles for max, min
    q100, q00 = np.percentile(forecasts, [100, 0], axis=0)
    maxmin_range = q100-q00
    maxmin_range[maxmin_range == 0] = np.nan
    nRMSD_range = RMSD_all/maxmin_range

    # Grid Lat/Lon and return info
    latlon = get_hrrr_latlon()
    latlon['RMSD'] = RMSD_all
    latlon['variable'] = variable
    latlon['DATE'] = validDATE
    latlon['normalized RMSD by range'] = nRMSD_range

    return latlon
Beispiel #4
0
def forecast_data(runDATE,
                  XY_point_pairs,
                  variable='GUST:surface',
                  fxx=range(19)):
    '''
    Return a dictionary of forecasted values for a series of points

    Input:
        runDATE        - Python Datetime Object for the model run date interested in.
        XY_point_pairs - XY array coordinate pairs (index pairs into the HRRR
                         grid) for the points you want.
        variable       - HRRR variable name.
        fxx            - Forecasts to include in the output.

    Return:
        Dictionary with keys 'lat' and 'lon', plus one 'fXX' key per forecast
        hour in `fxx` holding an array of the forecast value at each point.
    '''
    ## First, get the latitude and longitude grid points that correspond to the XY points

    # Load the HRRR latitude and longitude grid
    hLATLON = get_hrrr_latlon()

    # Get the latitude and longitude values for each XY point pair
    ulon = np.array([hLATLON['lon'][m, n] for m, n in XY_point_pairs])
    ulat = np.array([hLATLON['lat'][m, n] for m, n in XY_point_pairs])

    # We want to return these
    return_this = {'lat': ulat, 'lon': ulon}

    ## Next, get the forecasts for each of the points
    for f in fxx:
        print('\rworking on f%02d' % f, end="")
        # Get the HRRR File. Pass the caller's `variable` through; it was
        # previously hard-coded to 'GUST:surface', so the argument was ignored.
        H = get_hrrr_variable(runDATE,
                              variable=variable,
                              fxx=f,
                              verbose=False)

        # Get the points from the grid for the transmission lines
        vv = np.array([H['value'][m, n] for m, n in XY_point_pairs])

        # Add that to the array we want to return
        return_this['f%02d' % f] = vv

    print('\rDONE!')
    return return_this
Beispiel #5
0
def RMSD_range(sDATE, eDATE, variable, HOURS=[0], FORECASTS=range(19)):
    """
    Compute the RMSD for a range of dates at a given hour.

    Inputs:
        sDATE     - Datetime start (valid Date)
        eDATE     - Datetime end (valid Date). The end day itself is not
                    included: days run from sDATE up to, but excluding, eDATE.
        variable  - HRRR variable string (i.e. 'TMP:2 m')
        HOURS     - the hour(s) you want to include in the RMSD calculation
                    (the default list is only read, never modified)
        FORECASTS - the forecasts you want to include in the RMSD calculation

    Return:
        The dictionary from get_hrrr_latlon() with these keys added:
        'RMSD', 'variable', and 'DATE RANGE'.
    """
    print('============ %s ============' % variable)
    print('     sDATE : %s' % sDATE)
    print('     eDATE : %s' % eDATE)
    # Wrap in a one-item tuple so a tuple argument is printed, not unpacked
    # as multiple format arguments (which would raise a TypeError).
    print('     HOURS : %s' % (HOURS, ))
    print('  FORCASTS : %s' % (FORECASTS, ))
    print()
    days = (eDATE-sDATE).days
    DAYS = [sDATE + timedelta(days=d) for d in range(days)]
    DATES = [datetime(d.year, d.month, d.day, h) for d in DAYS for h in HOURS]

    # Load all forecast grids for each valid time using multiprocessing.
    cpus = 7
    inputs = [[D, variable, FORECASTS] for D in DATES]
    # The context manager releases the worker processes even if a worker
    # raises; map() has returned every result before the block exits.
    with multiprocessing.Pool(cpus) as P:
        results = P.map(RMSD_range_MP, inputs)

    # Each result is indexed as (count, sum of squares); combine across dates.
    count = np.sum([i[0] for i in results])
    sum_squares = np.sum([i[1] for i in results], axis=0)

    # RMSD Calculation
    RMSD = np.sqrt(sum_squares/count)

    # Grid Lat/Lon and return info
    latlon = get_hrrr_latlon()
    latlon['RMSD'] = RMSD
    latlon['variable'] = variable
    latlon['DATE RANGE'] = [sDATE, eDATE]

    return latlon
Beispiel #6
0
# Matplotlib defaults for every figure produced by this script, applied in
# order from a single table of (rcParams key, value) pairs.
for _rc_key, _rc_value in (
        ('xtick.labelsize', 10),
        ('ytick.labelsize', 10),
        ('axes.labelsize', 12),
        ('axes.titlesize', 15),
        ('lines.linewidth', 1.8),
        ('grid.linewidth', .25),
        ('figure.subplot.wspace', 0.05),
        ('figure.subplot.hspace', 0.05),
        ('legend.fontsize', 10),
        ('legend.framealpha', .75),
        ('legend.loc', 'best'),
        ('savefig.bbox', 'tight'),
        ('savefig.dpi', 100),
):
    mpl.rcParams[_rc_key] = _rc_value

# HRRR latitude/longitude grids, shared by all of the maps below.
latlon = get_hrrr_latlon()
lat, lon = latlon['lat'], latlon['lon']

# Map objects for each plotting domain.
m = draw_HRRR_map()  # CONUS
mW = draw_centermap(40, -115, (10, 10))  # West
mU = draw_centermap(39.5, -111.6, (3.2, 3.2))  # Utah

# Variable constants
VARS = {'TMP:2 m':{'cmap':'magma',
                    'vmax':2.5,
                    'vmin':0,
                    'label':'2 m Temperature',
                    'units':'C'},
        'DPT:2 m':{'cmap':'magma',
                    'vmax':5,
                    'vmin':0,
Beispiel #7
0
def filter_by_path(glm):
    """
    Split GLM observations into the HRRR domain and several subdomains.

    Inputs:
        glm    - the object returned by accumulate_GLM. This function reads
                 the keys 'latitude', 'longitude', 'energy', 'area', and
                 'DATETIME'.

    Return:
        None if `glm` contains no observations. Otherwise a dictionary with
        'DATETIME' (copied from `glm`) and one entry per domain ('HRRR',
        'West', 'Central', 'East', 'Utah'), each a dictionary of the
        'latitude', 'longitude', 'energy', and 'area' values that fall inside
        that domain. 'area' is None when glm['area'] has size 1.
    """
    from matplotlib.path import Path
    from BB_HRRR.HRRR_Pando import get_hrrr_latlon

    # Get HRRR latitude and longitude grids
    Hlat, Hlon = get_hrrr_latlon(DICT=False)

    # =============================================================================
    print('Make domain paths...')
    ## Create Path boundaries of HRRR domain and subdomains of interest:
    # HRRR: All points counter-clockwise around the model domain.
    # West, Central, East: A 16 degree wide and 26 degree tall boundary region.
    # The closing vertex of each box repeats the first one (the original used
    # 24.2 instead of 24.4 for the closing latitude).
    PATH_points = {
        'HRRR': {
            'lon':
            np.concatenate(
                [Hlon[0], Hlon[:, -1], Hlon[-1][::-1], Hlon[:, 0][::-1]]),
            'lat':
            np.concatenate(
                [Hlat[0], Hlat[:, -1], Hlat[-1][::-1], Hlat[:, 0][::-1]])
        },
        'West': {
            'lon': [-120, -104, -104, -120, -120],
            'lat': [24.4, 24.4, 50.2, 50.2, 24.4]
        },
        'Central': {
            'lon': [-104, -88, -88, -104, -104],
            'lat': [24.4, 24.4, 50.2, 50.2, 24.4]
        },
        'East': {
            'lon': [-88, -72, -72, -88, -88],
            'lat': [24.4, 24.4, 50.2, 50.2, 24.4]
        },
        'Utah': {
            'lon': [
                -114.041664, -111.047526, -111.045645, -109.051460,
                -109.048632, -114.051534, -114.041664
            ],
            'lat': [
                41.993580, 42.002846, 40.998538, 40.998403, 36.998310,
                37.000574, 41.993580
            ]
        }
    }

    ## Pair the vertices as (lon, lat) and build a Path object per domain.
    PATHS = {
        name: Path(np.array(list(zip(pts['lon'], pts['lat']))))
        for name, pts in PATH_points.items()
    }

    ## Filter GLM observation within the HRRR domain. -------------------------
    #--------------------------------------------------------------------------
    # The GLM observes flashes for the disk in its field of view. We only want
    # the flashes within the HRRR domain. This is tricky. We can't just use
    # lat/lon bounds, because the projection of the HRRR model *bends* the
    # lat/lon and we would overshoot corners.
    # Instead, we determine which GLM points are inside each Path built above.

    # Total number of GLM observations
    print("Total GLM observations:", len(glm['latitude']))

    # Generate a (lon, lat) tuple for each point
    print('generate lat/lon pair...')
    GLM_latlon_pair = list(zip(glm['longitude'], glm['latitude']))
    print('...done')

    # Return None if there are no points, because there are no flashes.
    if not GLM_latlon_pair:
        # The original message referenced an undefined name `DATE`; report
        # the datetime carried by the GLM object instead.
        print('!!! Warning !!! DATE %s had no lightning data' %
              (glm['DATETIME'], ))
        return None

    ## Using each Path in PATHS, determine which GLM points fall inside it.
    print('Find which points are inside path')
    print('')
    inside_path = {
        name: path.contains_points(GLM_latlon_pair)
        for name, path in PATHS.items()
    }
    print('...done!')

    ## Filter the GLM data keys by the bounding Path (points inside Path):
    # A size-1 'area' cannot be indexed by the per-point mask, so it is
    # reported as None (presumably a placeholder for "no area data").
    has_area = np.size(glm['area']) != 1
    filtered_glm = {'DATETIME': glm['DATETIME']}
    for name, inside in inside_path.items():
        filtered_glm[name] = {
            'latitude': glm['latitude'][inside],
            'longitude': glm['longitude'][inside],
            'energy': glm['energy'][inside],
            'area': glm['area'][inside] if has_area else None
        }
    return filtered_glm
Beispiel #8
0
            except:
                print('%s no MesoWest dew point' % a['NAME'])
            try:
                loc['fig'][3].plot(a['DATETIME'],
                                   mps_to_MPH(a['wind_speed']),
                                   c='k',
                                   ls='--',
                                   zorder=50)
            except:
                print('%s no MesoWest wind speed' % a['NAME'])

## Load the data that changes with each forecast hour; a figure is saved for
## each forecast further down.
# 2.2) Radar reflectivity and 10 m winds for the entire CONUS, every forecast
HH_refc = get_hrrr_all_run(DATE, 'REFC:entire')
HH_u, HH_v, HH_spd = get_hrrr_all_run(DATE, 'UVGRD:10 m')
Hlat, Hlon = get_hrrr_latlon(DICT=False)

# Convert the wind grids from meters per second to miles per hour
HH_u, HH_v, HH_spd = [
    mps_to_MPH(np.array(grid)) for grid in (HH_u, HH_v, HH_spd)
]

for fxx in range(0, 19):
    for n, (name, loc) in enumerate(location.items()):
        tz = loc['timezone']
        print("  --> Working on: F%02d %s" % (fxx, name))
        ## Get grids for this particular forecast time...
        H_refc = HH_refc[fxx]
        H_u = HH_u[fxx]
        H_v = HH_v[fxx]
        #
Beispiel #9
0
def NEP(validDATE,
        threshold=35,
        variable='REFC:entire',
        radius=9,
        fxx=range(2, 5),
        hours_span=0):
    '''
    Compute the Neighborhood Ensemble Probability

    Input:
        validDATE  - Datetime object representing the valid date.
        threshold  - The threshold value for which you wish to compute probability.
        variable   - Variable string from the HRRR .idx file
        radius     - The number of grid points the radial spatial filter uses
        fxx        - A list of forecast hours (between 0 and 18) to use in the probability.
        hours_span - Number hours +/- the valid hour to consider as members.
                     Default 0 will only use the validDATE. If you set to 1, then
                     will use additional model runs valid for +/- 1 hour the
                     validDATE

    Return:
        The dictionary from get_hrrr_latlon() with two keys added:
        'prob' (masked array of probabilities, zeros masked) and
        'members' (number of ensemble members used).
    '''
    # Generate a list of datetimes based on the validDATE and the hours_span.
    # Use total_seconds(): timedelta.seconds excludes whole days, so it wraps
    # back to 0 once the window reaches 24 hours (hours_span >= 12).
    sDATE = validDATE - timedelta(hours=hours_span)
    eDATE = validDATE + timedelta(hours=hours_span)
    n_hours = int((eDATE - sDATE).total_seconds() // 3600) + 1
    DATES = [sDATE + timedelta(hours=h) for h in range(n_hours)]

    print()
    print('### Neighborhood Ensemble Probability###')
    print('     Valid Dates:\t%s' % DATES)
    print('        Variable:\t%s' % variable)
    print(' Threshold value:\t%s' % threshold)
    print('   Radial filter:\t %s grid points (%s km)' % (radius,
                                                          (3 * radius)))
    print('  Forecast hours:\t%s' % (['f%02d' % f for f in fxx]))

    ## First, compute the ensemble probability (EP) for each grid point.
    # Retrieve all the HRRR grids as each member.
    # NOTE(review): the fifth item is `threshold` here, whereas TLE passes
    # `radius` in that position -- confirm what member_multipro expects.
    inputs_list = [[d, f, threshold, variable, threshold] for d in DATES
                   for f in fxx]
    # Multiprocessing :)
    cpus = min(multiprocessing.cpu_count(), len(inputs_list))
    # The context manager releases the workers even if a member fails;
    # map() has returned every result before the block exits.
    with multiprocessing.Pool(cpus) as p:
        members = np.array(p.map(member_multipro, inputs_list))

    # Average of all members at each grid point, ensemble probability (EP)
    EP = np.mean(members, axis=0)

    ## Second, average EP neighborhood at each point.
    neighborhood_prob = ndimage.generic_filter(
        EP, neighborhood_mean, footprint=radial_footprint(radius))

    print("\n###########################################################")
    print("  Neighborhood Ensemble Probability:")
    print(
        "    Percentage of grid points within the radius %s km (%s grid points)"
        % (radius * 3, radius))
    print("    where %s >= %s" % (variable, threshold))
    print("###########################################################\n")

    ## 3) Return some values
    # Load the latitude and longitude.
    return_this = get_hrrr_latlon()

    # Return the array with zero probability masked
    masked = np.ma.array(neighborhood_prob)
    masked[masked == 0] = np.ma.masked

    # also return the masked probabilities
    return_this['prob'] = masked
    return_this['members'] = len(members)

    return return_this
Beispiel #10
0
def TLE(validDATE,
        threshold=35,
        variable='REFC:entire',
        radius=9,
        fxx=range(2, 5),
        hours_span=0):
    '''
    Compute the Time-Lagged Ensemble for a variable

    Input:
        validDATE  - Datetime object representing the valid date.
        threshold  - The threshold value for which you wish to compute probability.
        variable   - Variable string from the HRRR .idx file
        radius     - The number of grid points the radial spatial filter uses
        fxx        - A list of forecast hours (between 0 and 18) to use in the probability.
        hours_span - Number hours +/- the valid hour to consider as members.
                     Default 0 will only use the validDATE. If you set to 1, then
                     will use additional model runs valid for +/- 1 hour the
                     validDATE

    Return:
        The dictionary from get_hrrr_latlon() with two keys added:
        'prob' (masked array of probabilities, zeros masked) and
        'members' (number of ensemble members used).
    '''
    # Generate a list of datetimes based on the validDATE and the hours_span.
    # Use total_seconds(): timedelta.seconds excludes whole days, so it wraps
    # back to 0 once the window reaches 24 hours (hours_span >= 12).
    sDATE = validDATE - timedelta(hours=hours_span)
    eDATE = validDATE + timedelta(hours=hours_span)
    n_hours = int((eDATE - sDATE).total_seconds() // 3600) + 1
    DATES = [sDATE + timedelta(hours=h) for h in range(n_hours)]

    print()
    print('### Time-Lagged Ensemble###')
    print('     Valid Dates:\t%s' % DATES)
    print('        Variable:\t%s' % variable)
    print(' Threshold value:\t%s' % threshold)
    print('   Radial filter:\t %s grid points (%s km)' % (radius,
                                                          (3 * radius)))
    print('  Forecast hours:\t%s' % (['f%02d' % f for f in fxx]))

    # Run spatial filter for each member:
    # List of inputs used for multiprocessing for each member
    inputs_list = [[d, f, threshold, variable, radius] for d in DATES
                   for f in fxx]

    # Multiprocessing :)
    cpus = min(multiprocessing.cpu_count(), len(inputs_list))
    # The pool was previously never closed; the context manager releases the
    # workers once map() has returned every result (or a member fails).
    with multiprocessing.Pool(cpus) as p:
        # Convert explicitly so the division below is array arithmetic.
        members = np.array(p.map(member_multipro, inputs_list))

    # The maximum number of point in the radial footprint
    max_points = np.sum(radial_footprint(radius))
    # The TLE probability is the mean of the probability of "hits" within the radius
    # (presumably each member grid holds hit counts within the footprint --
    # verify against member_multipro).
    member_mean = np.mean(members / max_points, axis=0)

    # Return the array with zero probability masked
    masked = np.ma.array(member_mean)
    masked[masked == 0] = np.ma.masked

    # Load the latitude and longitude.
    return_this = get_hrrr_latlon()

    # also return the masked probabilities
    return_this['prob'] = masked
    return_this['members'] = len(members)

    return return_this