Example #1
                                   longitude[j, i + 1], latitude[j, i + 1])
    dx[:, i + 1] = dx[:, i]

    xdiff_sign = np.sign(longitude[0, 1] - longitude[0, 0])
    ydiff_sign = np.sign(latitude[1, 0] - latitude[0, 0])
    return xdiff_sign * dx * units.meter, ydiff_sign * dy * units.meter


###############################################
# Create NCSS object to access the NetcdfSubset
# ---------------------------------------------
# Data from NOMADS GFS 0.5 deg Analysis Archive
# https://www.ncdc.noaa.gov/data-access/model-data/model-datasets/global-forcast-system-gfs
dt = datetime(2017, 4, 5, 12)
ncss = NCSS(
    'https://nomads.ncdc.noaa.gov/thredds/ncss/grid/gfs-004-anl/'
    '{0:%Y%m}/{0:%Y%m%d}/gfsanl_4_{0:%Y%m%d}_{0:%H}00_000.grb2'.format(dt))

# Create lat/lon box for location you want to get data for
query = ncss.query().time(dt)
query.lonlat_box(north=65, south=15, east=310, west=220)
query.accept('netcdf4')

# Request data for vorticity
query.variables('Geopotential_height', 'Temperature', 'U-component_of_wind',
                'V-component_of_wind')
data = ncss.get_data(query)

# Pull out variables you want to use
hght_var = data.variables['Geopotential_height']
temp_var = data.variables['Temperature']
Example #2
    def get_data(self):
        # Request the GFS data from the thredds server
        gfs_url = f"https://thredds.ucar.edu/thredds/catalog/grib/NCEP/GFS/Global_0p25deg/GFS_Global_0p25deg_{self.now.year}{self.now.month:02d}{self.now.day:02d}_0000.grib2/catalog.xml"
        gfs_cat = TDSCatalog(gfs_url)
        #https://thredds.ucar.edu/thredds/catalog/grib/NCEP/GFS/Global_0p25deg/catalog.xml'
        dataset = list(gfs_cat.datasets.values())[0]
        #print(dataset.access_urls)

        # Create NCSS object to access the NetcdfSubset
        ncss = NCSS(dataset.access_urls['NetcdfSubset'])

        # query the data from the server
        query = ncss.query()
        query.time_range(self.start, self.end)
        query.lonlat_box(north=80, south=0, east=310, west=200)
        query.accept('netcdf4')

        print("-----------------------------------------\n"\
              +"Sit back....\nOr get your coffee....\nOr do a Sudoku....\n"\
              +"-----------------------------------------\n")
        print("qeueing data...")
        #query.variables(str(self.query_list)).add_lonlat(True)
        for i in self.query_list:
            query.variables(i)
        #query.variables(vort_name,hgt_name,pv_press_name,mslp_name,upflux_rad_name,u_name,v_name,
        #               u_src_name,v_src_name,sfc_gust_name).add_lonlat(True)
        print("\ndone qeueing data.\n\ngrabbing data...\n")

        # Request data for the variables you want to use
        self.data = ncss.get_data(query)
        print("done grabbing data!!\n-_-_-_-_-_-_-_-_-_-_-_\n")
        return self.data
Example #3
    def query_point(self):
        print('Retrieving selected variables...')

        ds = self.connect()

        ncss = NCSS(ds.access_urls['NetcdfSubset'])
        query = ncss.query()
        now = datetime.utcnow()
        timestamp = self.end

        # format the run initialization and end times as strings (e.g. '12z-04-2017')
        self.init_time = now.strftime('%Hz-%d-%Y')
        self.end_time = (now +
                         pd.Timedelta(hours=timestamp)).strftime('%Hz-%d-%Y')

        query.time_range(now + pd.Timedelta(hours=self.start),
                         now + pd.Timedelta(hours=timestamp))
        query.accept('netcdf4')
        query.lonlat_point(self.lon, self.lat)

        #pull temperature, cloud cover, and precip data
        query.variables(self.variables)
        data = ncss.get_data(query)

        return data
Example #4
def build_query(west=-58.5, east=32, south=42, north=74):
    metar = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/nws/metar/'
                       'ncdecoded/catalog.xml')
    dataset = list(metar.datasets.values())[0]
    print(list(dataset.access_urls))

    # Access netcdf subset and use siphon to request data
    ncss_url = dataset.access_urls['NetcdfSubset']
    ncss = NCSS(ncss_url)
    print(ncss.variables)

    # get current date and time
    now = datetime.utcnow()
    now = datetime(now.year, now.month, now.day, now.hour)

    # build the query
    query = ncss.query()
    query.lonlat_box(west, east, south, north)
    query.time(now)
    query.variables('air_temperature', 'dew_point_temperature', 'wind_speed',
                    'precipitation_amount_hourly', 'hectoPascal_ALTIM',
                    'air_pressure_at_sea_level', 'wind_from_direction',
                    'cloud_area_fraction', 'weather', 'report', 'wind_gust')
    query.accept('csv')
    return ncss, query
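
A minimal usage sketch for build_query() above; it is an assumption here that Siphon's 'csv' handler returns a dict-like mapping of column names to arrays:

ncss, query = build_query()
data = ncss.get_data(query)  # parsed CSV: column name -> array
print(data['air_temperature'][:5])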
Example #5
def ncss_subset(tds):

    ds_list = tds_connect(tds)
    dset1 = ds_list[0]
    ncss_obj = NCSS(dset1.access_urls['NetcdfSubset'])
    subset = ncss_obj.query()
    subset.lonlat_box(142, 149, -45, -38)
    subset.accept('netcdf')
    subset.variables('tasmax', 'tasmin')
    #data = ncss_obj.get_data(subset)
    return subset
Example #6
def get_obs(ts, mybb):
    # copied from the browser url box
    metar_cat_url = 'http://thredds.ucar.edu/thredds/catalog/nws/metar/ncdecoded/catalog.xml?dataset=nws/metar/ncdecoded/Metar_Station_Data_fc.cdmr'
    # parse the xml
    metar_cat = TDSCatalog(metar_cat_url)
    # what datasets are here? only one "dataset" in this catalog
    dataset = list(metar_cat.datasets.values())[0]
    ncss_url = dataset.access_urls["NetcdfSubset"]
    ncss = NCSS(ncss_url)

    query = ncss.query().accept('csv').time(ts - datetime.timedelta(minutes=1))
    query.lonlat_box(**mybb)
    query.variables('air_temperature', 'dew_point_temperature', 'inches_ALTIM',
                    'wind_speed', 'wind_from_direction', 'cloud_area_fraction', 'weather')

    try:
        data = ncss.get_data(query)
        lats = data['latitude'][:]
        lons = data['longitude'][:]
        tair = data['air_temperature'][:]
        dewp = data['dew_point_temperature'][:]
        slp = (data['inches_ALTIM'][:] * units('inHg')).to('mbar')

        # Convert wind to components
        u, v = mpcalc.get_wind_components(data['wind_speed'] * units.knot,
                                          data['wind_from_direction'] * units.deg)

        # Need to handle missing (NaN) and convert to proper code
        cloud_cover = 8 * data['cloud_area_fraction']
        cloud_cover[np.isnan(cloud_cover)] = 9
        cloud_cover = cloud_cover.astype(int)  # np.int was removed from NumPy

        # For some reason these come back as bytes instead of strings
        stid = [s.decode() for s in data['station']]

        # Convert the text weather observations to WMO codes we can map to symbols
        if data['weather'].dtype != bool:
            wx_text = [s.decode('ascii') for s in data['weather']]
            wx_codes = np.array(list(to_code(wx_text)))
        else:
            wx_codes = np.array([0] * len(data['weather']))

        sfc_data = {'latitude': lats, 'longitude': lons,
                    'air_temperature': tair, 'dew_point_temperature': dewp, 'eastward_wind': u,
                    'northward_wind': v, 'cloud_coverage': cloud_cover,
                    'air_pressure_at_sea_level': slp, 'present_weather': wx_codes}

        have_obs = True
    except Exception:
        have_obs = False
        sfc_data = {}

    return sfc_data, have_obs
Example #7
    def get_sounding(source,  lat, long):
        # source unused for now bc testing only on ncss
        source_place_holder = source
        #print(source_place_holder)
        best_gfs = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/grib/NCEP/GFS/Global_0p5deg/' +
                              'catalog.xml?dataset=grib/NCEP/GFS/Global_0p5deg/Best')
        best_ds = list(best_gfs.datasets.values())[0]

        ncss = NCSS(best_ds.access_urls['NetcdfSubset'])
        query = ncss.query()
        query.lonlat_point(long, lat).time(datetime.utcnow())
        query.accept('netcdf4')
        query.variables('Temperature_isobaric', 'Relative_humidity_isobaric', 'u-component_of_wind_isobaric',
                        'v-component_of_wind_isobaric')

        data = ncss.get_data(query)

        temp = data.variables['Temperature_isobaric']
        temp_vals = temp[:].squeeze() * units.kelvin
        relh = data.variables['Relative_humidity_isobaric']
        relh_values = relh[:] / 100
        td = dewpoint_rh(temp_vals, relh_values)
        td_vals = td[:].squeeze()
        press = data.variables['isobaric3']
        press_vals = press[:].squeeze()

        u_wind = data.variables['u-component_of_wind_isobaric']
        u_wind_vals = u_wind[:].squeeze()

        v_wind = data.variables['v-component_of_wind_isobaric']
        v_wind_vals = v_wind[:].squeeze()
        # Put temp, dewpoint, pressure, u/v winds into numpy arrays and reorder
        t = np.array(temp_vals)[::-1]
        td = np.array(td_vals)[::-1]
        p = np.array(press_vals)[::-1]
        u = np.array(u_wind_vals)[::-1]
        v = np.array(v_wind_vals)[::-1]

        # Change units for proper skew-T
        p = (p * units.pascals).to('mbar')
        t = (t * units.kelvin).to('degC')
        td = td * units.degC
        u = (u * units('m/s')).to('knot')
        v = (v * units('m/s')).to('knot')
        # spd = spd * units.knot
        # direc = direc * units.deg
        # u, v = get_wind_components(spd, direc)

        return t, td, p, u, v, lat, long, str(datetime.utcnow())[:-7]
Example #8
def get_closest_gfs(time, level, field):
    """
    Retrieve the current best 0.25 deg GFS model output for a given field, level, and time.

    time : datetime object
    level : vertical level, in the dataset's native units (Pa for GFS isobaric levels)
    field : CF field to retrieve
    """

    # Get the catalog and best GFS entry
    catalog = TDSCatalog(
        'http://thredds.ucar.edu/thredds/catalog/grib/NCEP/GFS/Global_0p25deg/catalog.xml'
    )
    best_gfs = list(catalog.datasets.values())[1]

    # Using NCSS, build a query and fetch the data
    ncss = NCSS(best_gfs.access_urls['NetcdfSubset'])
    query = ncss.query()
    query.lonlat_box(north=90, south=10, east=360, west=160)
    query.vertical_level(level)
    query.time(time)
    query.accept('netcdf4')
    query.variables(field)
    data = ncss.get_data(query)

    # Pull out the variables we will use
    lat_var = data.variables['lat']
    lon_var = data.variables['lon']
    data_var = data.variables[field]

    # Find the correct time dimension name
    for coord in data_var.coordinates.split():
        if 'time' in coord:
            time_var = data.variables[coord]
            break

    # Convert number of hours since the reference time into an actual date
    time_vals = netCDF4.num2date(time_var[:].squeeze(), time_var.units)

    # Combine 1D latitude and longitudes into a 2D grid of locations
    lon_2d, lat_2d = np.meshgrid(lon_var[:], lat_var[:])

    # Filter the data to smooth it out a bit
    data_var = ndimage.gaussian_filter(data_var[:][0][0], sigma=1.5, order=0)

    return time_vals, lat_2d, lon_2d, data_var
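
A short usage sketch for get_closest_gfs(); the field name is one of the standard GFS variables, and 50000 Pa corresponds to 500 hPa (GFS isobaric levels are addressed in Pa, as in the vertical_level test further below):

from datetime import datetime

time_vals, lat_2d, lon_2d, temp_500 = get_closest_gfs(
    datetime.utcnow(), 50000, 'Temperature_isobaric')
print(time_vals, temp_500.shape)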
Example #9
def return_gfs():
    best_gfs = TDSCatalog(
        'http://thredds.ucar.edu/thredds/catalog/grib/NCEP/GFS/Global_0p5deg/catalog.xml?dataset=grib/NCEP/GFS/Global_0p5deg/Best'
    )
    best_gfs.datasets
    best_ds = list(best_gfs.datasets.values())[0]
    best_ds.access_urls
    return NCSS(best_ds.access_urls['NetcdfSubset'])
Example #10
    def set_dataset(self):
        '''
        Retrieves the designated dataset, creates an NCSS object, and
        creates an NCSS query object.
        '''

        keys = list(self.model.datasets.keys())
        labels = [item.split()[0].lower() for item in keys]
        if self.set_type == 'best':
            self.dataset = self.model.datasets[keys[labels.index('best')]]
        elif self.set_type == 'latest':
            self.dataset = self.model.datasets[keys[labels.index('latest')]]
        elif self.set_type == 'full':
            self.dataset = self.model.datasets[keys[labels.index('full')]]

        self.access_url = self.dataset.access_urls[self.access_url_key]
        self.ncss = NCSS(self.access_url)
        self.query = self.ncss.query()
Example #11
def get_data(lon_w, lon_e, lat_s, lat_n, variable):
    """TODO
        Add reset, change colors of map, variable selection, model selection, lat/long validator
    """
    cat_url = 'http://thredds-jumbo.unidata.ucar.edu/thredds/catalog/grib/NCEP/GFS/Global_0p25deg/catalog.xml'
    latest_gfs = get_latest_access_url(cat_url, 'NetcdfSubset')
    ncss = NCSS(latest_gfs)

    query = ncss.query()
    query.lonlat_box(west=lon_w, east=lon_e, south=lat_s, north=lat_n).all_times()
    query.accept('netcdf4')
    query.variables(variable_dict(variable))
    data = ncss.get_data(query)
    list(data.variables.keys())
    var1 = data.variables[variable_dict(variable)]

    # find the time dimension; only works if its name contains 'time'
    time_name = None
    for dim in var1.dimensions:
        if 'time' in dim:
            time_name = dim
    if time_name is None:
        raise ValueError("Couldn't find a time dimension for " + var1.name)
    time_1d = data.variables[time_name]
    lat_1d = data.variables['lat']
    lon_1d = data.variables['lon']

    # Reduce the dimensions of the data
    lat_1d = lat_1d[:].squeeze()
    lon_1d = lon_1d[:].squeeze()

    # Convert the number of hours since the reference time to an actual date
    time_val = num2date(time_1d[:].squeeze(), time_1d.units)

    # Combine latitude and longitudes
    lon_2d, lat_2d = np.meshgrid(lon_1d, lat_1d)

    # Flatten() combines all the lists from meshgrid into one list
    full_lat_1d = lat_2d.flatten()
    full_lon_1d = lon_2d.flatten()

    # Create one list that pairs longs and lats
    lonlat_list = zip(full_lon_1d, full_lat_1d)

    return lon_2d, lat_2d, var1, time_val, lonlat_list
Example #13
def retrieve_point_forecast(ds, lat, lon, var, ensemble):
    '''
    ds: a siphon dataset object
    lat:
    lon:
    var: model variable name to extract
    ensemble: True/False indicator if the ds object contains ensemble data

    Given a siphon dataset object, retrieve the forecast variable for the
    given coordinates.

    If the object is from an ensemble, the variables object has an
    additional dimension.
    '''

    ncss = NCSS(ds.access_urls['NetcdfSubset'])
    query = ncss.query()
    query.lonlat_point(lon, lat)
    query.all_times()
    query.variables(var).accept('netcdf')

    data = ncss.get_data(query)
    temps = data.variables[var]
    time = data.variables['time']
    time_vals = num2date(time[:].squeeze(), time.units)
    ureg = UnitRegistry()

    if ensemble:
        ensemble_temp_series = []
        num_ens = temps.shape[2]
        for i in range(num_ens):
            temp_vals = ((temps[:, :, i, :].squeeze() * ureg.kelvin)
                         .to(ureg.degF))
            temp_series = pd.Series(temp_vals, index=time_vals)
            ensemble_temp_series.append(temp_series)
        return ensemble_temp_series
    else:
        temp_vals = (temps[:, :, 0].squeeze() * ureg.kelvin).to(ureg.degF)
        temp_series = pd.Series(temp_vals, index=time_vals)
        return temp_series
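
A hedged usage sketch for retrieve_point_forecast(); the GEFS members catalog URL and the variable name are assumptions, not taken from the snippet:

from siphon.catalog import TDSCatalog

cat = TDSCatalog('https://thredds.ucar.edu/thredds/catalog/grib/NCEP/GEFS/'
                 'Global_1p0deg_Ensemble/members/catalog.xml')
ds = list(cat.datasets.values())[0]
# one pandas Series per ensemble member, indexed by forecast time
series_list = retrieve_point_forecast(ds, 40.0, -105.0,
                                      'Temperature_height_above_ground',
                                      ensemble=True)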
Example #14
def retrieve_gfs_analysis(
    time,
    lat=50,
    variables=['Geopotential_height_isobaric',
               'u-component_of_wind_isobaric']):

    url = time.strftime(
        'https://www.ncei.noaa.gov/thredds/ncss/grid/gfs-g3-anl-files/%Y%m/%Y%m%d/gfsanl_3_%Y%m%d_%H%M_000.grb2/'
    )
    ncss = NCSS(url)

    query = ncss.query()
    query.all_times().variables(*variables)

    query.lonlat_box(north=lat, south=lat, east=360., west=0.)
    nc_north = ncss.get_data(query)

    query.lonlat_box(north=-lat, south=-lat, east=360., west=0.)
    nc_south = ncss.get_data(query)

    data_north = xr.open_dataset(xr.backends.NetCDF4DataStore(nc_north))
    data_south = xr.open_dataset(xr.backends.NetCDF4DataStore(nc_south))

    return xr.concat([data_north, data_south], dim='lat')
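
A brief usage sketch; the analysis date is illustrative and assumes the NCEI archive holds that run:

from datetime import datetime

ds = retrieve_gfs_analysis(datetime(2017, 4, 5, 12))  # defaults to the +/-50 deg latitude circles
print(ds['Geopotential_height_isobaric'])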
Example #15
    def set_dataset(self):
        '''
        Retrieves the designated dataset, creates an NCSS object, and
        initiates an NCSS query.
        '''
        keys = list(self.model.datasets.keys())
        labels = [item.split()[0].lower() for item in keys]
        if self.set_type == 'best':
            self.dataset = self.model.datasets[keys[labels.index('best')]]
        elif self.set_type == 'latest':
            self.dataset = self.model.datasets[keys[labels.index('latest')]]
        elif self.set_type == 'full':
            self.dataset = self.model.datasets[keys[labels.index('full')]]

        self.access_url = self.dataset.access_urls[self.access_url_key]
        self.ncss = NCSS(self.access_url)
        self.query = self.ncss.query()        
Example #16
 def setup(self):
     dt = datetime(2015, 6, 12, 15, 0, 0)
     self.ncss = NCSS(self.server + self.urlPath)
     self.nq = self.ncss.query().lonlat_point(-105, 40).time(dt)
     self.nq.variables('Temperature_isobaric', 'Relative_humidity_isobaric')
Example #17
class TestNCSS(object):
    server = 'http://thredds.ucar.edu/thredds/ncss/'
    urlPath = 'grib/NCEP/GFS/Global_0p5deg/GFS_Global_0p5deg_20150612_1200.grib2'

    @recorder.use_cassette('ncss_test_metadata')
    def setup(self):
        dt = datetime(2015, 6, 12, 15, 0, 0)
        self.ncss = NCSS(self.server + self.urlPath)
        self.nq = self.ncss.query().lonlat_point(-105, 40).time(dt)
        self.nq.variables('Temperature_isobaric', 'Relative_humidity_isobaric')

    def test_good_query(self):
        assert self.ncss.validate_query(self.nq)

    def test_bad_query(self):
        self.nq.variables('foo')
        assert not self.ncss.validate_query(self.nq)

    def test_bad_query_no_vars(self):
        self.nq.var.clear()
        assert not self.ncss.validate_query(self.nq)

    @recorder.use_cassette('ncss_gfs_xml_point')
    def test_xml_point(self):
        self.nq.accept('xml')
        xml_data = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in xml_data
        assert 'Relative_humidity_isobaric' in xml_data
        assert xml_data['lat'][0] == 40
        assert xml_data['lon'][0] == -105

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_csv_point(self):
        self.nq.accept('csv')
        csv_data = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in csv_data
        assert 'Relative_humidity_isobaric' in csv_data
        assert csv_data['lat'][0] == 40
        assert csv_data['lon'][0] == -105

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_unit_handler_csv(self):
        self.nq.accept('csv')
        self.ncss.unit_handler = tuple_unit_handler
        csv_data = self.ncss.get_data(self.nq)

        temp = csv_data['Temperature_isobaric']
        assert len(temp) == 2
        assert temp[1] == 'K'

        relh = csv_data['Relative_humidity_isobaric']
        assert len(relh) == 2
        assert relh[1] == '%'

    @recorder.use_cassette('ncss_gfs_xml_point')
    def test_unit_handler_xml(self):
        self.nq.accept('xml')
        self.ncss.unit_handler = tuple_unit_handler
        xml_data = self.ncss.get_data(self.nq)

        temp = xml_data['Temperature_isobaric']
        assert len(temp) == 2
        assert temp[1] == 'K'

        relh = xml_data['Relative_humidity_isobaric']
        assert len(relh) == 2
        assert relh[1] == '%'

    @recorder.use_cassette('ncss_gfs_netcdf_point')
    def test_netcdf_point(self):
        self.nq.accept('netcdf')
        nc = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in nc.variables
        assert 'Relative_humidity_isobaric' in nc.variables
        assert nc.variables['latitude'][0] == 40
        assert nc.variables['longitude'][0] == -105

    @recorder.use_cassette('ncss_gfs_netcdf4_point')
    def test_netcdf4_point(self):
        self.nq.accept('netcdf4')
        nc = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in nc.variables
        assert 'Relative_humidity_isobaric' in nc.variables
        assert nc.variables['latitude'][0] == 40
        assert nc.variables['longitude'][0] == -105

    @recorder.use_cassette('ncss_gfs_vertical_level')
    def test_vertical_level(self):
        self.nq.accept('csv').vertical_level(50000)
        csv_data = self.ncss.get_data(self.nq)

        assert str(csv_data['Temperature_isobaric'])[:6] == '263.39'

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_raw_csv(self):
        self.nq.accept('csv')
        csv_data = self.ncss.get_data_raw(self.nq)

        assert csv_data.startswith(b'date,lat')

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_unknown_mime(self):
        self.nq.accept('csv')
        with response_context():
            csv_data = self.ncss.get_data(self.nq)
            assert csv_data.startswith(b'date,lat')
Example #18
def give_me_latest_gfs():
    best_gfs = 'http://thredds.ucar.edu/thredds/catalog/grib/NCEP/GFS/Global_0p5deg/catalog.xml'
    latest_gfs = get_latest_access_url(best_gfs, "NetcdfSubset")
    ncss = NCSS(latest_gfs)
    return ncss
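
A sketch completing the round trip with the returned NCSS object; the variable name assumes the standard GFS isobaric fields:

from datetime import datetime

ncss = give_me_latest_gfs()
query = ncss.query().lonlat_point(-105, 40).time(datetime.utcnow())
query.variables('Temperature_isobaric').accept('netcdf4')
data = ncss.get_data(query)
print(data.variables['Temperature_isobaric'])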
Example #19
class ForecastModel(object):
    """
    An object for querying and holding forecast model information for
    use within the pvlib library.

    Simplifies use of siphon library on a THREDDS server.

    Parameters
    ----------
    model_type: string
        UNIDATA category in which the model is located.
    model_name: string
        Name of the UNIDATA forecast model.
    set_type: string
        Model dataset type.

    Attributes
    ----------
    access_url: string
        URL specifying the dataset from which data will be retrieved.
    base_tds_url : string
        The top level server address
    catalog_url : string
        The url path of the catalog to parse.
    data: pd.DataFrame
        Data returned from the query.
    data_format: string
        Format of the forecast data being requested from UNIDATA.
    dataset: Dataset
        Object containing information used to access forecast data.
    dataframe_variables: list
        Model variables that are present in the data.
    datasets_list: list
        List of all available datasets.
    fm_models: Dataset
        TDSCatalog object containing all available
        forecast models from UNIDATA.
    fm_models_list: list
        List of all available forecast models from UNIDATA.
    latitude: list
        A list of floats containing latitude values.
    location: Location
        A pvlib Location object containing geographic quantities.
    longitude: list
        A list of floats containing longitude values.
    lbox: boolean
        Indicates the use of a location bounding box.
    ncss: NCSS object
        NCSS object used to query the server.
    model_name: string
        Name of the UNIDATA forecast model.
    model: Dataset
        A dictionary of Dataset objects, whose keys are the names of the
        datasets.
    model_url: string
        The url path of the dataset to parse.
    modelvariables: list
        Common variable names that correspond to queryvariables.
    query: NCSS query object
        NCSS query object used to complete the forecast data retrieval.
    queryvariables: list
        Variables that are used to query the THREDDS Data Server.
    time: DatetimeIndex
        Time range.
    variables: dict
        Defines the variables to obtain from the weather
        model and how they should be renamed to common variable names.
    units: dict
        Dictionary containing the units of the standard variables
        and the model specific variables.
    vert_level: float or integer
        Vertical altitude for query data.
    """

    access_url_key = 'NetcdfSubset'
    catalog_url = 'https://thredds.ucar.edu/thredds/catalog.xml'
    base_tds_url = catalog_url.split('/thredds/')[0]
    data_format = 'netcdf'

    units = {
        'temp_air': 'C',
        'wind_speed': 'm/s',
        'ghi': 'W/m^2',
        'ghi_raw': 'W/m^2',
        'dni': 'W/m^2',
        'dhi': 'W/m^2',
        'total_clouds': '%',
        'low_clouds': '%',
        'mid_clouds': '%',
        'high_clouds': '%'
    }

    def __init__(self, model_type, model_name, set_type, vert_level=None):
        self.model_type = model_type
        self.model_name = model_name
        self.set_type = set_type
        self.connected = False
        self.vert_level = vert_level

    def connect_to_catalog(self):
        self.catalog = TDSCatalog(self.catalog_url)
        self.fm_models = TDSCatalog(
            self.catalog.catalog_refs[self.model_type].href)
        self.fm_models_list = sorted(list(self.fm_models.catalog_refs.keys()))

        try:
            model_url = self.fm_models.catalog_refs[self.model_name].href
        except ParseError:
            raise ParseError(self.model_name + ' model may be unavailable.')

        try:
            self.model = TDSCatalog(model_url)
        except HTTPError:
            try:
                self.model = TDSCatalog(model_url)
            except HTTPError:
                raise HTTPError(self.model_name + ' model may be unavailable.')

        self.datasets_list = list(self.model.datasets.keys())
        self.set_dataset()
        self.connected = True

    def __repr__(self):
        return '{}, {}'.format(self.model_name, self.set_type)

    def set_dataset(self):
        '''
        Retrieves the designated dataset, creates an NCSS object, and
        creates an NCSS query object.
        '''

        keys = list(self.model.datasets.keys())
        labels = [item.split()[0].lower() for item in keys]
        if self.set_type == 'best':
            self.dataset = self.model.datasets[keys[labels.index('best')]]
        elif self.set_type == 'latest':
            self.dataset = self.model.datasets[keys[labels.index('latest')]]
        elif self.set_type == 'full':
            self.dataset = self.model.datasets[keys[labels.index('full')]]

        self.access_url = self.dataset.access_urls[self.access_url_key]
        self.ncss = NCSS(self.access_url)
        self.query = self.ncss.query()

    def set_query_time_range(self, start, end):
        """
        Parameters
        ----------
        start : datetime.datetime, pandas.Timestamp
            Must be tz-localized.
        end : datetime.datetime, pandas.Timestamp
            Must be tz-localized.

        Notes
        -----
        Assigns ``self.start``, ``self.end``. Modifies ``self.query``
        """
        self.start = pd.Timestamp(start)
        self.end = pd.Timestamp(end)
        if self.start.tz is None or self.end.tz is None:
            raise TypeError('start and end must be tz-localized')
        self.query.time_range(self.start, self.end)

    def set_query_latlon(self):
        '''
        Sets the NCSS query location latitude and longitude.
        '''

        if (isinstance(self.longitude, list)
                and isinstance(self.latitude, list)):
            self.lbox = True
            # west, east, south, north
            self.query.lonlat_box(self.longitude[0], self.longitude[1],
                                  self.latitude[0], self.latitude[1])
        else:
            self.lbox = False
            self.query.lonlat_point(self.longitude, self.latitude)

    def set_location(self, tz, latitude, longitude):
        '''
        Sets the location for the query.

        Parameters
        ----------
        tz: tzinfo
            Timezone of the query
        latitude: float
            Latitude of the query
        longitude: float
            Longitude of the query

        Notes
        -----
        Assigns ``self.location``.
        '''
        self.location = Location(latitude, longitude, tz=tz)

    def get_data(self,
                 latitude,
                 longitude,
                 start,
                 end,
                 vert_level=None,
                 query_variables=None,
                 close_netcdf_data=True,
                 **kwargs):
        """
        Submits a query to the UNIDATA servers using Siphon NCSS and
        converts the netcdf data to a pandas DataFrame.

        Parameters
        ----------
        latitude: float
            The latitude value.
        longitude: float
            The longitude value.
        start: datetime or timestamp
            The start time.
        end: datetime or timestamp
            The end time.
        vert_level: None, float or integer, default None
            Vertical altitude of interest.
        query_variables: None or list, default None
            If None, uses self.variables.
        close_netcdf_data: bool, default True
            Controls if the temporary netcdf data file should be closed.
            Set to False to access the raw data.
        **kwargs:
            Additional keyword arguments are silently ignored.

        Returns
        -------
        forecast_data : DataFrame
            column names are the weather model's variable names.
        """

        if not self.connected:
            self.connect_to_catalog()

        if vert_level is not None:
            self.vert_level = vert_level

        if query_variables is None:
            self.query_variables = list(self.variables.values())
        else:
            self.query_variables = query_variables

        self.set_query_time_range(start, end)

        self.latitude = latitude
        self.longitude = longitude
        self.set_query_latlon()  # modifies self.query
        self.set_location(self.start.tz, latitude, longitude)

        if self.vert_level is not None:
            self.query.vertical_level(self.vert_level)

        self.query.variables(*self.query_variables)
        self.query.accept(self.data_format)

        self.netcdf_data = self.ncss.get_data(self.query)

        # might be better to go to xarray here so that we can handle
        # higher dimensional data for more advanced applications
        self.data = self._netcdf2pandas(self.netcdf_data, self.query_variables,
                                        self.start, self.end)

        if close_netcdf_data:
            self.netcdf_data.close()

        return self.data

    def process_data(self, data, **kwargs):
        """
        Defines the steps needed to convert raw forecast data
        into processed forecast data. Most forecast models implement
        their own version of this method which also call this one.

        Parameters
        ----------
        data: DataFrame
            Raw forecast data

        Returns
        -------
        data: DataFrame
            Processed forecast data.
        """
        data = self.rename(data)
        return data

    def get_processed_data(self, *args, **kwargs):
        """
        Get and process forecast data.

        Parameters
        ----------
        *args: positional arguments
            Passed to get_data
        **kwargs: keyword arguments
            Passed to get_data and process_data

        Returns
        -------
        data: DataFrame
            Processed forecast data
        """
        return self.process_data(self.get_data(*args, **kwargs), **kwargs)

    def rename(self, data, variables=None):
        """
        Renames the columns according the variable mapping.

        Parameters
        ----------
        data: DataFrame
        variables: None or dict, default None
            If None, uses self.variables

        Returns
        -------
        data: DataFrame
            Renamed data.
        """
        if variables is None:
            variables = self.variables
        return data.rename(columns={y: x for x, y in variables.items()})

    def _netcdf2pandas(self, netcdf_data, query_variables, start, end):
        """
        Transforms data from netcdf to pandas DataFrame.

        Parameters
        ----------
        data: netcdf
            Data returned from UNIDATA NCSS query.
        query_variables: list
            The variables requested.
        start: Timestamp
            The start time
        end: Timestamp
            The end time

        Returns
        -------
        pd.DataFrame
        """
        # set self.time
        try:
            time_var = 'time'
            self.set_time(netcdf_data.variables[time_var])
        except KeyError:
            # which model does this dumb thing?
            time_var = 'time1'
            self.set_time(netcdf_data.variables[time_var])

        data_dict = {}
        for key, data in netcdf_data.variables.items():
            # if accounts for possibility of extra variable returned
            if key not in query_variables:
                continue
            squeezed = data[:].squeeze()
            if squeezed.ndim == 1:
                data_dict[key] = squeezed
            elif squeezed.ndim == 2:
                for num, data_level in enumerate(squeezed.T):
                    data_dict[key + '_' + str(num)] = data_level
            else:
                raise ValueError('cannot parse ndim > 2')

        data = pd.DataFrame(data_dict, index=self.time)
        # sometimes data is returned as hours since T0
        # where T0 is before start. Then the hours between
        # T0 and start are added *after* end. So sort and slice
        # to remove the garbage
        data = data.sort_index().loc[start:end]
        return data

    def set_time(self, time):
        '''
        Converts time data into a pandas date object.

        Parameters
        ----------
        time: netcdf
            Contains time information.

        Returns
        -------
        pandas.DatetimeIndex
        '''
        times = num2date(time[:].squeeze(),
                         time.units,
                         only_use_cftime_datetimes=False,
                         only_use_python_datetimes=True)
        self.time = pd.DatetimeIndex(pd.Series(times), tz=self.location.tz)

    def cloud_cover_to_ghi_linear(self,
                                  cloud_cover,
                                  ghi_clear,
                                  offset=35,
                                  **kwargs):
        """
        Convert cloud cover to GHI using a linear relationship.

        0% cloud cover returns ghi_clear.

        100% cloud cover returns offset*ghi_clear.

        Parameters
        ----------
        cloud_cover: numeric
            Cloud cover in %.
        ghi_clear: numeric
            GHI under clear sky conditions.
        offset: numeric, default 35
            Determines the minimum GHI.
        kwargs
            Not used.

        Returns
        -------
        ghi: numeric
            Estimated GHI.

        References
        ----------
        Larson et. al. "Day-ahead forecasting of solar power output from
        photovoltaic plants in the American Southwest" Renewable Energy
        91, 11-20 (2016).
        """

        offset = offset / 100.
        cloud_cover = cloud_cover / 100.
        ghi = (offset + (1 - offset) * (1 - cloud_cover)) * ghi_clear
        return ghi

    def cloud_cover_to_irradiance_clearsky_scaling(self,
                                                   cloud_cover,
                                                   method='linear',
                                                   **kwargs):
        """
        Estimates irradiance from cloud cover in the following steps:

        1. Determine clear sky GHI using Ineichen model and
           climatological turbidity.
        2. Estimate cloudy sky GHI using a function of
           cloud_cover e.g.
           :py:meth:`~ForecastModel.cloud_cover_to_ghi_linear`
        3. Estimate cloudy sky DNI using the DISC model.
        4. Calculate DHI from DNI and GHI.

        Parameters
        ----------
        cloud_cover : Series
            Cloud cover in %.
        method : str, default 'linear'
            Method for converting cloud cover to GHI.
            'linear' is currently the only option.
        **kwargs
            Passed to the method that does the conversion

        Returns
        -------
        irrads : DataFrame
            Estimated GHI, DNI, and DHI.
        """
        solpos = self.location.get_solarposition(cloud_cover.index)
        cs = self.location.get_clearsky(cloud_cover.index,
                                        model='ineichen',
                                        solar_position=solpos)

        method = method.lower()
        if method == 'linear':
            ghi = self.cloud_cover_to_ghi_linear(cloud_cover, cs['ghi'],
                                                 **kwargs)
        else:
            raise ValueError('invalid method argument')

        dni = disc(ghi, solpos['zenith'], cloud_cover.index)['dni']
        dhi = ghi - dni * np.cos(np.radians(solpos['zenith']))

        irrads = pd.DataFrame({'ghi': ghi, 'dni': dni, 'dhi': dhi}).fillna(0)
        return irrads

    def cloud_cover_to_transmittance_linear(self,
                                            cloud_cover,
                                            offset=0.75,
                                            **kwargs):
        """
        Convert cloud cover to atmospheric transmittance using a linear
        model.

        0% cloud cover returns offset.

        100% cloud cover returns 0.

        Parameters
        ----------
        cloud_cover : numeric
            Cloud cover in %.
        offset : numeric, default 0.75
            Determines the maximum transmittance.
        kwargs
            Not used.

        Returns
        -------
        ghi : numeric
            Estimated GHI.
        """
        transmittance = ((100.0 - cloud_cover) / 100.0) * offset

        return transmittance

    def cloud_cover_to_irradiance_liujordan(self, cloud_cover, **kwargs):
        """
        Estimates irradiance from cloud cover in the following steps:

        1. Determine transmittance using a function of cloud cover e.g.
           :py:meth:`~ForecastModel.cloud_cover_to_transmittance_linear`
        2. Calculate GHI, DNI, DHI using the
           :py:func:`pvlib.irradiance.liujordan` model

        Parameters
        ----------
        cloud_cover : Series

        Returns
        -------
        irradiance : DataFrame
            Columns include ghi, dni, dhi
        """
        # in principle, get_solarposition could use the forecast
        # pressure, temp, etc., but the cloud cover forecast is not
        # accurate enough to justify using these minor corrections
        solar_position = self.location.get_solarposition(cloud_cover.index)
        dni_extra = get_extra_radiation(cloud_cover.index)
        airmass = self.location.get_airmass(cloud_cover.index)

        transmittance = self.cloud_cover_to_transmittance_linear(
            cloud_cover, **kwargs)

        irrads = liujordan(solar_position['apparent_zenith'],
                           transmittance,
                           airmass['airmass_absolute'],
                           dni_extra=dni_extra)
        irrads = irrads.fillna(0)

        return irrads

    def cloud_cover_to_irradiance(self,
                                  cloud_cover,
                                  how='clearsky_scaling',
                                  **kwargs):
        """
        Convert cloud cover to irradiance. A wrapper method.

        Parameters
        ----------
        cloud_cover : Series
        how : str, default 'clearsky_scaling'
            Selects the method for conversion. Can be one of
            clearsky_scaling or liujordan.
        **kwargs
            Passed to the selected method.

        Returns
        -------
        irradiance : DataFrame
            Columns include ghi, dni, dhi
        """

        how = how.lower()
        if how == 'clearsky_scaling':
            irrads = self.cloud_cover_to_irradiance_clearsky_scaling(
                cloud_cover, **kwargs)
        elif how == 'liujordan':
            irrads = self.cloud_cover_to_irradiance_liujordan(
                cloud_cover, **kwargs)
        else:
            raise ValueError('invalid how argument')

        return irrads

    def kelvin_to_celsius(self, temperature):
        """
        Converts Kelvin to Celsius.

        Parameters
        ----------
        temperature: numeric

        Returns
        -------
        temperature: numeric
        """
        return temperature - 273.15

    def isobaric_to_ambient_temperature(self, data):
        """
        Calculates temperature from isobaric temperature.

        Parameters
        ----------
        data: DataFrame
            Must contain columns pressure, temperature_iso,
            temperature_dew_iso. Input temperature in K.

        Returns
        -------
        temperature : Series
            Temperature in K
        """

        P = data['pressure'] / 100.0  # noqa: N806
        Tiso = data['temperature_iso']  # noqa: N806
        Td = data['temperature_dew_iso'] - 273.15  # noqa: N806

        # saturation water vapor pressure
        e = 6.11 * 10**((7.5 * Td) / (Td + 273.3))

        # saturation water vapor mixing ratio
        w = 0.622 * (e / (P - e))

        temperature = Tiso - ((2.501 * 10.**6) / 1005.7) * w

        return temperature

    def uv_to_speed(self, data):
        """
        Computes wind speed from wind components.

        Parameters
        ----------
        data : DataFrame
            Must contain the columns 'wind_speed_u' and 'wind_speed_v'.

        Returns
        -------
        wind_speed : Series
        """
        wind_speed = np.sqrt(data['wind_speed_u']**2 + data['wind_speed_v']**2)

        return wind_speed

    def gust_to_speed(self, data, scaling=1 / 1.4):
        """
        Computes standard wind speed from gust.
        Very approximate and location dependent.

        Parameters
        ----------
        data : DataFrame
            Must contain the column 'wind_speed_gust'.

        Returns
        -------
        wind_speed : Series
        """
        wind_speed = data['wind_speed_gust'] * scaling

        return wind_speed
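
In practice this class is used through concrete subclasses; a hedged usage sketch following pvlib's historical forecast API (pvlib.forecast.GFS, since removed from pvlib), with an illustrative location and window:

import pandas as pd
from pvlib.forecast import GFS  # historical API; assumes an older pvlib is installed

fm = GFS()
start = pd.Timestamp.now(tz='America/Denver')  # must be tz-localized
end = start + pd.Timedelta(days=3)
data = fm.get_processed_data(40.0, -105.0, start, end)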
Example #20
    raise ValueError('No time variable found for ' + var.name)


#####################################
# Obtain data

# Construct a TDSCatalog instance pointing to the gfs dataset
best_gfs = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/grib/'
                      'NCEP/GFS/Global_0p5deg/catalog.xml')

# Pull out the dataset you want to use and look at the access URLs
best_ds = list(best_gfs.datasets.values())[1]
print(best_ds.access_urls)

# Create NCSS object to access the NetcdfSubset
ncss = NCSS(best_ds.access_urls['NetcdfSubset'])
print(best_ds.access_urls['NetcdfSubset'])

#####################################
# First Query for MSLP

# Create lat/lon box for location you want to get data for
query = ncss.query()
query.lonlat_box(north=50, south=30, east=-80,
                 west=-115).time(datetime.utcnow())
query.accept('netcdf4')

# Request data for MSLP
query.variables('MSLP_Eta_model_reduction_msl')
data = ncss.get_data(query)
Example #21
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
from metpy.calc import get_wind_speed
from metpy.units import units
from netCDF4 import num2date
import numpy as np
import scipy.ndimage as ndimage
from siphon.ncss import NCSS

##################################
# Set up netCDF Subset Service link
dt = datetime(2016, 4, 16, 18)
ncss = NCSS(
    'http://nomads.ncdc.noaa.gov/thredds/ncss/grid/namanl/'
    '{0:%Y%m}/{0:%Y%m%d}/namanl_218_{0:%Y%m%d}_{0:%H}00_000.grb'.format(dt))

# Data Query
hgt = ncss.query().time(dt)
hgt.variables('Geopotential_height', 'u_wind', 'v_wind').add_lonlat()

# Actually getting the data
data = ncss.get_data(hgt)

##################################
# Pull apart the data

# Get dimension names to pull appropriate variables
dtime = data.variables['Geopotential_height'].dimensions[0]
dlev = data.variables['Geopotential_height'].dimensions[1]
Example #22
                '\n' + str(int(data[mxy[i], mxx[i]])),
                color=color,
                size=12,
                clip_on=True,
                fontweight='bold',
                horizontalalignment='center',
                verticalalignment='top',
                transform=transform)


###############################
# Get NARR data
dattim = datetime(1999, 1, 3, 0)

ncss = NCSS(
    'https://www.ncei.noaa.gov/thredds/ncss/grid/narr-a-files/{0:%Y%m}/{0:%Y%m%d}/'
    'narr-a_221_{0:%Y%m%d}_{0:%H}00_000.grb'.format(dattim))
query = ncss.query()
query.all_times().variables(
    'Pressure_reduced_to_MSL_msl',
    'Geopotential_height_isobaric').add_lonlat().accept('netcdf')
data = ncss.get_data(query)

###############################
# Extract data into variables

# Grab pressure levels
plev = list(data.variables['isobaric1'][:])

# Grab lat/lons and make all lons 0-360
lats = data.variables['lat'][:]
Example #23
import matplotlib.pyplot as plt
import metpy.calc as mcalc
from metpy.units import units
from netCDF4 import num2date
import numpy as np
import numpy.ma as ma
from scipy.ndimage import gaussian_filter
from siphon.ncss import NCSS

###########################
# **Get the data**
#
# This example will use data from the North American Mesoscale Model Analysis
# (https://nomads.ncdc.gov/) for 18 UTC 27 April 2011.
ncss = NCSS(
    'https://nomads.ncdc.noaa.gov/thredds/ncss/grid/namanl/201104/20110427/'
    'namanl_218_20110427_1800_000.grb')

# Query for required variables
gfsdata = ncss.query().all_times()
gfsdata.variables('Geopotential_height', 'u_wind', 'v_wind', 'Temperature',
                  'Relative_humidity', 'Best_4-layer_lifted_index',
                  'Absolute_vorticity', 'Pressure_reduced_to_MSL',
                  'Dew_point_temperature').add_lonlat()

# Set the lat/lon box for the data to pull in.
gfsdata.lonlat_box(-135, -60, 15, 65)

# Actually getting the data
data = ncss.get_data(gfsdata)
Example #24
 def setup(self):
     """Set up for tests with a default valid query."""
     dt = datetime(2015, 6, 12, 15, 0, 0)
     self.ncss = NCSS(self.server + self.urlPath)
     self.nq = self.ncss.query().lonlat_point(-105, 40).time(dt)
     self.nq.variables('Temperature_isobaric', 'Relative_humidity_isobaric')
Example #25
########################################
# Begin Data Ingest
# -----------------

# Request METAR data from TDS
metar = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/nws/'
                   'metar/ncdecoded/catalog.xml')
dataset = list(metar.datasets.values())[0]
print(list(dataset.access_urls))

########################################
# What variables are available in dataset?

# Access netcdf subset and use siphon to request data
ncss_url = dataset.access_urls['NetcdfSubset']
ncss = NCSS(ncss_url)
print(ncss.variables)

########################################
# Set query to get desired data from Thredds server

# get current date and time
now = datetime.utcnow()
now = datetime(now.year, now.month, now.day, now.hour)

# define time range you want the data for
start = now - timedelta(days=1)
end = now

# build the query
query = ncss.query()
Example #26
from metpy.units import units
from netCDF4 import num2date
import numpy as np
import numpy.ma as ma
from scipy.ndimage import gaussian_filter
from siphon.ncss import NCSS

###########################
# **Get the data**
#
# This example will use data from the North American Mesoscale Model Analysis
# (https://nomads.ncdc.gov/) for 18 UTC 27 April 2011.

base_url = 'https://www.ncei.noaa.gov/thredds/ncss/grid/namanl/'
dt = datetime(2011, 4, 27)
ncss = NCSS('{}{dt:%Y%m}/{dt:%Y%m%d}/namanl_218_{dt:%Y%m%d}_'
            '1800_000.grb'.format(base_url, dt=dt))

# Query for required variables
gfsdata = ncss.query().all_times()
gfsdata.variables(
    'Geopotential_height_isobaric', 'u-component_of_wind_isobaric',
    'v-component_of_wind_isobaric', 'Temperature_isobaric',
    'Relative_humidity_isobaric',
    'Best_4_layer_lifted_index_layer_between_two_pressure_'
    'difference_from_ground_layer', 'Absolute_vorticity_isobaric',
    'Pressure_reduced_to_MSL_msl',
    'Dew_point_temperature_height_above_ground').add_lonlat()

# Set the lat/lon box for the data to pull in.
gfsdata.lonlat_box(-135, -60, 15, 65)
Example #27
                                cmap=colormap,
                                transform=ccrs.PlateCarree())
    cax = plt.subplot(gs[1])
    cbar = plt.colorbar(contourfill,
                        cax=cax,
                        orientation='horizontal',
                        extend='max',
                        extendrect=True)


# Latest GFS Dataset
cat = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/grib/'
                 'NCEP/GFS/Global_0p5deg/latest.xml')
#cat.datasets = [GFS_Global_0p5deg_20201128_1200.grib2]
best_ds = list(cat.datasets.values())[0]
ncss = NCSS(best_ds.access_urls['NetcdfSubset'])  #NetCDF subset service object

now = datetime.utcnow()

data_hght = queryData(ncss, "height")
data_wind = queryData(ncss, "wind")

latitudes = data_hght.variables['lat'][:]
longitudes = data_hght.variables['lon'][:]
heights250hPa = data_hght.variables['Geopotential_height_isobaric'][:]

# Smooth the 250-hPa heights using a gaussian filter from scipy.ndimage
hgt_250, longitudes = cutil.add_cyclic_point(heights250hPa, coord=longitudes)
Z_250 = ndimage.gaussian_filter(hgt_250[0, 0, :, :], sigma=3, order=0)

u250 = (units(data_wind.variables['u-component_of_wind_isobaric'].units) *
Example #28
class ForecastModel(object):
    """
    An object for querying and holding forecast model information for
    use within the pvlib library.

    Simplifies use of siphon library on a THREDDS server.

    Parameters
    ----------
    model_type: string
        UNIDATA category in which the model is located.
    model_name: string
        Name of the UNIDATA forecast model.
    set_type: string
        Model dataset type.

    Attributes
    ----------
    access_url: string
        URL specifying the dataset from which data will be retrieved.
    base_tds_url : string
        The top level server address
    catalog_url : string
        The url path of the catalog to parse.
    data: pd.DataFrame
        Data returned from the query.
    data_format: string
        Format of the forecast data being requested from UNIDATA.
    dataset: Dataset
        Object containing information used to access forecast data.
    dataframe_variables: list
        Model variables that are present in the data.
    datasets_list: list
        List of all available datasets.
    fm_models: Dataset
        TDSCatalog object containing all available
        forecast models from UNIDATA.
    fm_models_list: list
        List of all available forecast models from UNIDATA.
    latitude: list
        A list of floats containing latitude values.
    location: Location
        A pvlib Location object containing geographic quantities.
    longitude: list
        A list of floats containing longitude values.
    lbox: boolean
        Indicates the use of a location bounding box.
    ncss: NCSS object
        NCSS object used to query the server.
    model_name: string
        Name of the UNIDATA forecast model.
    model: Dataset
        A dictionary of Dataset objects, whose keys are the names of the
        datasets.
    model_url: string
        The url path of the dataset to parse.
    modelvariables: list
        Common variable names that correspond to queryvariables.
    query: NCSS query object
        NCSS query object used to complete the forecast data retrieval.
    queryvariables: list
        Variables that are used to query the THREDDS Data Server.
    time: DatetimeIndex
        Time range.
    variables: dict
        Defines the variables to obtain from the weather
        model and how they should be renamed to common variable names.
    units: dict
        Dictionary containing the units of the standard variables
        and the model specific variables.
    vert_level: float or integer
        Vertical altitude for query data.
    """

    access_url_key = 'NetcdfSubset'
    catalog_url = 'http://thredds.ucar.edu/thredds/catalog.xml'
    base_tds_url = catalog_url.split('/thredds/')[0]
    data_format = 'netcdf'
    vert_level = 100000

    units = {
        'temp_air': 'C',
        'wind_speed': 'm/s',
        'ghi': 'W/m^2',
        'ghi_raw': 'W/m^2',
        'dni': 'W/m^2',
        'dhi': 'W/m^2',
        'total_clouds': '%',
        'low_clouds': '%',
        'mid_clouds': '%',
        'high_clouds': '%'}

    def __init__(self, model_type, model_name, set_type):
        self.model_type = model_type
        self.model_name = model_name
        self.set_type = set_type
        self.catalog = TDSCatalog(self.catalog_url)
        self.fm_models = TDSCatalog(self.catalog.catalog_refs[model_type].href)
        self.fm_models_list = sorted(list(self.fm_models.catalog_refs.keys()))

        try:
            model_url = self.fm_models.catalog_refs[model_name].href
        except ParseError:
            raise ParseError(self.model_name + ' model may be unavailable.')

        try:
            self.model = TDSCatalog(model_url)
        except HTTPError:
            try:
                self.model = TDSCatalog(model_url)
            except HTTPError:
                raise HTTPError(self.model_name + ' model may be unavailable.')

        self.datasets_list = list(self.model.datasets.keys())
        self.set_dataset()

    def __repr__(self):
        return '{}, {}'.format(self.model_name, self.set_type)

    def set_dataset(self):
        '''
        Retrieves the designated dataset, creates an NCSS object, and
        creates an NCSS query object.
        '''

        keys = list(self.model.datasets.keys())
        labels = [item.split()[0].lower() for item in keys]
        if self.set_type == 'best':
            self.dataset = self.model.datasets[keys[labels.index('best')]]
        elif self.set_type == 'latest':
            self.dataset = self.model.datasets[keys[labels.index('latest')]]
        elif self.set_type == 'full':
            self.dataset = self.model.datasets[keys[labels.index('full')]]

        self.access_url = self.dataset.access_urls[self.access_url_key]
        self.ncss = NCSS(self.access_url)
        self.query = self.ncss.query()

    def set_query_latlon(self):
        '''
        Sets the NCSS query location latitude and longitude.
        '''

        if (isinstance(self.longitude, list) and
            isinstance(self.latitude, list)):
            self.lbox = True
            # west, east, south, north
            self.query.lonlat_box(self.longitude[0], self.longitude[1],
                                  self.latitude[0], self.latitude[1])
        else:
            self.lbox = False
            self.query.lonlat_point(self.longitude, self.latitude)

    def set_location(self, time, latitude, longitude):
        '''
        Sets the location for the query.

        Parameters
        ----------
        time: datetime or DatetimeIndex
            Time range of the query.
        '''
        if isinstance(time, datetime.datetime):
            tzinfo = time.tzinfo
        else:
            tzinfo = time.tz

        if tzinfo is None:
            self.location = Location(latitude, longitude)
        else:
            self.location = Location(latitude, longitude, tz=tzinfo)

    def get_data(self, latitude, longitude, start, end,
                 vert_level=None, query_variables=None,
                 close_netcdf_data=True):
        """
        Submits a query to the UNIDATA servers using Siphon NCSS and
        converts the netcdf data to a pandas DataFrame.

        Parameters
        ----------
        latitude: float
            The latitude value.
        longitude: float
            The longitude value.
        start: datetime or timestamp
            The start time.
        end: datetime or timestamp
            The end time.
        vert_level: None, float or integer
            Vertical altitude of interest.
        query_variables: None or list
            If None, uses self.variables.
        close_netcdf_data: bool
            Controls if the temporary netcdf data file should be closed.
            Set to False to access the raw data.

        Returns
        -------
        forecast_data : DataFrame
            column names are the weather model's variable names.
        """
        if vert_level is not None:
            self.vert_level = vert_level

        if query_variables is None:
            self.query_variables = list(self.variables.values())
        else:
            self.query_variables = query_variables

        self.latitude = latitude
        self.longitude = longitude
        self.set_query_latlon()  # modifies self.query
        self.set_location(start, latitude, longitude)

        self.start = start
        self.end = end
        self.query.time_range(self.start, self.end)

        self.query.vertical_level(self.vert_level)
        self.query.variables(*self.query_variables)
        self.query.accept(self.data_format)

        self.netcdf_data = self.ncss.get_data(self.query)

        # might be better to go to xarray here so that we can handle
        # higher dimensional data for more advanced applications
        self.data = self._netcdf2pandas(self.netcdf_data, self.query_variables)

        if close_netcdf_data:
            self.netcdf_data.close()

        return self.data

    def process_data(self, data, **kwargs):
        """
        Defines the steps needed to convert raw forecast data
        into processed forecast data. Most forecast models implement
        their own version of this method, which also calls this one.

        Parameters
        ----------
        data: DataFrame
            Raw forecast data

        Returns
        -------
        data: DataFrame
            Processed forecast data.
        """
        data = self.rename(data)
        return data

    def get_processed_data(self, *args, **kwargs):
        """
        Get and process forecast data.

        Parameters
        ----------
        *args: positional arguments
            Passed to get_data
        **kwargs: keyword arguments
            Passed to get_data and process_data

        Returns
        -------
        data: DataFrame
            Processed forecast data
        """
        return self.process_data(self.get_data(*args, **kwargs), **kwargs)

    def rename(self, data, variables=None):
        """
        Renames the columns according to the variable mapping.

        Parameters
        ----------
        data: DataFrame
        variables: None or dict
            If None, uses self.variables

        Returns
        -------
        data: DataFrame
            Renamed data.
        """
        if variables is None:
            variables = self.variables
        return data.rename(columns={y: x for x, y in variables.items()})

    def _netcdf2pandas(self, netcdf_data, query_variables):
        """
        Transforms data from netcdf to pandas DataFrame.

        Parameters
        ----------
        netcdf_data: netcdf
            Data returned from UNIDATA NCSS query.
        query_variables: list
            The variables requested.

        Returns
        -------
        pd.DataFrame
        """
        # set self.time
        try:
            time_var = 'time'
            self.set_time(netcdf_data.variables[time_var])
        except KeyError:
            # some models name the time variable 'time1' instead of 'time'
            time_var = 'time1'
            self.set_time(netcdf_data.variables[time_var])

        data_dict = {key: data[:].squeeze() for key, data in
                     netcdf_data.variables.items() if key in query_variables}

        return pd.DataFrame(data_dict, index=self.time)

    def set_time(self, time):
        '''
        Converts time data into a pandas DatetimeIndex.

        Parameters
        ----------
        time: netcdf
            Contains time information.

        Returns
        -------
        pandas.DatetimeIndex
        '''
        times = num2date(time[:].squeeze(), time.units)
        self.time = pd.DatetimeIndex(pd.Series(times), tz=self.location.tz)

    def cloud_cover_to_ghi_linear(self, cloud_cover, ghi_clear, offset=35,
                                  **kwargs):
        """
        Convert cloud cover to GHI using a linear relationship.

        0% cloud cover returns ghi_clear.

        100% cloud cover returns offset*ghi_clear.

        Parameters
        ----------
        cloud_cover: numeric
            Cloud cover in %.
        ghi_clear: numeric
            GHI under clear sky conditions.
        offset: numeric
            Determines the minimum GHI.
        kwargs
            Not used.

        Returns
        -------
        ghi: numeric
            Estimated GHI.

        References
        ----------
        Larson et. al. "Day-ahead forecasting of solar power output from
        photovoltaic plants in the American Southwest" Renewable Energy
        91, 11-20 (2016).
        """

        offset = offset / 100.
        cloud_cover = cloud_cover / 100.
        ghi = (offset + (1 - offset) * (1 - cloud_cover)) * ghi_clear
        return ghi
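
    # Worked numbers for the linear relationship above (method default
    # offset=35 and a hypothetical ghi_clear of 800 W/m^2):
    #   cloud_cover =   0%  ->  ghi = 1.000 * 800 = 800 W/m^2
    #   cloud_cover =  50%  ->  ghi = 0.675 * 800 = 540 W/m^2
    #   cloud_cover = 100%  ->  ghi = 0.350 * 800 = 280 W/m^2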

    def cloud_cover_to_irradiance_clearsky_scaling(self, cloud_cover,
                                                   method='linear',
                                                   **kwargs):
        """
        Estimates irradiance from cloud cover in the following steps:

        1. Determine clear sky GHI using Ineichen model and
           climatological turbidity.
        2. Estimate cloudy sky GHI using a function of
           cloud_cover e.g.
           :py:meth:`~ForecastModel.cloud_cover_to_ghi_linear`
        3. Estimate cloudy sky DNI using the DISC model.
        4. Calculate DHI from GHI and DNI.

        Parameters
        ----------
        cloud_cover : Series
            Cloud cover in %.
        method : str
            Method for converting cloud cover to GHI.
            'linear' is currently the only option.
        **kwargs
            Passed to the method that does the conversion

        Returns
        -------
        irrads : DataFrame
            Estimated GHI, DNI, and DHI.
        """
        solpos = self.location.get_solarposition(cloud_cover.index)
        cs = self.location.get_clearsky(cloud_cover.index, model='ineichen',
                                        solar_position=solpos)

        method = method.lower()
        if method == 'linear':
            ghi = self.cloud_cover_to_ghi_linear(cloud_cover, cs['ghi'],
                                                 **kwargs)
        else:
            raise ValueError('invalid method argument')

        dni = disc(ghi, solpos['zenith'], cloud_cover.index)['dni']
        dhi = ghi - dni * np.cos(np.radians(solpos['zenith']))

        irrads = pd.DataFrame({'ghi': ghi, 'dni': dni, 'dhi': dhi}).fillna(0)
        return irrads

    def cloud_cover_to_transmittance_linear(self, cloud_cover, offset=0.75,
                                            **kwargs):
        """
        Convert cloud cover to atmospheric transmittance using a linear
        model.

        0% cloud cover returns offset.

        100% cloud cover returns 0.

        Parameters
        ----------
        cloud_cover : numeric
            Cloud cover in %.
        offset : numeric
            Determines the maximum transmittance.
        kwargs
            Not used.

        Returns
        -------
        transmittance : numeric
            Estimated atmospheric transmittance.
        """
        transmittance = ((100.0 - cloud_cover) / 100.0) * offset

        return transmittance

    def cloud_cover_to_irradiance_liujordan(self, cloud_cover, **kwargs):
        """
        Estimates irradiance from cloud cover in the following steps:

        1. Determine transmittance using a function of cloud cover e.g.
           :py:meth:`~ForecastModel.cloud_cover_to_transmittance_linear`
        2. Calculate GHI, DNI, DHI using the
           :py:func:`pvlib.irradiance.liujordan` model

        Parameters
        ----------
        cloud_cover : Series

        Returns
        -------
        irradiance : DataFrame
            Columns include ghi, dni, dhi
        """
        # in principle, get_solarposition could use the forecast
        # pressure, temp, etc., but the cloud cover forecast is not
        # accurate enough to justify using these minor corrections
        solar_position = self.location.get_solarposition(cloud_cover.index)
        dni_extra = extraradiation(cloud_cover.index)
        airmass = self.location.get_airmass(cloud_cover.index)

        transmittance = self.cloud_cover_to_transmittance_linear(cloud_cover,
                                                                 **kwargs)

        irrads = liujordan(solar_position['apparent_zenith'],
                           transmittance, airmass['airmass_absolute'],
                           dni_extra=dni_extra)
        irrads = irrads.fillna(0)

        return irrads

    def cloud_cover_to_irradiance(self, cloud_cover, how='clearsky_scaling',
                                  **kwargs):
        """
        Convert cloud cover to irradiance. A wrapper method.

        Parameters
        ----------
        cloud_cover : Series
        how : str
            Selects the method for conversion. Can be one of
            clearsky_scaling or liujordan.
        **kwargs
            Passed to the selected method.

        Returns
        -------
        irradiance : DataFrame
            Columns include ghi, dni, dhi
        """

        how = how.lower()
        if how == 'clearsky_scaling':
            irrads = self.cloud_cover_to_irradiance_clearsky_scaling(
                cloud_cover, **kwargs)
        elif how == 'liujordan':
            irrads = self.cloud_cover_to_irradiance_liujordan(
                cloud_cover, **kwargs)
        else:
            raise ValueError('invalid how argument')

        return irrads
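
    # Usage sketch, as comments since this sits inside the class body;
    # `model` and `forecast_data` are hypothetical names:
    #   cloud_cover = forecast_data['total_clouds']  # in %
    #   irrads = model.cloud_cover_to_irradiance(cloud_cover,
    #                                            how='clearsky_scaling')
    # Passing how='liujordan' selects the transmittance-based path instead.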

    def kelvin_to_celsius(self, temperature):
        """
        Converts Kelvin to Celsius.

        Parameters
        ----------
        temperature: numeric

        Returns
        -------
        temperature: numeric
        """
        return temperature - 273.15

    def isobaric_to_ambient_temperature(self, data):
        """
        Calculates temperature from isobaric temperature.

        Parameters
        ----------
        data: DataFrame
            Must contain columns pressure, temperature_iso,
            temperature_dew_iso. Input temperature in K.

        Returns
        -------
        temperature : Series
            Temperature in K
        """

        P = data['pressure'] / 100.0
        Tiso = data['temperature_iso']
        Td = data['temperature_dew_iso'] - 273.15

        # saturation water vapor pressure
        e = 6.11 * 10**((7.5 * Td) / (Td + 273.3))

        # saturation water vapor mixing ratio
        w = 0.622 * (e / (P - e))

        T = Tiso - ((2.501 * 10.**6) / 1005.7) * w

        return T
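
    # Worked example of the correction above (made-up inputs):
    #   P = 1000 hPa, Tiso = 290 K, Td = 283.15 K (10 C)
    #   e = 6.11 * 10**(7.5 * 10 / (10 + 273.3))  ~ 11.24 hPa
    #   w = 0.622 * 11.24 / (1000 - 11.24)        ~ 0.00707 kg/kg
    #   T = 290 - (2.501e6 / 1005.7) * 0.00707    ~ 272.4 K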

    def uv_to_speed(self, data):
        """
        Computes wind speed from wind components.

        Parameters
        ----------
        data : DataFrame
            Must contain the columns 'wind_speed_u' and 'wind_speed_v'.

        Returns
        -------
        wind_speed : Series
        """
        wind_speed = np.sqrt(data['wind_speed_u']**2 + data['wind_speed_v']**2)

        return wind_speed

    def gust_to_speed(self, data, scaling=1/1.4):
        """
        Computes standard wind speed from gust.
        Very approximate and location dependent.

        Parameters
        ----------
        data : DataFrame
            Must contain the column 'wind_speed_gust'.
        scaling : numeric
            Factor applied to the gust to estimate the sustained
            wind speed.

        Returns
        -------
        wind_speed : Series
        """
        wind_speed = data['wind_speed_gust'] * scaling

        return wind_speed
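
# A minimal end-to-end sketch of the class above. This is a sketch, not
# pvlib's documented API: it assumes a pvlib-style GFS subclass of
# ForecastModel that supplies the `variables` mapping used by get_data.
from pvlib.forecast import GFS
import pandas as pd

model = GFS()
start = pd.Timestamp.utcnow()
end = start + pd.Timedelta(days=2)
forecast = model.get_processed_data(32.2, -110.9, start, end)
irrads = model.cloud_cover_to_irradiance(forecast['total_clouds'])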
Example #29
0
class ForecastModel(object):
    '''
    An object for holding forecast model information for use within the 
    pvlib library.

    Simplifies use of siphon library on a THREDDS server.

    Parameters
    ----------
    model_type: string
        UNIDATA category in which the model is located.
    model_name: string
        Name of the UNIDATA forecast model.
    set_type: string
        Model dataset type.

    Attributes
    ----------
    access_url: string
        URL specifying the dataset from which data will be retrieved.
    base_tds_url : string
        The top level server address
    catalog_url : string
        The url path of the catalog to parse.
    columns: list
        List of headers used to create the data DataFrame.
    data: pd.DataFrame
        Data returned from the query.
    data_format: string
        Format of the forecast data being requested from UNIDATA.
    dataset: Dataset
        Object containing information used to access forecast data.
    dataframe_variables: list
        Model variables that are present in the data.
    datasets_list: list
        List of all available datasets.
    fm_models: Dataset
        Object containing all available forecast models.
    fm_models_list: list
        List of all available forecast models from UNIDATA.
    latitude: list
        A list of floats containing latitude values.
    location: Location
        A pvlib Location object containing geographic quantities.
    longitude: list
        A list of floats containing longitude values.
    lbox: boolean
        Indicates the use of a location bounding box.
    ncss: NCSS object
        NCSS object used to access the data.
    model_name: string
        Name of the UNIDATA forecast model.
    model: Dataset
        A dictionary of Dataset objects, keyed by dataset name.
    model_url: string
        The url path of the dataset to parse.
    modelvariables: list
        Common variable names that correspond to queryvariables.
    query: NCSS query object
        NCSS object used to complete the forecast data retrieval.
    queryvariables: list
        Variables that are used to query the THREDDS Data Server.
    rad_type: dictionary
        Dictionary labeling the method used for calculating radiation values.
    time: datetime
        Time range specified for the NCSS query.
    utctime: DatetimeIndex
        Time range in UTC.
    var_stdnames: dictionary
        Dictionary containing the standard names of the variables in the
        query, where the keys are the common names.
    var_units: dictionary
        Dictionary containing the units of the variables in the query,
        where the keys are the common names.
    variables: dictionary
        Dictionary that translates model specific variable names to
        common variable names.
    vert_level: float or integer
        Vertical altitude for query data.
    wind_type: string
        Quantity that was used to calculate wind_speed.
    zenith: numpy.array
        Solar zenith angles for the given time range.
    '''

    access_url_key = 'NetcdfSubset'
    catalog_url = 'http://thredds.ucar.edu/thredds/catalog.xml'
    base_tds_url = catalog_url.split('/thredds/')[0]
    data_format = 'netcdf'
    vert_level = 100000
    columns = np.array(['temperature',
                        'wind_speed',
                        'total_clouds',
                        'low_clouds',
                        'mid_clouds',
                        'high_clouds',
                        'dni',
                        'dhi',
                        'ghi', ])

    def __init__(self, model_type, model_name, set_type):
        self.model_type = model_type
        self.model_name = model_name
        self.set_type = set_type
        self.catalog = TDSCatalog(self.catalog_url)
        self.fm_models = TDSCatalog(self.catalog.catalog_refs[model_type].href)
        self.fm_models_list = sorted(list(self.fm_models.catalog_refs.keys()))
        
        try:
            model_url = self.fm_models.catalog_refs[model_name].href
        except ParseError:
            raise ParseError(self.model_name + ' model may be unavailable.')

        try:
            self.model = TDSCatalog(model_url)
        except HTTPError:
            raise HTTPError(self.model_name + ' model may be unavailable.')

        self.datasets_list = list(self.model.datasets.keys())
        self.set_dataset()


    def set_dataset(self):
        '''
        Retrieves the designated dataset, creates an NCSS object, and
        initiates an NCSS query.

        '''
        keys = list(self.model.datasets.keys())
        labels = [item.split()[0].lower() for item in keys]
        if self.set_type == 'best':
            self.dataset = self.model.datasets[keys[labels.index('best')]]
        elif self.set_type == 'latest':
            self.dataset = self.model.datasets[keys[labels.index('latest')]]
        elif self.set_type == 'full':
            self.dataset = self.model.datasets[keys[labels.index('full')]]

        self.access_url = self.dataset.access_urls[self.access_url_key]
        self.ncss = NCSS(self.access_url)
        self.query = self.ncss.query()        

    def set_query_latlon(self):
        '''
        Sets the NCSS query location latitude and longitude.

        '''
        if isinstance(self.longitude, list):
            self.lbox = True
            # siphon's lonlat_box expects west, east, south, north
            self.query.lonlat_box(self.longitude[0], self.longitude[1],
                                  self.latitude[0], self.latitude[1])
        else:
            self.lbox = False
            self.query.lonlat_point(self.longitude, self.latitude)

    def set_query_time(self):
        '''
        Sets the NCSS query time, as a single time or as a time range,
        depending on the length of self.utctime.
        '''
        if len(self.utctime) == 1:
            self.query.time(pd.to_datetime(self.utctime)[0])
        else:
            self.query.time_range(pd.to_datetime(self.utctime)[0], 
                pd.to_datetime(self.utctime)[-1])
    
    def set_location(self, time):
        '''
        Sets the location for the query.

        Parameters
        ----------
        time: datetime or DatetimeIndex
            Time range of the query.
        '''
        if isinstance(time, datetime.datetime):
            tzinfo = time.tzinfo
        else:
            tzinfo = time.tz

        if tzinfo is None:
            self.location = Location(self.latitude, self.longitude)
        else:
            self.location = Location(self.latitude, self.longitude, tz=tzinfo)

    def get_query_data(self, latitude, longitude, time, vert_level=None, 
        variables=None):
        '''
        Submits a query to the UNIDATA servers using siphon NCSS and 
        converts the netcdf data to a pandas DataFrame.

        Parameters
        ----------
        latitude: list
            A list of floats containing latitude values.
        longitude: list
            A list of floats containing longitude values.
        time: pd.datetimeindex
            Time range of interest.
        vert_level: float or integer
            Vertical altitude of interest.
        variables: dictionary
            Variables and common names being queried.

        Returns
        -------
        pd.DataFrame
        '''
        if vert_level is not None:
            self.vert_level = vert_level
        if variables is not None:
            self.variables = variables
            self.modelvariables = list(self.variables.keys())
            self.queryvariables = [self.variables[key] for key in \
                self.modelvariables]
            self.columns = self.modelvariables
            self.dataframe_variables = self.modelvariables
        

        self.latitude = latitude
        self.longitude = longitude
        self.set_query_latlon()
        self.set_location(time)

        self.utctime = localize_to_utc(time, self.location)
        self.set_query_time()

        self.query.vertical_level(self.vert_level)
        self.query.variables(*self.queryvariables)
        self.query.accept(self.data_format)
        netcdf_data = self.ncss.get_data(self.query)

        try:
            time_var = 'time'
            self.set_time(netcdf_data.variables[time_var])
        except KeyError:
            time_var = 'time1'
            self.set_time(netcdf_data.variables[time_var])

        self.data = self.netcdf2pandas(netcdf_data)

        self.set_variable_units(netcdf_data)
        self.set_variable_stdnames(netcdf_data)
        if self.__class__.__name__ == 'HRRR':
            self.calc_temperature(netcdf_data)
        self.convert_temperature()
        self.calc_wind(netcdf_data)
        self.calc_radiation(netcdf_data)

        self.data = self.data.tz_convert(self.location.tz)

        netcdf_data.close()        

        return self.data       

    def netcdf2pandas(self, data):
        '''
        Transforms data from netcdf to pandas DataFrame.

        Currently only supports one-dimensional netcdf data.

        Parameters
        ----------
        data: netcdf
            Data returned from UNIDATA NCSS query.

        Returns
        -------
        pd.DataFrame
        '''
        if not self.lbox:
            # one-dimensional data
            data_dict = {}
            for var in self.dataframe_variables:
                data_dict[var] = pd.Series(
                    data[self.variables[var]][:].squeeze(), index=self.utctime)
            return pd.DataFrame(data_dict, columns=self.columns)
        else:
            return pd.DataFrame(columns=self.columns, index=self.utctime)

    def set_time(self, time):
        '''
        Converts time data into a pandas DatetimeIndex.

        Parameters
        ----------
        time: netcdf
            Contains time information.

        Returns
        -------
        pandas.DatetimeIndex
        '''
        times = num2date(time[:].squeeze(), time.units)
        self.time = pd.DatetimeIndex(pd.Series(times), tz='UTC')
        self.time = self.time.tz_convert(self.location.tz)
        self.utctime = localize_to_utc(self.time, self.location.tz)

    def set_variable_units(self, data):
        '''
        Extracts variable unit information from netcdf data.

        Parameters
        ----------
        data: netcdf
            Contains queried variable information.

        '''
        self.var_units = {}
        for var in self.variables:
            self.var_units[var] = data[self.variables[var]].units

    def set_variable_stdnames(self, data):
        '''
        Extracts standard names from netcdf data.

        Parameters
        ----------
        data: netcdf
            Contains queried variable information.

        '''
        self.var_stdnames = {}
        for var in self.variables:
            try:
                self.var_stdnames[var] = \
                    data[self.variables[var]].standard_name
            except AttributeError:
                self.var_stdnames[var] = var

    def calc_radiation(self, data, cloud_type='total_clouds'):
        '''
        Determines shortwave radiation values if they are missing from 
        the model data.

        Parameters
        ----------
        data: netcdf
            Query data formatted in netcdf format.
        cloud_type: string
            Type of cloud cover to use for calculating radiation values.
        '''
        self.rad_type = {}
        if not self.lbox and cloud_type in self.modelvariables:           
            cloud_prct = self.data[cloud_type]
            solpos = get_solarposition(self.time, self.location)
            self.zenith = np.array(solpos.zenith.tz_convert('UTC'))
            for rad in ['dni', 'dhi', 'ghi']:
                if self.model_name == 'HRRR_ESRL':
                    # HRRR_ESRL is the only model with the
                    # correct equation of time.
                    if rad in self.modelvariables:
                        self.data[rad] = pd.Series(
                            data[self.variables[rad]][:].squeeze(),
                            index=self.time)
                        self.rad_type[rad] = 'forecast'
                else:
                    self.rad_type[rad] = 'liujordan'
                    self.data[rad] = liujordan(self.zenith, cloud_prct)[rad]

            for var in ['dni', 'dhi', 'ghi']:
                self.data[var].fillna(0, inplace=True)
                self.var_units[var] = '$W m^{-2}$'

    def convert_temperature(self):
        '''
        Converts Kelvin to Celsius.

        '''
        if ('Temperature_surface' in self.queryvariables or
                'Temperature_isobaric' in self.queryvariables):
            self.data['temperature'] -= 273.15
            self.var_units['temperature'] = 'C'

    def calc_temperature(self, data):
        '''
        Calculates temperature (in degrees C) from isobaric temperature.

        Parameters
        ----------
        data: netcdf
            Query data in netcdf format.
        '''
        P = data['Pressure_surface'][:].squeeze() / 100.0
        Tiso = data['Temperature_isobaric'][:].squeeze()
        Td = data['Dewpoint_temperature_isobaric'][:].squeeze() - 273.15
        e = 6.11 * 10**((7.5 * Td) / (Td + 273.3))
        w = 0.622 * (e / (P - e))

        T = Tiso - ((2.501 * 10.**6) / 1005.7) * w

        self.data['temperature'] = T

    def calc_wind(self, data):
        '''
        Computes wind speed. 

        In some cases only gust wind speed is available. The wind_type 
        attribute will indicate the type of wind speed that is present.

        Parameters
        ----------
        data: netcdf
            Query data in netcdf format.
        '''
        if not self.lbox:
            if ('u-component_of_wind_isobaric' in self.queryvariables and
                    'v-component_of_wind_isobaric' in self.queryvariables):
                wind_data = np.sqrt(
                    data['u-component_of_wind_isobaric'][:].squeeze()**2 +
                    data['v-component_of_wind_isobaric'][:].squeeze()**2)
                self.wind_type = 'component'
            elif 'Wind_speed_gust_surface' in self.queryvariables:
                wind_data = data['Wind_speed_gust_surface'][:].squeeze()
                self.wind_type = 'gust'

            if 'wind_speed' in self.data:
                self.data['wind_speed'] = pd.Series(wind_data, index=self.time)
                self.var_units['wind_speed'] = 'm/s'
Example #30
0
from datetime import datetime

import cartopy.feature as cfeature
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import metpy.calc as mpcalc
from metpy.units import units
from netCDF4 import num2date
import numpy as np
import scipy.ndimage as ndimage
from siphon.ncss import NCSS

#######################################
# Data Acquisition
# ---------------

# Open the example netCDF data
ncss = NCSS('https://www.ncei.noaa.gov/thredds/ncss/grid/namanl/'
            '201604/20160416/namanl_218_20160416_1800_000.grb')

# Query the 2016-04-16 18Z analysis time
hgt = ncss.query().time(datetime(2016, 4, 16, 18)).accept('netcdf')
hgt.variables('Geopotential_height_isobaric', 'u-component_of_wind_isobaric',
              'v-component_of_wind_isobaric').add_lonlat()

# Actually getting the data
ds = ncss.get_data(hgt)

lon = ds.variables['lon'][:]
lat = ds.variables['lat'][:]

times = ds.variables[
    ds.variables['Geopotential_height_isobaric'].dimensions[0]]
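
# A possible continuation (a sketch): decode the time coordinate pulled
# out above into datetime objects with num2date from the imports.
vtimes = num2date(times[:].squeeze(), times.units)
print(vtimes[0])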
Example #31
0
import cartopy.feature as cfeature
import cartopy.util as cutil
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import metpy.calc as mpcalc
from netCDF4 import num2date
import numpy as np
import scipy.ndimage as ndimage
from siphon.catalog import TDSCatalog
from siphon.ncss import NCSS

# Latest GFS Dataset
cat = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/grib/'
                 'NCEP/GFS/Global_0p5deg/latest.xml')
best_ds = list(cat.datasets.values())[0]
ncss = NCSS(best_ds.access_urls['NetcdfSubset'])

# Set the time to current
now = datetime.utcnow()

# Query for Latest GFS Run
gfsdata = ncss.query().time(now).accept('netcdf4')
gfsdata.variables('Geopotential_height_isobaric',
                  'u-component_of_wind_isobaric',
                  'v-component_of_wind_isobaric').add_lonlat()

# Set the lat/lon box for the data you want to pull in.
# lonlat_box(west_lon, east_lon, south_lat, north_lat)
gfsdata.lonlat_box(0, 360, 0, 90)

# Set desired level 50000 = 50000 Pa = 500 hPa
Example #32
0
class ForecastModel(object):
    '''
    An object for holding forecast model information for use within the 
    pvlib library.

    Simplifies use of siphon library on a THREDDS server.

    Parameters
    ----------
    model_type: string
        UNIDATA category in which the model is located.
    model_name: string
        Name of the UNIDATA forecast model.
    set_type: string
        Model dataset type.

    Attributes
    ----------
    access_url: string
        URL specifying the dataset from which data will be retrieved.
    base_tds_url : string
        The top level server address
    catalog_url : string
        The url path of the catalog to parse.
    columns: list
        List of headers used to create the data DataFrame.
    data: pd.DataFrame
        Data returned from the query.
    data_format: string
        Format of the forecast data being requested from UNIDATA.
    dataset: Dataset
        Object containing information used to access forecast data.
    dataframe_variables: list
        Model variables that are present in the data.
    datasets_list: list
        List of all available datasets.
    fm_models: Dataset
        Object containing all available forecast models.
    fm_models_list: list
        List of all available forecast models from UNIDATA.
    latitude: list
        A list of floats containing latitude values.
    location: Location
        A pvlib Location object containing geographic quantities.
    longitude: list
        A list of floats containing longitude values.
    lbox: boolean
        Indicates the use of a location bounding box.
    ncss: NCSS object
        NCSS object used to access the data.
    model_name: string
        Name of the UNIDATA forecast model.
    model: Dataset
        A dictionary of Dataset objects, keyed by dataset name.
    model_url: string
        The url path of the dataset to parse.
    modelvariables: list
        Common variable names that correspond to queryvariables.
    query: NCSS query object
        NCSS object used to complete the forecast data retrieval.
    queryvariables: list
        Variables that are used to query the THREDDS Data Server.
    rad_type: dictionary
        Dictionary labeling the method used for calculating radiation values.
    time: datetime
        Time range specified for the NCSS query.
    utctime: DatetimeIndex
        Time range in UTC.
    var_stdnames: dictionary
        Dictionary containing the standard names of the variables in the
        query, where the keys are the common names.
    var_units: dictionary
        Dictionary containing the units of the variables in the query,
        where the keys are the common names.
    variables: dictionary
        Dictionary that translates model specific variable names to
        common variable names.
    vert_level: float or integer
        Vertical altitude for query data.
    wind_type: string
        Quantity that was used to calculate wind_speed.
    zenith: numpy.array
        Solar zenith angles for the given time range.
    '''

    access_url_key = 'NetcdfSubset'
    catalog_url = 'http://thredds.ucar.edu/thredds/catalog.xml'
    base_tds_url = catalog_url.split('/thredds/')[0]
    data_format = 'netcdf'
    vert_level = 100000
    columns = np.array([
        'temperature',
        'wind_speed',
        'total_clouds',
        'low_clouds',
        'mid_clouds',
        'high_clouds',
        'dni',
        'dhi',
        'ghi',
    ])

    def __init__(self, model_type, model_name, set_type):
        self.model_type = model_type
        self.model_name = model_name
        self.set_type = set_type
        self.catalog = TDSCatalog(self.catalog_url)
        self.fm_models = TDSCatalog(self.catalog.catalog_refs[model_type].href)
        self.fm_models_list = sorted(list(self.fm_models.catalog_refs.keys()))

        try:
            model_url = self.fm_models.catalog_refs[model_name].href
        except ParseError:
            raise ParseError(self.model_name + ' model may be unavailable.')

        try:
            self.model = TDSCatalog(model_url)
        except HTTPError:
            raise HTTPError(self.model_name + ' model may be unavailable.')

        self.datasets_list = list(self.model.datasets.keys())
        self.set_dataset()

    def set_dataset(self):
        '''
        Retrieves the designated dataset, creates an NCSS object, and
        initiates an NCSS query.

        '''
        keys = list(self.model.datasets.keys())
        labels = [item.split()[0].lower() for item in keys]
        if self.set_type == 'best':
            self.dataset = self.model.datasets[keys[labels.index('best')]]
        elif self.set_type == 'latest':
            self.dataset = self.model.datasets[keys[labels.index('latest')]]
        elif self.set_type == 'full':
            self.dataset = self.model.datasets[keys[labels.index('full')]]

        self.access_url = self.dataset.access_urls[self.access_url_key]
        self.ncss = NCSS(self.access_url)
        self.query = self.ncss.query()

    def set_query_latlon(self):
        '''
        Sets the NCSS query location latitude and longitude.

        '''
        if isinstance(self.longitude, list):
            self.lbox = True
            # siphon's lonlat_box expects west, east, south, north
            self.query.lonlat_box(self.longitude[0], self.longitude[1],
                                  self.latitude[0], self.latitude[1])
        else:
            self.lbox = False
            self.query.lonlat_point(self.longitude, self.latitude)

    def set_query_time(self):
        '''
        Sets the NCSS query time, as a single time or as a time range,
        depending on the length of self.utctime.
        '''
        if len(self.utctime) == 1:
            self.query.time(pd.to_datetime(self.utctime)[0])
        else:
            self.query.time_range(
                pd.to_datetime(self.utctime)[0],
                pd.to_datetime(self.utctime)[-1])

    def set_location(self, time):
        '''
        Sets the location for the query.

        Parameters
        ----------
        time: datetime or DatetimeIndex
            Time range of the query.
        '''
        if isinstance(time, datetime.datetime):
            tzinfo = time.tzinfo
        else:
            tzinfo = time.tz

        if tzinfo is None:
            self.location = Location(self.latitude, self.longitude)
        else:
            self.location = Location(self.latitude, self.longitude, tz=tzinfo)

    def get_query_data(self,
                       latitude,
                       longitude,
                       time,
                       vert_level=None,
                       variables=None):
        '''
        Submits a query to the UNIDATA servers using siphon NCSS and 
        converts the netcdf data to a pandas DataFrame.

        Parameters
        ----------
        latitude: list
            A list of floats containing latitude values.
        longitude: list
            A list of floats containing longitude values.
        time: pd.datetimeindex
            Time range of interest.
        vert_level: float or integer
            Vertical altitude of interest.
        variables: dictionary
            Variables and common names being queried.

        Returns
        -------
        pd.DataFrame
        '''
        if vert_level is not None:
            self.vert_level = vert_level
        if variables is not None:
            self.variables = variables
            self.modelvariables = list(self.variables.keys())
            self.queryvariables = [self.variables[key] for key in \
                self.modelvariables]
            self.columns = self.modelvariables
            self.dataframe_variables = self.modelvariables

        self.latitude = latitude
        self.longitude = longitude
        self.set_query_latlon()
        self.set_location(time)

        self.utctime = localize_to_utc(time, self.location)
        self.set_query_time()

        self.query.vertical_level(self.vert_level)
        self.query.variables(*self.queryvariables)
        self.query.accept(self.data_format)
        netcdf_data = self.ncss.get_data(self.query)

        try:
            time_var = 'time'
            self.set_time(netcdf_data.variables[time_var])
        except KeyError:
            time_var = 'time1'
            self.set_time(netcdf_data.variables[time_var])

        self.data = self.netcdf2pandas(netcdf_data)

        self.set_variable_units(netcdf_data)
        self.set_variable_stdnames(netcdf_data)
        if self.__class__.__name__ == 'HRRR':
            self.calc_temperature(netcdf_data)
        self.convert_temperature()
        self.calc_wind(netcdf_data)
        self.calc_radiation(netcdf_data)

        self.data = self.data.tz_convert(self.location.tz)

        netcdf_data.close()

        return self.data

    def netcdf2pandas(self, data):
        '''
        Transforms data from netcdf to pandas DataFrame.

        Currently only supports one-dimensional netcdf data.

        Parameters
        ----------
        data: netcdf
            Data returned from UNIDATA NCSS query.

        Returns
        -------
        pd.DataFrame
        '''
        if not self.lbox:
            # one-dimensional data
            data_dict = {}
            for var in self.dataframe_variables:
                data_dict[var] = pd.Series(
                    data[self.variables[var]][:].squeeze(), index=self.utctime)
            return pd.DataFrame(data_dict, columns=self.columns)
        else:
            return pd.DataFrame(columns=self.columns, index=self.utctime)

    def set_time(self, time):
        '''
        Converts time data into a pandas DatetimeIndex.

        Parameters
        ----------
        time: netcdf
            Contains time information.

        Returns
        -------
        pandas.DatetimeIndex
        '''
        times = num2date(time[:].squeeze(), time.units)
        self.time = pd.DatetimeIndex(pd.Series(times), tz='UTC')
        self.time = self.time.tz_convert(self.location.tz)
        self.utctime = localize_to_utc(self.time, self.location.tz)

    def set_variable_units(self, data):
        '''
        Extracts variable unit information from netcdf data.

        Parameters
        ----------
        data: netcdf
            Contains queried variable information.

        '''
        self.var_units = {}
        for var in self.variables:
            self.var_units[var] = data[self.variables[var]].units

    def set_variable_stdnames(self, data):
        '''
        Extracts standard names from netcdf data.

        Parameters
        ----------
        data: netcdf
            Contains queried variable information.

        '''
        self.var_stdnames = {}
        for var in self.variables:
            try:
                self.var_stdnames[var] = \
                    data[self.variables[var]].standard_name
            except AttributeError:
                self.var_stdnames[var] = var

    def calc_radiation(self, data, cloud_type='total_clouds'):
        '''
        Determines shortwave radiation values if they are missing from 
        the model data.

        Parameters
        ----------
        data: netcdf
            Query data formatted in netcdf format.
        cloud_type: string
            Type of cloud cover to use for calculating radiation values.
        '''
        self.rad_type = {}
        if not self.lbox and cloud_type in self.modelvariables:
            cloud_prct = self.data[cloud_type]
            solpos = get_solarposition(self.time, self.location)
            self.zenith = np.array(solpos.zenith.tz_convert('UTC'))
            for rad in ['dni', 'dhi', 'ghi']:
                if self.model_name == 'HRRR_ESRL':
                    # HRRR_ESRL is the only model with the
                    # correct equation of time.
                    if rad in self.modelvariables:
                        self.data[rad] = pd.Series(
                            data[self.variables[rad]][:].squeeze(),
                            index=self.time)
                        self.rad_type[rad] = 'forecast'
                else:
                    self.rad_type[rad] = 'liujordan'
                    self.data[rad] = liujordan(self.zenith,
                                               cloud_prct)[rad]

            for var in ['dni', 'dhi', 'ghi']:
                self.data[var].fillna(0, inplace=True)
                self.var_units[var] = '$W m^{-2}$'

    def convert_temperature(self):
        '''
        Converts Kelvin to Celsius.

        '''
        if ('Temperature_surface' in self.queryvariables or
                'Temperature_isobaric' in self.queryvariables):
            self.data['temperature'] -= 273.15
            self.var_units['temperature'] = 'C'

    def calc_temperature(self, data):
        '''
        Calculates temperature (in degrees C) from isobaric temperature.

        Parameters
        ----------
        data: netcdf
            Query data in netcdf format.
        '''
        P = data['Pressure_surface'][:].squeeze() / 100.0
        Tiso = data['Temperature_isobaric'][:].squeeze()
        Td = data['Dewpoint_temperature_isobaric'][:].squeeze() - 273.15
        e = 6.11 * 10**((7.5 * Td) / (Td + 273.3))
        w = 0.622 * (e / (P - e))

        T = Tiso - ((2.501 * 10.**6) / 1005.7) * w

        self.data['temperature'] = T

    def calc_wind(self, data):
        '''
        Computes wind speed. 

        In some cases only gust wind speed is available. The wind_type 
        attribute will indicate the type of wind speed that is present.

        Parameters
        ----------
        data: netcdf
            Query data in netcdf format.
        '''
        if not self.lbox:
            if ('u-component_of_wind_isobaric' in self.queryvariables and
                    'v-component_of_wind_isobaric' in self.queryvariables):
                wind_data = np.sqrt(
                    data['u-component_of_wind_isobaric'][:].squeeze()**2 +
                    data['v-component_of_wind_isobaric'][:].squeeze()**2)
                self.wind_type = 'component'
            elif 'Wind_speed_gust_surface' in self.queryvariables:
                wind_data = data['Wind_speed_gust_surface'][:].squeeze()
                self.wind_type = 'gust'

            if 'wind_speed' in self.data:
                self.data['wind_speed'] = pd.Series(wind_data, index=self.time)
                self.var_units['wind_speed'] = 'm/s'
from datetime import datetime

import cartopy.feature as cfeature
import matplotlib.pyplot as plt
import metpy.calc as mpcalc
from metpy.units import units
from netCDF4 import num2date
import numpy as np
import scipy.ndimage as ndimage
from siphon.ncss import NCSS

########################################
# Set up access to the data

# Create NCSS object to access the NetcdfSubset
base_url = 'https://www.ncei.noaa.gov/thredds/ncss/grid/gfs-g4-anl-files/'
dt = datetime(2016, 8, 22, 18)
ncss = NCSS('{}{dt:%Y%m}/{dt:%Y%m%d}/gfsanl_4_{dt:%Y%m%d}_'
            '{dt:%H}00_003.grb2'.format(base_url, dt=dt))

# Create lat/lon box for location you want to get data for
query = ncss.query()
query.lonlat_box(north=50, south=30, east=-80, west=-115)
query.time(datetime(2016, 8, 22, 21))

# Request data for geopotential height
query.variables('Geopotential_height_isobaric', 'u-component_of_wind_isobaric',
                'v-component_of_wind_isobaric')
query.vertical_level(100000)
data = ncss.get_data(query)

# Pull out variables you want to use
height_var = data.variables['Geopotential_height_isobaric']
u_wind_var = data.variables['u-component_of_wind_isobaric']
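
# A possible continuation (a sketch): squeeze out the singleton dimensions
# and lightly smooth the height field before contouring; sigma is an
# arbitrary choice.
height = height_var[:].squeeze()
height_smooth = ndimage.gaussian_filter(height, sigma=1.5)
u_wind = u_wind_var[:].squeeze() * units('m/s')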
Example #34
0
from datetime import datetime, timedelta

import numpy as np


def get_ensemble_point(point,
                       variables=['Temperature_height_above_ground_ens'],
                       start=datetime.utcnow() - timedelta(hours=12),
                       end=datetime.utcnow() + timedelta(hours=48)):
    """
    Retrieves the latest ("best") ensemble forecast valid at a single point from the Unidata THREDDS server using
    the Unidata siphon library.
    
    Requires:
    point -> A tuple of (lat, lon) of the point we are trying to retrieve
    variables -> A list of variables we want to retrieve.  Check this page for a full list:
            http://thredds.ucar.edu/thredds/metadata/grib/NCEP/GEFS/Global_1p0deg_Ensemble/members/Best?metadata=variableMap
    start -> A datetime object of the earliest time to look for an ensemble initialization,
            default is current time minus 12 hours
    end -> The last time for which we want ensemble forecast output.  Default is current time plus 48 hours.
    
    Returns:
    A dictionary with one item being the list of valid times in the data ('times') and the rest of the items
    being numpy arrays of shape (n_times, n_ens_members) for each variable requested
        
    """
    # Import the Siphon utilities
    from siphon.catalog import TDSCatalog
    from siphon.ncss import NCSS

    # In Siphon, we connect to a thredds catalog.  Here's the address for the GEFS
    catalog = 'http://thredds.ucar.edu/thredds/catalog/grib/NCEP/GEFS/Global_1p0deg_Ensemble/members/catalog.xml'
    best_model = TDSCatalog(catalog)

    # We select a specific dataset in this catalog, in this case the "best" (most recent) ensemble run
    best_ds = list(best_model.datasets.values())[2]
    ncss = NCSS(best_ds.access_urls['NetcdfSubset'])

    # Here we format our subsetting query.  We specify the exact point we want,
    # the time range, and the variables we are requesting.  We're also going
    # to retrieve the data in a netcdf-like format
    query = ncss.query()
    query.lonlat_point(point[1], point[0])
    query.time_range(start, end)
    query.variables(*variables)
    query.accept('netcdf')

    # Actually get the data
    data = ncss.get_data(query)

    # Format our output into a dictionary
    output = {}
    for v in variables:
        # After the squeeze, this is a nTimes x nEns array
        output[v] = np.squeeze(data.variables[v][:])
        # print(output[v].shape)
    # Also, add times
    # The 'time' variable is hours since "time_coverage_start"
    # Get this in datetime format
    raw_hours = list(np.squeeze(data.variables['time'][:]))
    init_time = datetime.strptime(str(data.time_coverage_start),
                                  '%Y-%m-%dT%H:%M:%SZ')
    output['times'] = [init_time + timedelta(hours=int(x)) for x in raw_hours]

    # Return a dictionary
    return output
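
# Usage sketch: fetch the default 2-m temperature ensemble for a
# hypothetical point (Seattle) and inspect the spread at the first
# valid time.
point = (47.6, -122.3)
ens = get_ensemble_point(point)
temps = ens['Temperature_height_above_ground_ens']  # n_times x n_ens
print(ens['times'][0], temps[0].min(), temps[0].max())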
Example #35
0
class TestNCSS(object):
    """Test NCSS queries and response parsing."""

    server = 'http://thredds.ucar.edu/thredds/ncss/'
    urlPath = 'grib/NCEP/GFS/Global_0p5deg/GFS_Global_0p5deg_20150612_1200.grib2'

    @recorder.use_cassette('ncss_test_metadata')
    def setup(self):
        """Set up for tests with a default valid query."""
        dt = datetime(2015, 6, 12, 15, 0, 0)
        self.ncss = NCSS(self.server + self.urlPath)
        self.nq = self.ncss.query().lonlat_point(-105, 40).time(dt)
        self.nq.variables('Temperature_isobaric', 'Relative_humidity_isobaric')

    def test_good_query(self):
        """Test that a good query is properly validated."""
        assert self.ncss.validate_query(self.nq)

    def test_bad_query(self):
        """Test that a query with an unknown variable is invalid."""
        self.nq.variables('foo')
        assert not self.ncss.validate_query(self.nq)

    def test_empty_query(self):
        """Test that an empty query is invalid."""
        query = self.ncss.query()
        res = self.ncss.validate_query(query)
        assert not res
        assert not isinstance(res, set)

    def test_bad_query_no_vars(self):
        """Test that a query without variables is invalid."""
        self.nq.var.clear()
        assert not self.ncss.validate_query(self.nq)

    @recorder.use_cassette('ncss_gfs_xml_point')
    def test_xml_point(self):
        """Test parsing XML point returns."""
        self.nq.accept('xml')
        xml_data = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in xml_data
        assert 'Relative_humidity_isobaric' in xml_data
        assert xml_data['lat'][0] == 40
        assert xml_data['lon'][0] == -105

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_csv_point(self):
        """Test parsing CSV point returns."""
        self.nq.accept('csv')
        csv_data = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in csv_data
        assert 'Relative_humidity_isobaric' in csv_data
        assert csv_data['lat'][0] == 40
        assert csv_data['lon'][0] == -105

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_unit_handler_csv(self):
        """Test unit-handling from CSV returns."""
        self.nq.accept('csv')
        self.ncss.unit_handler = tuple_unit_handler
        csv_data = self.ncss.get_data(self.nq)

        temp = csv_data['Temperature_isobaric']
        assert len(temp) == 2
        assert temp[1] == 'K'

        relh = csv_data['Relative_humidity_isobaric']
        assert len(relh) == 2
        assert relh[1] == '%'

    @recorder.use_cassette('ncss_gfs_xml_point')
    def test_unit_handler_xml(self):
        """Test unit-handling from XML returns."""
        self.nq.accept('xml')
        self.ncss.unit_handler = tuple_unit_handler
        xml_data = self.ncss.get_data(self.nq)

        temp = xml_data['Temperature_isobaric']
        assert len(temp) == 2
        assert temp[1] == 'K'

        relh = xml_data['Relative_humidity_isobaric']
        assert len(relh) == 2
        assert relh[1] == '%'

    @recorder.use_cassette('ncss_gfs_netcdf_point')
    def test_netcdf_point(self):
        """Test handling of netCDF point returns."""
        self.nq.accept('netcdf')
        nc = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in nc.variables
        assert 'Relative_humidity_isobaric' in nc.variables
        assert nc.variables['latitude'][0] == 40
        assert nc.variables['longitude'][0] == -105

    @recorder.use_cassette('ncss_gfs_netcdf4_point')
    def test_netcdf4_point(self):
        """Test handling of netCDF4 point returns."""
        self.nq.accept('netcdf4')
        nc = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in nc.variables
        assert 'Relative_humidity_isobaric' in nc.variables
        assert nc.variables['latitude'][0] == 40
        assert nc.variables['longitude'][0] == -105

    @recorder.use_cassette('ncss_gfs_vertical_level')
    def test_vertical_level(self):
        """Test data return from a single vertical level is correct."""
        self.nq.accept('csv').vertical_level(50000)
        csv_data = self.ncss.get_data(self.nq)

        np.testing.assert_almost_equal(csv_data['Temperature_isobaric'],
                                       np.array([263.40]), 2)

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_raw_csv(self):
        """Test CSV point return from a GFS request."""
        self.nq.accept('csv')
        csv_data = self.ncss.get_data_raw(self.nq)

        assert csv_data.startswith(b'date,lat')

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_unknown_mime(self):
        """Test handling of unknown mimetypes."""
        self.nq.accept('csv')
        with response_context():
            csv_data = self.ncss.get_data(self.nq)
            assert csv_data.startswith(b'date,lat')
Example #36
0
        for j in range(latitude.shape[0]):
            _, _, dx[j, i] = g.inv(longitude[j, i], latitude[j, i],
                                   longitude[j, i + 1], latitude[j, i + 1])
    dx[:, i + 1] = dx[:, i]

    xdiff_sign = np.sign(longitude[0, 1] - longitude[0, 0])
    ydiff_sign = np.sign(latitude[1, 0] - latitude[0, 0])
    return xdiff_sign * dx * units.meter, ydiff_sign * dy * units.meter
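
# Recent MetPy versions bundle this grid-spacing computation; assuming the
# installed MetPy still provides it, the hand-rolled loop above can be
# replaced with a single call (a sketch, not a drop-in for every version):
#
#     import metpy.calc as mpcalc
#     dx, dy = mpcalc.lat_lon_grid_deltas(longitude, latitude)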


######################################
# Set up access to the data

# Create NCSS object to access the NetcdfSubset
ncss = NCSS(
    'https://nomads.ncdc.noaa.gov/thredds/ncss/grid/gfs-004-anl/201608/20160822/'
    'gfsanl_4_20160822_1800_003.grb2')

# Create lat/lon box for location you want to get data for
query = ncss.query()
query.lonlat_box(north=50, south=30, east=-80, west=-115)
query.time(datetime(2016, 8, 22, 21))

# Request data for geopotential height
query.variables('Geopotential_height', 'U-component_of_wind',
                'V-component_of_wind')
query.vertical_level(100000)
data = ncss.get_data(query)

# Pull out variables you want to use
height_var = data.variables['Geopotential_height']
Example #37
0
from datetime import datetime

from netCDF4 import num2date
import numpy as np
from metpy.units import units
import scipy.ndimage as ndimage
from siphon.catalog import TDSCatalog
from siphon.ncss import NCSS
# =============================================================================
# RETRIEVE RAP AND HRRR DATA
# =============================================================================
RAP = 'http://thredds-jetstream.unidata.ucar.edu/thredds/catalog/grib/NCEP/RAP/CONUS_20km/latest.xml'
HRRR = 'http://thredds-jetstream.unidata.ucar.edu/thredds/catalog/grib/NCEP/HRRR/CONUS_2p5km/latest.xml'
GFS = 'http://thredds-jetstream.unidata.ucar.edu/thredds/catalog/grib/NCEP/GFS/Global_0p5deg/latest.xml'

DATA = TDSCatalog(GFS)
BEST_DATA = list(DATA.datasets.values())[0]
NCSS_DATA = NCSS(BEST_DATA.access_urls['NetcdfSubset'])

NOW = datetime.utcnow()
LATEST_DATA = NCSS_DATA.query().time(NOW).accept('netcdf4')
# =============================================================================
# UPPER-AIR VARIABLES
# =============================================================================
# 250: JET STREAM, GEOPOTENTIAL HEIGHT, POTENTIAL VORTICITY, IRROTATIONAL WIND
def GFS_250hPa_jet_stream_SLP(lon_west, lon_east, lat_south, lat_north):
    pass  # body not included in this example


def GFS_250hPa_jet_stream_jet_dyn(lon_west, lon_east, lat_south, lat_north):
    pass  # body not included in this example


# 500: VORTICITY, GEOPOTENTIAL HEIGHT, VORTICITY ADVECTION
def GFS_500hPa_vorticity(lon_west, lon_east, lat_south, lat_north):
    LATEST_DATA.variables('Geopotential_height_isobaric',
                          'u-component_of_wind_isobaric',
                          'v-component_of_wind_isobaric').add_lonlat()
    LATEST_DATA.lonlat_box(lon_west, lon_east, lat_south, lat_north)
    LATEST_DATA.vertical_level(50000)
Example #38
0
class TestNCSS(object):
    """Test NCSS queries and response parsing."""

    server = 'http://thredds.ucar.edu/thredds/ncss/'
    urlPath = 'grib/NCEP/GFS/Global_0p5deg/GFS_Global_0p5deg_20150612_1200.grib2'

    @recorder.use_cassette('ncss_test_metadata')
    def setup(self):
        """Set up for tests with a default valid query."""
        dt = datetime(2015, 6, 12, 15, 0, 0)
        self.ncss = NCSS(self.server + self.urlPath)
        self.nq = self.ncss.query().lonlat_point(-105, 40).time(dt)
        self.nq.variables('Temperature_isobaric', 'Relative_humidity_isobaric')

    def test_good_query(self):
        """Test that a good query is properly validated."""
        assert self.ncss.validate_query(self.nq)

    def test_bad_query(self):
        """Test that a query with an unknown variable is invalid."""
        self.nq.variables('foo')
        assert not self.ncss.validate_query(self.nq)

    def test_empty_query(self):
        """Test that an empty query is invalid."""
        query = self.ncss.query()
        res = self.ncss.validate_query(query)
        assert not res
        assert not isinstance(res, set)

    def test_bad_query_no_vars(self):
        """Test that a query without variables is invalid."""
        self.nq.var.clear()
        assert not self.ncss.validate_query(self.nq)

    @recorder.use_cassette('ncss_gfs_xml_point')
    def test_xml_point(self):
        """Test parsing XML point returns."""
        self.nq.accept('xml')
        xml_data = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in xml_data
        assert 'Relative_humidity_isobaric' in xml_data
        assert xml_data['lat'][0] == 40
        assert xml_data['lon'][0] == -105

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_csv_point(self):
        """Test parsing CSV point returns."""
        self.nq.accept('csv')
        csv_data = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in csv_data
        assert 'Relative_humidity_isobaric' in csv_data
        assert csv_data['lat'][0] == 40
        assert csv_data['lon'][0] == -105

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_unit_handler_csv(self):
        """Test unit-handling from CSV returns."""
        self.nq.accept('csv')
        self.ncss.unit_handler = tuple_unit_handler
        csv_data = self.ncss.get_data(self.nq)

        temp = csv_data['Temperature_isobaric']
        assert len(temp) == 2
        assert temp[1] == 'K'

        relh = csv_data['Relative_humidity_isobaric']
        assert len(relh) == 2
        assert relh[1] == '%'

    @recorder.use_cassette('ncss_gfs_xml_point')
    def test_unit_handler_xml(self):
        """Test unit-handling from XML returns."""
        self.nq.accept('xml')
        self.ncss.unit_handler = tuple_unit_handler
        xml_data = self.ncss.get_data(self.nq)

        temp = xml_data['Temperature_isobaric']
        assert len(temp) == 2
        assert temp[1] == 'K'

        relh = xml_data['Relative_humidity_isobaric']
        assert len(relh) == 2
        assert relh[1] == '%'

    @recorder.use_cassette('ncss_gfs_netcdf_point')
    def test_netcdf_point(self):
        """Test handling of netCDF point returns."""
        self.nq.accept('netcdf')
        nc = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in nc.variables
        assert 'Relative_humidity_isobaric' in nc.variables
        assert nc.variables['latitude'][0] == 40
        assert nc.variables['longitude'][0] == -105

    @recorder.use_cassette('ncss_gfs_netcdf4_point')
    def test_netcdf4_point(self):
        """Test handling of netCDF4 point returns."""
        self.nq.accept('netcdf4')
        nc = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in nc.variables
        assert 'Relative_humidity_isobaric' in nc.variables
        assert nc.variables['latitude'][0] == 40
        assert nc.variables['longitude'][0] == -105

    @recorder.use_cassette('ncss_gfs_vertical_level')
    def test_vertical_level(self):
        """Test data return from a single vertical level is correct."""
        self.nq.accept('csv').vertical_level(50000)
        csv_data = self.ncss.get_data(self.nq)

        np.testing.assert_almost_equal(csv_data['Temperature_isobaric'],
                                       np.array([263.40]), 2)

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_raw_csv(self):
        """Test CSV point return from a GFS request."""
        self.nq.accept('csv')
        csv_data = self.ncss.get_data_raw(self.nq)

        assert csv_data.startswith(b'date,lat')

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_unknown_mime(self):
        """Test handling of unknown mimetypes."""
        self.nq.accept('csv')
        with response_context():
            csv_data = self.ncss.get_data(self.nq)
            assert csv_data.startswith(b'date,lat')
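
# The tests above reference a tuple_unit_handler that is not shown in this
# excerpt. A minimal sketch consistent with the assertions (each variable
# comes back as a (values, unit-string) pair) might look like:
def tuple_unit_handler(data, units=None):
    """Return the data values as a list alongside their unit string."""
    return np.array(data).tolist(), units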
Example #39
from ipywidgets import interact_manual
import matplotlib.pyplot as plt
import metpy.calc as mcalc
from metpy.units import units
from netCDF4 import num2date
import numpy as np
from siphon.ncss import NCSS

#######################################
# **Getting the data**
#
# In this example, NARR reanalysis data for 18 UTC 04 April 1987 from the National Centers
# for Environmental Information (https://nomads.ncdc.noaa.gov) will be used.

# Link to NetCDF subset service for NARR analysis data
ncss = NCSS('https://nomads.ncdc.noaa.gov/thredds/ncss/grid/narr-a/198704/19870404/'
            'narr-a_221_19870404_1800_000.grb')

# Bring in needed data
modeldata = ncss.query().all_times()
modeldata.variables('Geopotential_height',
                    'u_wind',
                    'v_wind',
                    'Temperature',
                    'Specific_humidity').add_lonlat()
# Set the lat/lon box for the data you want to pull in.
# lonlat_box(west_lon, east_lon, south_lat, north_lat)
modeldata.lonlat_box(-140, -60, 16, 60)

# Actually getting the data
data = ncss.get_data(modeldata)
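
# Unpacking the response (a sketch, assuming the requested variable names come
# back unchanged and that add_lonlat() supplied 'lon'/'lat' arrays):
hght = data.variables['Geopotential_height'][:].squeeze()
temp = data.variables['Temperature'][:].squeeze()
shum = data.variables['Specific_humidity'][:].squeeze()
lon = data.variables['lon'][:]
lat = data.variables['lat'][:]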
Example #40
def radar_plus_obs(station, my_datetime, radar_title=None, bb=None,
                   station_radius=75000., station_layout=simple_layout,
                   field='reflectivity', vmin=None, vmax=None,
                   sweep=0):
    if radar_title is None:
        radar_title = 'Area '

    radar = get_radar_from_aws(station, my_datetime)

    # Lets get some geographical context
    if bb is None:
        lats = radar.gate_latitude
        lons = radar.gate_longitude

        min_lon = lons['data'].min()
        min_lat = lats['data'].min()
        max_lat = lats['data'].max()
        max_lon = lons['data'].max()
        bb = {'north' : max_lat,
              'south' : min_lat,
              'east' : max_lon,
              'west' : min_lon}
    else:
        min_lon = bb['west']
        min_lat = bb['south']
        max_lon = bb['east']
        max_lat = bb['north']

    print('min_lat:', min_lat, ' min_lon:', min_lon,
          ' max_lat:', max_lat, ' max_lon:', max_lon)

    index_at_start = radar.sweep_start_ray_index['data'][sweep]
    time_at_start_of_radar = num2date(radar.time['data'][index_at_start],
                                      radar.time['units'])
    central = pytz.timezone('US/Central')  # variable renamed: the zone is US/Central
    local_time = central.fromutc(time_at_start_of_radar)
    fancy_date_string = local_time.strftime('%A %B %d at %I:%M %p %Z')
    print(fancy_date_string)

    metar_cat = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/nws/metar/ncdecoded/catalog.xml?'
                           'dataset=nws/metar/ncdecoded/Metar_Station_Data_fc.cdmr')
    dataset = list(metar_cat.datasets.values())[0]
    ncss = NCSS(dataset.access_urls["NetcdfSubset"])

    query = ncss.query().accept('csv').time(time_at_start_of_radar)
    query.lonlat_box(north=max_lat, south=min_lat, east=max_lon, west=min_lon)
    query.variables('air_temperature', 'dew_point_temperature', 'inches_ALTIM',
                    'wind_speed', 'wind_from_direction', 'cloud_area_fraction', 'weather')
    data = ncss.get_data(query)

    lats = data['latitude'][:]
    lons = data['longitude'][:]
    tair = data['air_temperature'][:]
    dewp = data['dew_point_temperature'][:]
    slp = (data['inches_ALTIM'][:] * units('inHg')).to('mbar')

    # Convert wind to components (renamed to wind_components in MetPy >= 1.0)
    u, v = mpcalc.get_wind_components(data['wind_speed'] * units.knot,
                                      data['wind_from_direction'] * units.deg)

    # Need to handle missing (NaN) and convert to proper code
    cloud_cover = 8 * data['cloud_area_fraction']
    cloud_cover[np.isnan(cloud_cover)] = 9
    cloud_cover = cloud_cover.astype(int)  # np.int was removed from NumPy

    # For some reason these come back as bytes instead of strings
    stid = [s.decode() for s in data['station']]

    # Convert the text weather observations to WMO codes we can map to symbols
    #wx_text = [s.decode('ascii') for s in data['weather']]
    #wx_codes = np.array(list(to_code(wx_text)))

    sfc_data = {'latitude': lats, 'longitude': lons,
                'air_temperature': tair, 'dew_point_temperature': dewp, 'eastward_wind': u,
                'northward_wind': v, 'cloud_coverage': cloud_cover,
                'air_pressure_at_sea_level': slp}#, 'present_weather': wx_codes}

    fig = plt.figure(figsize=(10, 8))
    display = pyart.graph.RadarMapDisplayCartopy(radar)
    lat_0 = display.loc[0]
    lon_0 = display.loc[1]

    # Set our Projection
    projection = cartopy.crs.Mercator(central_longitude=lon_0,
                                      min_latitude=min_lat, max_latitude=max_lat)

    # Call our function to reduce data
    filter_data(sfc_data, projection, radius=station_radius, sort_key='present_weather')
    print(sweep)
    display.plot_ppi_map(
        field, sweep, colorbar_flag=True,
        title=radar_title +' area ' + field + ' \n' + fancy_date_string,
        projection=projection,
        min_lon=min_lon, max_lon=max_lon, min_lat=min_lat, max_lat=max_lat,
        vmin=vmin, vmax=vmax)

    # Mark the radar
    display.plot_point(lon_0, lat_0, label_text='Radar')

    # Get the current axes and plot some lat and lon lines
    gl = display.ax.gridlines(draw_labels=True,
                              linewidth=2, color='gray', alpha=0.5, linestyle='--')
    gl.xlabels_top = False
    gl.ylabels_right = False

    # Make the station plot
    stationplot = StationPlot(display.ax, sfc_data['longitude'], sfc_data['latitude'],
                              transform=cartopy.crs.PlateCarree(),
                              fontsize=12)
    station_layout.plot(stationplot, sfc_data)

    return display, time_at_start_of_radar
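
# Illustrative usage (a sketch; the station ID and time are hypothetical
# placeholders, not taken from the original example):
from datetime import datetime
import matplotlib.pyplot as plt

display, radar_time = radar_plus_obs('KVNX', datetime(2017, 4, 5, 12),
                                     field='reflectivity', vmin=-8, vmax=64)
plt.show()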
Example #41
%matplotlib inline

# Extract HRRR data using Unidata's Siphon package

# Resolve the latest HRRR dataset
from siphon.catalog import TDSCatalog
latest_hrrr = TDSCatalog('http://thredds-jumbo.unidata.ucar.edu/thredds/catalog/grib/HRRR/CONUS_3km/surface/latest.xml')
hrrr_ds = list(latest_hrrr.datasets.values())[0]

# Set up access via NCSS
from siphon.ncss import NCSS
ncss = NCSS(hrrr_ds.access_urls['NetcdfSubset'])

# Create a query to ask for all times in netcdf4 format for the
# wind-component variables, with a bounding box
query = ncss.query()


dap_url = hrrr_ds.access_urls['OPENDAP']


query.all_times().accept('netcdf4').variables('u-component_of_wind_height_above_ground',
                                              'v-component_of_wind_height_above_ground')
query.lonlat_box(north=45, south=41., east=-63, west=-71.5)  # keywords avoid the (west, east, south, north) positional order
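
# Fetching and using the result (a sketch, not part of the original notebook):
# combine the wind components into a speed field with MetPy. The .units
# attributes are assumed to be present on the returned netCDF variables.
import metpy.calc as mpcalc
from metpy.units import units

data = ncss.get_data(query)
u_var = data.variables['u-component_of_wind_height_above_ground']
v_var = data.variables['v-component_of_wind_height_above_ground']
u = units.Quantity(u_var[:].squeeze(), u_var.units)
v = units.Quantity(v_var[:].squeeze(), v_var.units)
wind_speed = mpcalc.wind_speed(u, v)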