longitude[j, i + 1], latitude[j, i + 1]) dx[:, i + 1] = dx[:, i] xdiff_sign = np.sign(longitude[0, 1] - longitude[0, 0]) ydiff_sign = np.sign(latitude[1, 0] - latitude[0, 0]) return xdiff_sign * dx * units.meter, ydiff_sign * dy * units.meter ############################################### # Create NCSS object to access the NetcdfSubset # --------------------------------------------- # Data from NOMADS GFS 0.5 deg Analysis Archive # https://www.ncdc.noaa.gov/data-access/model-data/model-datasets/global-forcast-system-gfs dt = datetime(2017, 4, 5, 12) ncss = NCSS( 'https://nomads.ncdc.noaa.gov/thredds/ncss/grid/gfs-004-anl/' '{0:%Y%m}/{0:%Y%m%d}/gfsanl_4_{0:%Y%m%d}_{0:%H}00_000.grb2'.format(dt)) # Create lat/lon box for location you want to get data for query = ncss.query().time(dt) query.lonlat_box(north=65, south=15, east=310, west=220) query.accept('netcdf4') # Request data for vorticity query.variables('Geopotential_height', 'Temperature', 'U-component_of_wind', 'V-component_of_wind') data = ncss.get_data(query) # Pull out variables you want to use hght_var = data.variables['Geopotential_height'] temp_var = data.variables['Temperature']
def get_data(self):
    """Download the requested GFS 0.25 degree fields through NCSS.

    Builds the catalog URL for the 0000 run on ``self.now``'s date, opens
    the first dataset listed there and requests every variable named in
    ``self.query_list`` between ``self.start`` and ``self.end`` over a fixed
    lon/lat box.

    Returns
    -------
    The object returned by ``ncss.get_data``; it is also stored on
    ``self.data``.
    """
    # Date stamp of the model run, formatted as YYYYMMDD.
    run_date = f"{self.now.year}{self.now.month:02d}{self.now.day:02d}"
    catalog_url = (
        "https://thredds.ucar.edu/thredds/catalog/grib/NCEP/GFS/Global_0p25deg/"
        f"GFS_Global_0p25deg_{run_date}_0000.grib2/catalog.xml"
    )
    catalog = TDSCatalog(catalog_url)
    first_dataset = list(catalog.datasets.values())[0]

    # NCSS client bound to the dataset's NetcdfSubset endpoint.
    ncss = NCSS(first_dataset.access_urls['NetcdfSubset'])

    request = ncss.query()
    request.time_range(self.start, self.end)
    request.lonlat_box(north=80, south=0, east=310, west=200)
    request.accept('netcdf4')

    rule = "-----------------------------------------\n"
    print(rule
          + "Sit back....\nOr get your coffee....\nOr do a Sudoku....\n"
          + rule)

    print("qeueing data...")
    # Variables are registered one at a time on the query.
    for variable_name in self.query_list:
        request.variables(variable_name)
    print("\ndone qeueing data.\n\ngrabbing data...\n")

    # This is the call that actually contacts the server.
    self.data = ncss.get_data(request)
    print("done grabbing data!!\n-_-_-_-_-_-_-_-_-_-_-_\n")
    return self.data
def query_point(self):
    """Request ``self.variables`` at a single point over a forecast window.

    The window runs from ``self.start`` to ``self.end`` hours after the
    current UTC time.  As a side effect ``self.init_time`` and
    ``self.end_time`` are set to formatted strings for "now" and the end
    of the window.

    Returns
    -------
    The object returned by ``ncss.get_data`` for the point query.
    """
    print('Retrieving selected variables...')
    dataset = self.connect()
    ncss = NCSS(dataset.access_urls['NetcdfSubset'])
    request = ncss.query()

    reference = datetime.utcnow()
    end_offset_hours = self.end
    window_end = reference + pd.Timedelta(hours=end_offset_hours)

    # Human-readable labels for the start and end of the window.
    label_format = '%Hz-%d-%Y'
    self.init_time = reference.strftime(label_format)
    self.end_time = window_end.strftime(label_format)

    window_start = reference + pd.Timedelta(hours=self.start)
    request.time_range(window_start,
                       reference + pd.Timedelta(hours=end_offset_hours))
    request.accept('netcdf4')
    request.lonlat_point(self.lon, self.lat)
    # The variable selection is passed through exactly as stored on self.
    request.variables(self.variables)

    return ncss.get_data(request)
def build_query(west=-58.5, east=32, south=42, north=74):
    """Prepare a CSV NCSS query for decoded METAR observations.

    Parameters
    ----------
    west, east, south, north : float
        Edges of the lon/lat bounding box, passed to ``lonlat_box`` in
        that order.

    Returns
    -------
    tuple
        ``(ncss, query)``: the NCSS client and the configured query.  The
        query is not submitted here.
    """
    catalog = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/nws/metar/'
                         'ncdecoded/catalog.xml')
    first_dataset = list(catalog.datasets.values())[0]
    print(list(first_dataset.access_urls))

    # Use siphon's NetcdfSubset endpoint for the request.
    ncss = NCSS(first_dataset.access_urls['NetcdfSubset'])
    print(ncss.variables)

    # Current UTC time truncated to the top of the hour.
    top_of_hour = datetime.utcnow().replace(minute=0, second=0, microsecond=0)

    wanted_fields = (
        'air_temperature', 'dew_point_temperature', 'wind_speed',
        'precipitation_amount_hourly', 'hectoPascal_ALTIM',
        'air_pressure_at_sea_level', 'wind_from_direction',
        'cloud_area_fraction', 'weather', 'report', 'wind_gust',
    )

    query = ncss.query()
    query.lonlat_box(west, east, south, north)
    query.time(top_of_hour)
    query.variables(*wanted_fields)
    query.accept('csv')
    return ncss, query
def ncss_subset(tds):
    """Build (but do not submit) an NCSS query for ``tasmax`` and ``tasmin``.

    The first dataset returned by ``tds_connect(tds)`` is used, and the
    query is limited to the box ``lonlat_box(142, 149, -45, -38)`` in
    netcdf format.

    Returns
    -------
    The configured query object.
    """
    first_dataset = tds_connect(tds)[0]
    client = NCSS(first_dataset.access_urls['NetcdfSubset'])

    request = client.query()
    request.lonlat_box(142, 149, -45, -38)
    request.accept('netcdf')
    request.variables('tasmax', 'tasmin')
    return request
def get_obs(ts, mybb):
    """Fetch METAR surface observations and convert them for plotting.

    Parameters
    ----------
    ts : datetime.datetime
        Observation time; the query is issued for one minute before it.
    mybb : dict
        Keyword arguments forwarded to ``query.lonlat_box``.

    Returns
    -------
    tuple
        ``(sfc_data, have_obs)``.  ``sfc_data`` is a dict of arrays keyed by
        field name and ``have_obs`` is True when retrieval and conversion
        succeeded.  On any failure in that step the function returns
        ``({}, False)`` (deliberate best-effort behaviour).
    """
    # copied from the browser url box
    metar_cat_url = ('http://thredds.ucar.edu/thredds/catalog/nws/metar/ncdecoded/'
                     'catalog.xml?dataset=nws/metar/ncdecoded/Metar_Station_Data_fc.cdmr')
    # parse the xml
    metar_cat = TDSCatalog(metar_cat_url)
    # only one "dataset" in this catalog
    dataset = list(metar_cat.datasets.values())[0]
    ncss_url = dataset.access_urls["NetcdfSubset"]
    ncss = NCSS(ncss_url)

    query = ncss.query().accept('csv').time(ts - datetime.timedelta(minutes=1))
    query.lonlat_box(**mybb)
    query.variables('air_temperature', 'dew_point_temperature', 'inches_ALTIM',
                    'wind_speed', 'wind_from_direction', 'cloud_area_fraction',
                    'weather')

    try:
        data = ncss.get_data(query)
        lats = data['latitude'][:]
        lons = data['longitude'][:]
        tair = data['air_temperature'][:]
        dewp = data['dew_point_temperature'][:]
        slp = (data['inches_ALTIM'][:] * units('inHg')).to('mbar')

        # Convert wind to components
        u, v = mpcalc.get_wind_components(data['wind_speed'] * units.knot,
                                          data['wind_from_direction'] * units.deg)

        # Need to handle missing (NaN) and convert to proper code
        cloud_cover = 8 * data['cloud_area_fraction']
        cloud_cover[np.isnan(cloud_cover)] = 9
        # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the
        # documented replacement and yields the same dtype.
        cloud_cover = cloud_cover.astype(int)

        # For some reason these come back as bytes instead of strings.
        # Decoded but not currently included in ``sfc_data``.
        stid = [s.decode() for s in data['station']]

        # Convert the text weather observations to WMO codes we can map to symbols
        if data['weather'].dtype != bool:
            wx_text = [s.decode('ascii') for s in data['weather']]
            wx_codes = np.array(list(to_code(wx_text)))
        else:
            wx_codes = np.array([0] * len(data['weather']))

        sfc_data = {'latitude': lats, 'longitude': lons,
                    'air_temperature': tair, 'dew_point_temperature': dewp,
                    'eastward_wind': u, 'northward_wind': v,
                    'cloud_coverage': cloud_cover,
                    'air_pressure_at_sea_level': slp,
                    'present_weather': wx_codes}
        have_obs = True
    except Exception:
        # Best effort: any retrieval or conversion error means "no obs".
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        have_obs = False
        sfc_data = {}

    return sfc_data, have_obs
def get_sounding(source, lat, long):
    """Retrieve a GFS 0.5 degree model sounding at a point for the current time.

    Parameters
    ----------
    source
        Currently unused; only the NCSS path is implemented.
    lat, long : float
        Latitude and longitude of the point.

    Returns
    -------
    tuple
        ``(t, td, p, u, v, lat, long, timestamp)``: temperature (degC),
        dewpoint (degC), pressure (mbar) and wind components (knots), each
        reversed relative to the order returned by the server, followed by
        the inputs and the current UTC time as a string with its last seven
        characters removed.
    """
    catalog = TDSCatalog(
        'http://thredds.ucar.edu/thredds/catalog/grib/NCEP/GFS/Global_0p5deg/'
        + 'catalog.xml?dataset=grib/NCEP/GFS/Global_0p5deg/Best')
    best_ds = list(catalog.datasets.values())[0]
    ncss = NCSS(best_ds.access_urls['NetcdfSubset'])

    request = ncss.query()
    request.lonlat_point(long, lat).time(datetime.utcnow())
    request.accept('netcdf4')
    request.variables('Temperature_isobaric', 'Relative_humidity_isobaric',
                      'u-component_of_wind_isobaric',
                      'v-component_of_wind_isobaric')
    fields = ncss.get_data(request).variables

    def reversed_profile(name):
        # Squeeze away singleton axes, drop to a plain array, flip the order.
        return np.array(fields[name][:].squeeze())[::-1]

    temp_kelvin = fields['Temperature_isobaric'][:].squeeze() * units.kelvin
    rel_humidity = fields['Relative_humidity_isobaric'][:] / 100
    dewpoint = dewpoint_rh(temp_kelvin, rel_humidity)[:].squeeze()

    # Re-attach units after the conversion to plain arrays, then convert to
    # the units wanted for a skew-T.
    t = (np.array(temp_kelvin)[::-1] * units.kelvin).to('degC')
    td = np.array(dewpoint)[::-1] * units.degC
    p = (reversed_profile('isobaric3') * units.pascals).to('mbar')
    u = (reversed_profile('u-component_of_wind_isobaric') * units('m/s')).to('knot')
    v = (reversed_profile('v-component_of_wind_isobaric') * units('m/s')).to('knot')

    return t, td, p, u, v, lat, long, str(datetime.utcnow())[:-7]
def get_closest_gfs(time, level, field):
    """
    Retrieve the current best 0.25 deg GFS model for a given field, level, time.

    time : datetime object
    level : level of results (in hPa)
    field : CF field to retrieve

    Returns ``(time_vals, lat_2d, lon_2d, data_var)``: the decoded time
    value(s), 2D latitude and longitude grids, and the Gaussian-smoothed
    field taken at index ``[0][0]`` of the returned data.

    Raises ``ValueError`` if none of the field's coordinates has 'time' in
    its name.
    """
    # Get the catalog and best GFS entry
    catalog = TDSCatalog(
        'http://thredds.ucar.edu/thredds/catalog/grib/NCEP/GFS/Global_0p25deg/catalog.xml'
    )
    best_gfs = list(catalog.datasets.values())[1]

    # Using NCSS, build a query and fetch the data
    ncss = NCSS(best_gfs.access_urls['NetcdfSubset'])
    query = ncss.query()
    query.lonlat_box(north=90, south=10, east=360, west=160)
    query.vertical_level(level)
    query.time(time)
    query.accept('netcdf4')
    query.variables(field)
    data = ncss.get_data(query)

    # Pull out the variables we will use
    lat_var = data.variables['lat']
    lon_var = data.variables['lon']
    data_var = data.variables[field]

    # Find the correct time dimension name
    for coord in data_var.coordinates.split():
        if 'time' in coord:
            time_var = data.variables[coord]
            break
    else:
        # Without this, ``time_var`` would be unbound below and the caller
        # would see an opaque UnboundLocalError.
        raise ValueError('No time coordinate found for ' + str(field))

    # Convert number of hours since the reference time into an actual date
    time_vals = netCDF4.num2date(time_var[:].squeeze(), time_var.units)

    # Combine 1D latitude and longitudes into a 2D grid of locations
    lon_2d, lat_2d = np.meshgrid(lon_var[:], lat_var[:])

    # Filter the data to smooth it out a bit
    data_var = ndimage.gaussian_filter(data_var[:][0][0], sigma=1.5, order=0)

    return time_vals, lat_2d, lon_2d, data_var
def return_gfs():
    """Return an NCSS client for the "Best" GFS 0.5 degree dataset.

    The catalog URL selects the ``Best`` dataset directly, so the first
    (index 0) dataset in the parsed catalog is used.
    """
    best_gfs = TDSCatalog(
        'http://thredds.ucar.edu/thredds/catalog/grib/NCEP/GFS/Global_0p5deg/catalog.xml?dataset=grib/NCEP/GFS/Global_0p5deg/Best'
    )
    best_ds = list(best_gfs.datasets.values())[0]
    return NCSS(best_ds.access_urls['NetcdfSubset'])
def set_dataset(self):
    '''
    Retrieves the designated dataset, creates NCSS object, and
    creates a NCSS query object.

    The dataset is chosen by matching ``self.set_type`` against the
    lower-cased first word of each dataset name in ``self.model.datasets``.

    Notes
    -----
    Assigns ``self.dataset``, ``self.access_url``, ``self.ncss`` and
    ``self.query``.

    Raises
    ------
    ValueError
        If ``self.set_type`` is not 'best', 'latest' or 'full', or if no
        dataset name starts with that label.
    '''
    valid_set_types = ('best', 'latest', 'full')
    if self.set_type not in valid_set_types:
        # Previously an unknown set_type fell through every branch, leaving
        # self.dataset unset (or silently stale from an earlier call).
        raise ValueError(
            'set_type must be one of {}, got {!r}'.format(
                valid_set_types, self.set_type))

    keys = list(self.model.datasets.keys())
    labels = [item.split()[0].lower() for item in keys]
    self.dataset = self.model.datasets[keys[labels.index(self.set_type)]]

    self.access_url = self.dataset.access_urls[self.access_url_key]
    self.ncss = NCSS(self.access_url)
    self.query = self.ncss.query()
def get_data(lon_w, lon_e, lat_s, lat_n, variable):
    """Fetch one GFS 0.25 degree variable over a lon/lat box for all times.

    TODO Add reset, change colors of map, variable selection,
    model selection, lat/long validator

    Parameters
    ----------
    lon_w, lon_e, lat_s, lat_n : float
        West, east, south and north edges of the bounding box.
    variable
        Key translated to a dataset variable name by ``variable_dict``.

    Returns
    -------
    tuple
        ``(lon_2d, lat_2d, var1, time_val, lonlat_list)``: 2D longitude and
        latitude grids, the requested netCDF variable, the decoded time
        value(s), and a ``zip`` iterator of ``(lon, lat)`` pairs for every
        grid point.

    Raises
    ------
    ValueError
        If the variable has no dimension whose name contains 'time'.
    """
    cat_url = 'http://thredds-jumbo.unidata.ucar.edu/thredds/catalog/grib/NCEP/GFS/Global_0p25deg/catalog.xml'
    latest_gfs = get_latest_access_url(cat_url, 'NetcdfSubset')
    ncss = NCSS(latest_gfs)

    var_name = variable_dict(variable)

    query = ncss.query()
    query.lonlat_box(west=lon_w, east=lon_e, south=lat_s, north=lat_n).all_times()
    query.accept('netcdf4')
    query.variables(var_name)
    data = ncss.get_data(query)

    var1 = data.variables[var_name]

    # only works if has name time+... or only has 1 dimension
    # Initialise first: otherwise the "not found" check below would raise
    # UnboundLocalError instead of the intended ValueError.
    time_name = None
    for dim in var1.dimensions:
        if 'time' in dim:
            # No break: as before, the last matching dimension wins.
            time_name = dim
    if time_name is None:
        raise ValueError("Couldn't find a time dimension for " + var1.name)

    time_1d = data.variables[time_name]
    lat_1d = data.variables['lat']
    lon_1d = data.variables['lon']

    # Reduce the dimensions of the data
    lat_1d = lat_1d[:].squeeze()
    lon_1d = lon_1d[:].squeeze()

    # Convert the number of hours since the reference time to an actual date
    time_val = num2date(time_1d[:].squeeze(), time_1d.units)

    # Combine latitude and longitudes
    lon_2d, lat_2d = np.meshgrid(lon_1d, lat_1d)

    # flatten() collapses each meshgrid array into one 1D array
    full_lat_1d = lat_2d.flatten()
    full_lon_1d = lon_2d.flatten()

    # Pair longitudes with latitudes (a lazy zip iterator, as before)
    lonlat_list = zip(full_lon_1d, full_lat_1d)

    return lon_2d, lat_2d, var1, time_val, lonlat_list
def get_closest_gfs(time, level, field):
    """
    Retrieve the current best 0.25 deg GFS model for a given field, level, time.

    time : datetime object
    level : level of results (in hPa)
    field : CF field to retrieve

    Returns ``(time_vals, lat_2d, lon_2d, data_var)``: the decoded time
    value(s), 2D latitude and longitude grids, and the Gaussian-smoothed
    field taken at index ``[0][0]`` of the returned data.

    Raises ``ValueError`` if none of the field's coordinates has 'time' in
    its name.
    """
    # Get the catalog and best GFS entry
    catalog = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/grib/NCEP/GFS/Global_0p25deg/catalog.xml')
    best_gfs = list(catalog.datasets.values())[1]

    # Using NCSS, build a query and fetch the data
    ncss = NCSS(best_gfs.access_urls['NetcdfSubset'])
    query = ncss.query()
    query.lonlat_box(north=90, south=10, east=360, west=160)
    query.vertical_level(level)
    query.time(time)
    query.accept('netcdf4')
    query.variables(field)
    data = ncss.get_data(query)

    # Pull out the variables we will use
    lat_var = data.variables['lat']
    lon_var = data.variables['lon']
    data_var = data.variables[field]

    # Find the correct time dimension name
    for coord in data_var.coordinates.split():
        if 'time' in coord:
            time_var = data.variables[coord]
            break
    else:
        # Without this, ``time_var`` would be unbound below and the caller
        # would see an opaque UnboundLocalError.
        raise ValueError('No time coordinate found for ' + str(field))

    # Convert number of hours since the reference time into an actual date
    time_vals = netCDF4.num2date(time_var[:].squeeze(), time_var.units)

    # Combine 1D latitude and longitudes into a 2D grid of locations
    lon_2d, lat_2d = np.meshgrid(lon_var[:], lat_var[:])

    # Filter the data to smooth it out a bit
    data_var = ndimage.gaussian_filter(data_var[:][0][0], sigma=1.5, order=0)

    return time_vals, lat_2d, lon_2d, data_var
def retrieve_point_forecast(ds, lat, lon, var, ensemble):
    '''
    Retrieve a forecast variable at one point for every available time.

    ds: a siphon dataset object
    lat: latitude of the point
    lon: longitude of the point
    var: model variable name to extract
    ensemble: True/False indicator if the ds object contains ensemble data

    The values are treated as kelvin and converted to degrees Fahrenheit.
    For ensemble data the variable carries an additional dimension
    (index 2), and a list with one pandas Series per member is returned;
    otherwise a single Series is returned.  Every Series is indexed by the
    decoded ``time`` variable.
    '''
    ncss = NCSS(ds.access_urls['NetcdfSubset'])
    request = ncss.query()
    request.lonlat_point(lon, lat)
    request.all_times()
    request.variables(var).accept('netcdf')
    response = ncss.get_data(request)

    field = response.variables[var]
    time_axis = response.variables['time']
    timestamps = num2date(time_axis[:].squeeze(), time_axis.units)
    ureg = UnitRegistry()

    def as_fahrenheit_series(kelvin_values):
        # Drop singleton axes, attach kelvin, convert, index by forecast time.
        converted = (kelvin_values.squeeze() * ureg.kelvin).to(ureg.degF)
        return pd.Series(converted, index=timestamps)

    if not ensemble:
        return as_fahrenheit_series(field[:, :, 0])

    member_count = field.shape[2]
    return [as_fahrenheit_series(field[:, :, member, :])
            for member in range(member_count)]
def retrieve_gfs_analysis(
        time, lat=50,
        variables=['Geopotential_height_isobaric',
                   'u-component_of_wind_isobaric']):
    """Fetch GFS analysis fields along the latitude circles at +lat and -lat.

    Parameters
    ----------
    time : datetime-like
        Analysis time; its ``strftime`` builds the NCSS URL.
    lat : number, default 50
        Latitude of the northern circle; the southern one is at ``-lat``.
    variables : list of str
        Variable names to request.  The default list is only unpacked,
        never mutated.

    Returns
    -------
    An xarray object concatenating the northern and southern results along
    the ``lat`` dimension (north first).
    """
    ncss = NCSS(time.strftime(
        'https://www.ncei.noaa.gov/thredds/ncss/grid/gfs-g3-anl-files/%Y%m/%Y%m%d/gfsanl_3_%Y%m%d_%H%M_000.grb2/'
    ))
    request = ncss.query()
    request.all_times().variables(*variables)

    def fetch_circle(latitude):
        # Reuse the same query, changing only the box: a zero-height strip
        # spanning all longitudes.
        request.lonlat_box(north=latitude, south=latitude, east=360., west=0.)
        return ncss.get_data(request)

    # Both downloads finish before either result is wrapped by xarray.
    nc_north = fetch_circle(lat)
    nc_south = fetch_circle(-lat)

    hemispheres = [xr.open_dataset(xr.backends.NetCDF4DataStore(nc))
                   for nc in (nc_north, nc_south)]
    return xr.concat(hemispheres, dim='lat')
def set_dataset(self):
    '''
    Retrieves the designated dataset, creates NCSS object, and
    initiates a NCSS query.

    The dataset is chosen by matching ``self.set_type`` against the
    lower-cased first word of each dataset name in ``self.model.datasets``.

    Notes
    -----
    Assigns ``self.dataset``, ``self.access_url``, ``self.ncss`` and
    ``self.query``.

    Raises
    ------
    ValueError
        If ``self.set_type`` is not 'best', 'latest' or 'full', or if no
        dataset name starts with that label.
    '''
    valid_set_types = ('best', 'latest', 'full')
    if self.set_type not in valid_set_types:
        # Previously an unknown set_type fell through every branch, leaving
        # self.dataset unset (or silently stale from an earlier call).
        raise ValueError(
            'set_type must be one of {}, got {!r}'.format(
                valid_set_types, self.set_type))

    keys = list(self.model.datasets.keys())
    labels = [item.split()[0].lower() for item in keys]
    self.dataset = self.model.datasets[keys[labels.index(self.set_type)]]

    self.access_url = self.dataset.access_urls[self.access_url_key]
    self.ncss = NCSS(self.access_url)
    self.query = self.ncss.query()
def setup(self):
    """Create the NCSS client and a baseline point query used by the tests.

    Assigns ``self.ncss`` and ``self.nq``; the query targets lon -105,
    lat 40 at 2015-06-12 15:00 and requests isobaric temperature and
    relative humidity.
    """
    valid_time = datetime(2015, 6, 12, 15)
    self.ncss = NCSS(self.server + self.urlPath)

    point_query = self.ncss.query().lonlat_point(-105, 40)
    self.nq = point_query.time(valid_time)
    self.nq.variables('Temperature_isobaric', 'Relative_humidity_isobaric')
class TestNCSS(object):
    """Tests for the NCSS client against a GFS 0.5 degree dataset.

    Each network-touching test is wrapped in ``recorder.use_cassette``;
    presumably this replays recorded HTTP traffic -- confirm against the
    recorder's definition.
    """

    # Base NCSS endpoint and the dataset path appended to it in ``setup``.
    server = 'http://thredds.ucar.edu/thredds/ncss/'
    urlPath = 'grib/NCEP/GFS/Global_0p5deg/GFS_Global_0p5deg_20150612_1200.grib2'

    @recorder.use_cassette('ncss_test_metadata')
    def setup(self):
        """Build the NCSS client and a baseline point query (``self.nq``)."""
        dt = datetime(2015, 6, 12, 15, 0, 0)
        self.ncss = NCSS(self.server + self.urlPath)
        self.nq = self.ncss.query().lonlat_point(-105, 40).time(dt)
        self.nq.variables('Temperature_isobaric', 'Relative_humidity_isobaric')

    def test_good_query(self):
        """The baseline query validates."""
        assert self.ncss.validate_query(self.nq)

    def test_bad_query(self):
        """Adding the variable name 'foo' makes validation fail."""
        self.nq.variables('foo')
        assert not self.ncss.validate_query(self.nq)

    def test_bad_query_no_vars(self):
        """A query with no variables fails validation."""
        self.nq.var.clear()
        assert not self.ncss.validate_query(self.nq)

    @recorder.use_cassette('ncss_gfs_xml_point')
    def test_xml_point(self):
        """An XML point request returns both variables and the point location."""
        self.nq.accept('xml')
        xml_data = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in xml_data
        assert 'Relative_humidity_isobaric' in xml_data
        assert xml_data['lat'][0] == 40
        assert xml_data['lon'][0] == -105

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_csv_point(self):
        """A CSV point request returns both variables and the point location."""
        self.nq.accept('csv')
        csv_data = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in csv_data
        assert 'Relative_humidity_isobaric' in csv_data
        assert csv_data['lat'][0] == 40
        assert csv_data['lon'][0] == -105

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_unit_handler_csv(self):
        """With ``tuple_unit_handler``, CSV values are 2-item entries ending in the unit."""
        self.nq.accept('csv')
        self.ncss.unit_handler = tuple_unit_handler
        csv_data = self.ncss.get_data(self.nq)

        temp = csv_data['Temperature_isobaric']
        assert len(temp) == 2
        assert temp[1] == 'K'

        relh = csv_data['Relative_humidity_isobaric']
        assert len(relh) == 2
        assert relh[1] == '%'

    @recorder.use_cassette('ncss_gfs_xml_point')
    def test_unit_handler_xml(self):
        """With ``tuple_unit_handler``, XML values are 2-item entries ending in the unit."""
        self.nq.accept('xml')
        self.ncss.unit_handler = tuple_unit_handler
        xml_data = self.ncss.get_data(self.nq)

        temp = xml_data['Temperature_isobaric']
        assert len(temp) == 2
        assert temp[1] == 'K'

        relh = xml_data['Relative_humidity_isobaric']
        assert len(relh) == 2
        assert relh[1] == '%'

    @recorder.use_cassette('ncss_gfs_netcdf_point')
    def test_netcdf_point(self):
        """A netcdf point request exposes the variables and point coordinates."""
        self.nq.accept('netcdf')
        nc = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in nc.variables
        assert 'Relative_humidity_isobaric' in nc.variables
        assert nc.variables['latitude'][0] == 40
        assert nc.variables['longitude'][0] == -105

    @recorder.use_cassette('ncss_gfs_netcdf4_point')
    def test_netcdf4_point(self):
        """A netcdf4 point request exposes the variables and point coordinates."""
        self.nq.accept('netcdf4')
        nc = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in nc.variables
        assert 'Relative_humidity_isobaric' in nc.variables
        assert nc.variables['latitude'][0] == 40
        assert nc.variables['longitude'][0] == -105

    @recorder.use_cassette('ncss_gfs_vertical_level')
    def test_vertical_level(self):
        """Selecting vertical level 50000 yields a temperature starting '263.39'."""
        self.nq.accept('csv').vertical_level(50000)
        csv_data = self.ncss.get_data(self.nq)

        assert str(csv_data['Temperature_isobaric'])[:6] == '263.39'

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_raw_csv(self):
        """``get_data_raw`` returns the unparsed CSV bytes."""
        self.nq.accept('csv')
        csv_data = self.ncss.get_data_raw(self.nq)

        assert csv_data.startswith(b'date,lat')

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_unknown_mime(self):
        """Inside ``response_context()``, ``get_data`` returns raw bytes."""
        self.nq.accept('csv')
        # NOTE(review): response_context is defined elsewhere; presumably it
        # removes the registered response handlers -- confirm.
        with response_context():
            csv_data = self.ncss.get_data(self.nq)
            assert csv_data.startswith(b'date,lat')
def give_me_latest_gfs():
    """Return an NCSS client for the latest GFS 0.5 degree dataset.

    The NetcdfSubset access URL is looked up with ``get_latest_access_url``
    from the GFS ``Global_0p5deg`` catalog.
    """
    catalog_url = 'http://thredds.ucar.edu/thredds/catalog/grib/NCEP/GFS/Global_0p5deg/catalog.xml'
    subset_url = get_latest_access_url(catalog_url, "NetcdfSubset")
    return NCSS(subset_url)
class ForecastModel(object):
    """
    An object for querying and holding forecast model information for
    use within the pvlib library.

    Simplifies use of siphon library on a THREDDS server.

    Parameters
    ----------
    model_type: string
        UNIDATA category in which the model is located.
    model_name: string
        Name of the UNIDATA forecast model.
    set_type: string
        Model dataset type. One of 'best', 'latest' or 'full'.
    vert_level: None, float or integer, default None
        Vertical altitude for query data.

    Attributes
    ----------
    access_url: string
        URL specifying the dataset from which data will be retrieved.
    base_tds_url : string
        The top level server address
    catalog_url : string
        The url path of the catalog to parse.
    data: pd.DataFrame
        Data returned from the query.
    data_format: string
        Format of the forecast data being requested from UNIDATA.
    dataset: Dataset
        Object containing information used to access forecast data.
    dataframe_variables: list
        Model variables that are present in the data.
    datasets_list: list
        List of all available datasets.
    fm_models: Dataset
        TDSCatalog object containing all available
        forecast models from UNIDATA.
    fm_models_list: list
        List of all available forecast models from UNIDATA.
    latitude: list
        A list of floats containing latitude values.
    location: Location
        A pvlib Location object containing geographic quantities.
    longitude: list
        A list of floats containing longitude values.
    lbox: boolean
        Indicates the use of a location bounding box.
    ncss: NCSS object
        NCSS
    model_name: string
        Name of the UNIDATA forecast model.
    model: Dataset
        A dictionary of Dataset object, whose keys are the name of the
        dataset's name.
    model_url: string
        The url path of the dataset to parse.
    modelvariables: list
        Common variable names that correspond to queryvariables.
    query: NCSS query object
        NCSS object used to complete the forecast data retrieval.
    queryvariables: list
        Variables that are used to query the THREDDS Data Server.
    time: DatetimeIndex
        Time range.
    variables: dict
        Defines the variables to obtain from the weather
        model and how they should be renamed to common variable names.
    units: dict
        Dictionary containing the units of the standard variables
        and the model specific variables.
    vert_level: float or integer
        Vertical altitude for query data.
    """

    access_url_key = 'NetcdfSubset'
    catalog_url = 'https://thredds.ucar.edu/thredds/catalog.xml'
    base_tds_url = catalog_url.split('/thredds/')[0]
    data_format = 'netcdf'

    units = {
        'temp_air': 'C',
        'wind_speed': 'm/s',
        'ghi': 'W/m^2',
        'ghi_raw': 'W/m^2',
        'dni': 'W/m^2',
        'dhi': 'W/m^2',
        'total_clouds': '%',
        'low_clouds': '%',
        'mid_clouds': '%',
        'high_clouds': '%'
    }

    # Dataset types understood by set_dataset.
    _valid_set_types = ('best', 'latest', 'full')

    def __init__(self, model_type, model_name, set_type, vert_level=None):
        """Store the model selection; no network access happens here."""
        self.model_type = model_type
        self.model_name = model_name
        self.set_type = set_type
        self.connected = False
        self.vert_level = vert_level

    def connect_to_catalog(self):
        """
        Walks the THREDDS catalog down to the selected model and
        prepares the dataset, NCSS object and query.

        Notes
        -----
        Assigns ``self.catalog``, ``self.fm_models``,
        ``self.fm_models_list``, ``self.model``, ``self.datasets_list``
        and, through :py:meth:`set_dataset`, the dataset/NCSS/query
        attributes. Sets ``self.connected`` to True on success.
        """
        self.catalog = TDSCatalog(self.catalog_url)
        self.fm_models = TDSCatalog(
            self.catalog.catalog_refs[self.model_type].href)
        self.fm_models_list = sorted(self.fm_models.catalog_refs.keys())

        # NOTE(review): a plain mapping lookup would raise KeyError rather
        # than ParseError -- confirm which exception catalog_refs raises.
        try:
            model_url = self.fm_models.catalog_refs[self.model_name].href
        except ParseError as err:
            raise ParseError(
                self.model_name + ' model may be unavailable.') from err

        # One retry before giving up on the model catalog.
        try:
            self.model = TDSCatalog(model_url)
        except HTTPError:
            try:
                self.model = TDSCatalog(model_url)
            except HTTPError as err:
                raise HTTPError(
                    self.model_name + ' model may be unavailable.') from err

        self.datasets_list = list(self.model.datasets.keys())
        self.set_dataset()
        self.connected = True

    def __repr__(self):
        return '{}, {}'.format(self.model_name, self.set_type)

    def set_dataset(self):
        '''
        Retrieves the designated dataset, creates NCSS object, and
        creates a NCSS query object.

        The dataset is chosen by matching ``self.set_type`` against the
        lower-cased first word of each dataset name.

        Raises
        ------
        ValueError
            If ``self.set_type`` is not 'best', 'latest' or 'full', or
            if no dataset name starts with that label.
        '''
        if self.set_type not in self._valid_set_types:
            # Previously an unknown set_type fell through every branch,
            # leaving self.dataset unset (or stale from an earlier call).
            raise ValueError(
                'set_type must be one of {}, got {!r}'.format(
                    self._valid_set_types, self.set_type))

        keys = list(self.model.datasets.keys())
        labels = [item.split()[0].lower() for item in keys]
        self.dataset = self.model.datasets[keys[labels.index(self.set_type)]]

        self.access_url = self.dataset.access_urls[self.access_url_key]
        self.ncss = NCSS(self.access_url)
        self.query = self.ncss.query()

    def set_query_time_range(self, start, end):
        """
        Parameters
        ----------
        start : datetime.datetime, pandas.Timestamp
            Must be tz-localized.
        end : datetime.datetime, pandas.Timestamp
            Must be tz-localized.

        Notes
        -----
        Assigns ``self.start``, ``self.end``. Modifies ``self.query``

        Raises
        ------
        TypeError
            If either ``start`` or ``end`` is not tz-localized.
        """
        self.start = pd.Timestamp(start)
        self.end = pd.Timestamp(end)
        if self.start.tz is None or self.end.tz is None:
            raise TypeError('start and end must be tz-localized')
        self.query.time_range(self.start, self.end)

    def set_query_latlon(self):
        '''
        Sets the NCSS query location latitude and longitude.

        Lists for both ``self.longitude`` and ``self.latitude`` select a
        bounding box; anything else is treated as a single point.
        '''

        if (isinstance(self.longitude, list) and
                isinstance(self.latitude, list)):
            self.lbox = True
            # west, east, south, north
            self.query.lonlat_box(self.longitude[0], self.longitude[1],
                                  self.latitude[0], self.latitude[1])
        else:
            self.lbox = False
            self.query.lonlat_point(self.longitude, self.latitude)

    def set_location(self, tz, latitude, longitude):
        '''
        Sets the location for the query.

        Parameters
        ----------
        tz: tzinfo
            Timezone of the query
        latitude: float
            Latitude of the query
        longitude: float
            Longitude of the query

        Notes
        -----
        Assigns ``self.location``.
        '''
        self.location = Location(latitude, longitude, tz=tz)

    def get_data(self, latitude, longitude, start, end,
                 vert_level=None, query_variables=None,
                 close_netcdf_data=True, **kwargs):
        """
        Submits a query to the UNIDATA servers using Siphon NCSS and
        converts the netcdf data to a pandas DataFrame.

        Parameters
        ----------
        latitude: float
            The latitude value.
        longitude: float
            The longitude value.
        start: datetime or timestamp
            The start time.
        end: datetime or timestamp
            The end time.
        vert_level: None, float or integer, default None
            Vertical altitude of interest.
        query_variables: None or list, default None
            If None, uses self.variables.
        close_netcdf_data: bool, default True
            Controls if the temporary netcdf data file should be closed.
            Set to False to access the raw data.
        **kwargs:
            Additional keyword arguments are silently ignored.

        Returns
        -------
        forecast_data : DataFrame
            column names are the weather model's variable names.
        """

        if not self.connected:
            self.connect_to_catalog()

        if vert_level is not None:
            self.vert_level = vert_level

        if query_variables is None:
            self.query_variables = list(self.variables.values())
        else:
            self.query_variables = query_variables

        self.set_query_time_range(start, end)

        self.latitude = latitude
        self.longitude = longitude
        self.set_query_latlon()  # modifies self.query
        self.set_location(self.start.tz, latitude, longitude)

        if self.vert_level is not None:
            self.query.vertical_level(self.vert_level)

        self.query.variables(*self.query_variables)
        self.query.accept(self.data_format)

        self.netcdf_data = self.ncss.get_data(self.query)

        # might be better to go to xarray here so that we can handle
        # higher dimensional data for more advanced applications
        self.data = self._netcdf2pandas(self.netcdf_data, self.query_variables,
                                        self.start, self.end)

        if close_netcdf_data:
            self.netcdf_data.close()

        return self.data

    def process_data(self, data, **kwargs):
        """
        Defines the steps needed to convert raw forecast data
        into processed forecast data. Most forecast models implement
        their own version of this method which also call this one.

        Parameters
        ----------
        data: DataFrame
            Raw forecast data

        Returns
        -------
        data: DataFrame
            Processed forecast data.
        """
        data = self.rename(data)
        return data

    def get_processed_data(self, *args, **kwargs):
        """
        Get and process forecast data.

        Parameters
        ----------
        *args: positional arguments
            Passed to get_data
        **kwargs: keyword arguments
            Passed to get_data and process_data

        Returns
        -------
        data: DataFrame
            Processed forecast data
        """
        return self.process_data(self.get_data(*args, **kwargs), **kwargs)

    def rename(self, data, variables=None):
        """
        Renames the columns according the variable mapping.

        Parameters
        ----------
        data: DataFrame
        variables: None or dict, default None
            If None, uses self.variables

        Returns
        -------
        data: DataFrame
            Renamed data.
        """
        if variables is None:
            variables = self.variables
        # The mapping is common name -> model name; invert it for renaming.
        return data.rename(columns={y: x for x, y in variables.items()})

    def _netcdf2pandas(self, netcdf_data, query_variables, start, end):
        """
        Transforms data from netcdf to pandas DataFrame.

        Parameters
        ----------
        data: netcdf
            Data returned from UNIDATA NCSS query.
        query_variables: list
            The variables requested.
        start: Timestamp
            The start time
        end: Timestamp
            The end time

        Returns
        -------
        pd.DataFrame

        Raises
        ------
        ValueError
            If a requested variable has more than two dimensions after
            squeezing.
        """
        # set self.time
        try:
            time_var = 'time'
            self.set_time(netcdf_data.variables[time_var])
        except KeyError:
            # which model does this dumb thing?
            time_var = 'time1'
            self.set_time(netcdf_data.variables[time_var])

        data_dict = {}
        for key, values in netcdf_data.variables.items():
            # if accounts for possibility of extra variable returned
            if key not in query_variables:
                continue
            squeezed = values[:].squeeze()
            if squeezed.ndim == 1:
                data_dict[key] = squeezed
            elif squeezed.ndim == 2:
                # One column per entry along the second axis: key_0, key_1...
                for num, data_level in enumerate(squeezed.T):
                    data_dict[key + '_' + str(num)] = data_level
            else:
                raise ValueError('cannot parse ndim > 2')

        data = pd.DataFrame(data_dict, index=self.time)
        # sometimes data is returned as hours since T0
        # where T0 is before start. Then the hours between
        # T0 and start are added *after* end. So sort and slice
        # to remove the garbage
        data = data.sort_index().loc[start:end]
        return data

    def set_time(self, time):
        '''
        Converts time data into a pandas date object.

        Parameters
        ----------
        time: netcdf
            Contains time information.

        Notes
        -----
        Assigns ``self.time`` (a pandas.DatetimeIndex localized to
        ``self.location.tz``); nothing is returned.
        '''
        times = num2date(time[:].squeeze(), time.units,
                         only_use_cftime_datetimes=False,
                         only_use_python_datetimes=True)
        self.time = pd.DatetimeIndex(pd.Series(times), tz=self.location.tz)

    def cloud_cover_to_ghi_linear(self, cloud_cover, ghi_clear, offset=35,
                                  **kwargs):
        """
        Convert cloud cover to GHI using a linear relationship.

        0% cloud cover returns ghi_clear.

        100% cloud cover returns offset*ghi_clear.

        Parameters
        ----------
        cloud_cover: numeric
            Cloud cover in %.
        ghi_clear: numeric
            GHI under clear sky conditions.
        offset: numeric, default 35
            Determines the minimum GHI, in % of ghi_clear.
        kwargs
            Not used.

        Returns
        -------
        ghi: numeric
            Estimated GHI.

        References
        ----------
        Larson et. al. "Day-ahead forecasting of solar power output from
        photovoltaic plants in the American Southwest" Renewable Energy
        91, 11-20 (2016).
        """

        offset = offset / 100.
        cloud_cover = cloud_cover / 100.
        ghi = (offset + (1 - offset) * (1 - cloud_cover)) * ghi_clear
        return ghi

    def cloud_cover_to_irradiance_clearsky_scaling(self, cloud_cover,
                                                   method='linear',
                                                   **kwargs):
        """
        Estimates irradiance from cloud cover in the following steps:

        1. Determine clear sky GHI using Ineichen model and
           climatological turbidity.
        2. Estimate cloudy sky GHI using a function of
           cloud_cover e.g.
           :py:meth:`~ForecastModel.cloud_cover_to_ghi_linear`
        3. Estimate cloudy sky DNI using the DISC model.
        4. Calculate DHI from DNI and GHI.

        Parameters
        ----------
        cloud_cover : Series
            Cloud cover in %.
        method : str, default 'linear'
            Method for converting cloud cover to GHI.
            'linear' is currently the only option.
        **kwargs
            Passed to the method that does the conversion

        Returns
        -------
        irrads : DataFrame
            Estimated GHI, DNI, and DHI.

        Raises
        ------
        ValueError
            If ``method`` is not 'linear'.
        """
        solpos = self.location.get_solarposition(cloud_cover.index)
        cs = self.location.get_clearsky(cloud_cover.index, model='ineichen',
                                        solar_position=solpos)

        method = method.lower()
        if method == 'linear':
            ghi = self.cloud_cover_to_ghi_linear(cloud_cover, cs['ghi'],
                                                 **kwargs)
        else:
            raise ValueError('invalid method argument')

        dni = disc(ghi, solpos['zenith'], cloud_cover.index)['dni']
        dhi = ghi - dni * np.cos(np.radians(solpos['zenith']))

        irrads = pd.DataFrame({'ghi': ghi, 'dni': dni, 'dhi': dhi}).fillna(0)
        return irrads

    def cloud_cover_to_transmittance_linear(self, cloud_cover, offset=0.75,
                                            **kwargs):
        """
        Convert cloud cover to atmospheric transmittance using a linear
        model.

        0% cloud cover returns offset.

        100% cloud cover returns 0.

        Parameters
        ----------
        cloud_cover : numeric
            Cloud cover in %.
        offset : numeric, default 0.75
            Determines the maximum transmittance.
        kwargs
            Not used.

        Returns
        -------
        transmittance : numeric
            Estimated atmospheric transmittance.
        """
        transmittance = ((100.0 - cloud_cover) / 100.0) * offset

        return transmittance

    def cloud_cover_to_irradiance_liujordan(self, cloud_cover, **kwargs):
        """
        Estimates irradiance from cloud cover in the following steps:

        1. Determine transmittance using a function of cloud cover e.g.
           :py:meth:`~ForecastModel.cloud_cover_to_transmittance_linear`
        2. Calculate GHI, DNI, DHI using the
           :py:func:`pvlib.irradiance.liujordan` model

        Parameters
        ----------
        cloud_cover : Series

        Returns
        -------
        irradiance : DataFrame
            Columns include ghi, dni, dhi
        """
        # in principle, get_solarposition could use the forecast
        # pressure, temp, etc., but the cloud cover forecast is not
        # accurate enough to justify using these minor corrections
        solar_position = self.location.get_solarposition(cloud_cover.index)
        dni_extra = get_extra_radiation(cloud_cover.index)
        airmass = self.location.get_airmass(cloud_cover.index)

        transmittance = self.cloud_cover_to_transmittance_linear(
            cloud_cover, **kwargs)

        irrads = liujordan(solar_position['apparent_zenith'],
                           transmittance, airmass['airmass_absolute'],
                           dni_extra=dni_extra)
        irrads = irrads.fillna(0)

        return irrads

    def cloud_cover_to_irradiance(self, cloud_cover, how='clearsky_scaling',
                                  **kwargs):
        """
        Convert cloud cover to irradiance. A wrapper method.

        Parameters
        ----------
        cloud_cover : Series
        how : str, default 'clearsky_scaling'
            Selects the method for conversion. Can be one of
            clearsky_scaling or liujordan.
        **kwargs
            Passed to the selected method.

        Returns
        -------
        irradiance : DataFrame
            Columns include ghi, dni, dhi

        Raises
        ------
        ValueError
            If ``how`` is not one of the supported methods.
        """

        how = how.lower()
        if how == 'clearsky_scaling':
            irrads = self.cloud_cover_to_irradiance_clearsky_scaling(
                cloud_cover, **kwargs)
        elif how == 'liujordan':
            irrads = self.cloud_cover_to_irradiance_liujordan(
                cloud_cover, **kwargs)
        else:
            raise ValueError('invalid how argument')

        return irrads

    def kelvin_to_celsius(self, temperature):
        """
        Converts Kelvin to celsius.

        Parameters
        ----------
        temperature: numeric

        Returns
        -------
        temperature: numeric
        """
        return temperature - 273.15

    def isobaric_to_ambient_temperature(self, data):
        """
        Calculates temperature from isobaric temperature.

        Parameters
        ----------
        data: DataFrame
            Must contain columns pressure, temperature_iso,
            temperature_dew_iso. Input temperature in K.

        Returns
        -------
        temperature : Series
            Temperature in K
        """

        P = data['pressure'] / 100.0                        # noqa: N806
        Tiso = data['temperature_iso']                      # noqa: N806
        Td = data['temperature_dew_iso'] - 273.15           # noqa: N806

        # saturation water vapor pressure
        e = 6.11 * 10**((7.5 * Td) / (Td + 273.3))

        # saturation water vapor mixing ratio
        w = 0.622 * (e / (P - e))

        temperature = Tiso - ((2.501 * 10.**6) / 1005.7) * w

        return temperature

    def uv_to_speed(self, data):
        """
        Computes wind speed from wind components.

        Parameters
        ----------
        data : DataFrame
            Must contain the columns 'wind_speed_u' and 'wind_speed_v'.

        Returns
        -------
        wind_speed : Series
        """
        wind_speed = np.sqrt(data['wind_speed_u']**2 + data['wind_speed_v']**2)

        return wind_speed

    def gust_to_speed(self, data, scaling=1 / 1.4):
        """
        Computes standard wind speed from gust.
        Very approximate and location dependent.

        Parameters
        ----------
        data : DataFrame
            Must contain the column 'wind_speed_gust'.
        scaling : numeric, default 1/1.4
            Factor applied to the gust speed.

        Returns
        -------
        wind_speed : Series
        """
        wind_speed = data['wind_speed_gust'] * scaling

        return wind_speed
raise ValueError('No time variable found for ' + var.name) ##################################### # Obtain data # Construct a TDSCatalog instance pointing to the gfs dataset best_gfs = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/grib/' 'NCEP/GFS/Global_0p5deg/catalog.xml') # Pull out the dataset you want to use and look at the access URLs best_ds = list(best_gfs.datasets.values())[1] print(best_ds.access_urls) # Create NCSS object to access the NetcdfSubset ncss = NCSS(best_ds.access_urls['NetcdfSubset']) print(best_ds.access_urls['NetcdfSubset']) ##################################### # First Query for MSLP # Create lat/lon box for location you want to get data for query = ncss.query() query.lonlat_box(north=50, south=30, east=-80, west=-115).time(datetime.utcnow()) query.accept('netcdf4') # Request data for MSLP query.variables('MSLP_Eta_model_reduction_msl') data = ncss.get_data(query)
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
from metpy.calc import get_wind_speed
from metpy.units import units
from netCDF4 import num2date
import numpy as np
import scipy.ndimage as ndimage
from siphon.ncss import NCSS

##################################
# Point the NetCDF Subset Service client at the NAM analysis file for `dt`
dt = datetime(2016, 4, 16, 18)
ncss = NCSS(
    'http://nomads.ncdc.noaa.gov/thredds/ncss/grid/namanl/'
    '{0:%Y%m}/{0:%Y%m%d}/namanl_218_{0:%Y%m%d}_{0:%H}00_000.grb'.format(dt))

# Build the query step by step: valid time, variables, then lat/lon fields
hgt = ncss.query()
hgt.time(dt)
hgt.variables('Geopotential_height', 'u_wind', 'v_wind')
hgt.add_lonlat()

# Submit the request
data = ncss.get_data(hgt)

##################################
# Pull apart the data

# The first two dimension names of the height variable are kept so the
# matching coordinate variables can be looked up by name afterwards
dtime = data.variables['Geopotential_height'].dimensions[0]
dlev = data.variables['Geopotential_height'].dimensions[1]
'\n' + str(np.int(data[mxy[i], mxx[i]])), color=color, size=12, clip_on=True, fontweight='bold', horizontalalignment='center', verticalalignment='top', transform=transform) ############################### # Get NARR data dattim = datetime(1999, 1, 3, 0) ncss = NCSS( 'https://www.ncei.noaa.gov/thredds/ncss/grid/narr-a-files/{0:%Y%m}/{0:%Y%m%d}/' 'narr-a_221_{0:%Y%m%d}_{0:%H}00_000.grb'.format(dattim)) query = ncss.query() query.all_times().variables( 'Pressure_reduced_to_MSL_msl', 'Geopotential_height_isobaric').add_lonlat().accept('netcdf') data = ncss.get_data(query) ############################### # Extract data into variables # Grab pressure levels plev = list(data.variables['isobaric1'][:]) # Grab lat/lons and make all lons 0-360 lats = data.variables['lat'][:]
import matplotlib.pyplot as plt
import metpy.calc as mcalc
from metpy.units import units
from netCDF4 import num2date
import numpy as np
import numpy.ma as ma
from scipy.ndimage import gaussian_filter
from siphon.ncss import NCSS

###########################
# **Get the data**
#
# This example uses North American Mesoscale Model Analysis data
# (https://nomads.ncdc.noaa.gov/) for 27 April 2011. Note that the file
# requested below is the 1800 one.
ncss = NCSS(
    'https://nomads.ncdc.noaa.gov/thredds/ncss/grid/namanl/201104/20110427/'
    'namanl_218_20110427_1800_000.grb')

# Build the query: every time step, the required variables, plus lat/lon.
# NOTE(review): the name `gfsdata` is historical -- the dataset is NAM.
gfsdata = ncss.query()
gfsdata.all_times()
gfsdata.variables('Geopotential_height', 'u_wind', 'v_wind', 'Temperature',
                  'Relative_humidity', 'Best_4-layer_lifted_index',
                  'Absolute_vorticity', 'Pressure_reduced_to_MSL',
                  'Dew_point_temperature')
gfsdata.add_lonlat()

# Restrict the request to a lat/lon box (135W-60W, 15N-65N)
gfsdata.lonlat_box(west=-135, east=-60, south=15, north=65)

# Submit the request
data = ncss.get_data(gfsdata)
def setup(self):
    """Create the NCSS client and a default valid point query for the tests."""
    self.ncss = NCSS(self.server + self.urlPath)

    # Default query: a single point at a fixed time
    query_time = datetime(2015, 6, 12, 15, 0, 0)
    point_query = self.ncss.query()
    point_query = point_query.lonlat_point(-105, 40)
    point_query = point_query.time(query_time)
    self.nq = point_query

    self.nq.variables('Temperature_isobaric', 'Relative_humidity_isobaric')
########################################
# Begin Data Ingest
# -----------------
# Open the decoded-METAR catalog on the THREDDS server and take its first
# dataset
metar = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/nws/'
                   'metar/ncdecoded/catalog.xml')
metar_datasets = list(metar.datasets.values())
dataset = metar_datasets[0]
print(list(dataset.access_urls))

########################################
# What variables are available in dataset?

# Connect to the dataset's NetCDF Subset Service endpoint and list what it
# offers
ncss_url = dataset.access_urls['NetcdfSubset']
ncss = NCSS(ncss_url)
print(ncss.variables)

########################################
# Set query to get desired data from Thredds server

# Current UTC time, truncated to the top of the hour
now = datetime.utcnow()
now = now.replace(minute=0, second=0, microsecond=0)

# Request window: the 24 hours ending at `now`
end = now
start = end - timedelta(days=1)

# Start an empty query against the subset service
query = ncss.query()
from metpy.units import units
from netCDF4 import num2date
import numpy as np
import numpy.ma as ma
from scipy.ndimage import gaussian_filter
from siphon.ncss import NCSS

###########################
# **Get the data**
#
# This example uses North American Mesoscale Model Analysis data from the
# NCEI THREDDS server (https://www.ncei.noaa.gov/thredds/) for 27 April 2011.
# Note that the file requested below is the 1800 one.
base_url = 'https://www.ncei.noaa.gov/thredds/ncss/grid/namanl/'
dt = datetime(2011, 4, 27)
ncss = NCSS(
    '{}{dt:%Y%m}/{dt:%Y%m%d}/namanl_218_{dt:%Y%m%d}_'
    '1800_000.grb'.format(base_url, dt=dt))

# Build the query: every time step, the required variables, plus lat/lon.
# NOTE(review): the name `gfsdata` is historical -- the dataset is NAM.
gfsdata = ncss.query()
gfsdata.all_times()
gfsdata.variables(
    'Geopotential_height_isobaric',
    'u-component_of_wind_isobaric',
    'v-component_of_wind_isobaric',
    'Temperature_isobaric',
    'Relative_humidity_isobaric',
    'Best_4_layer_lifted_index_layer_between_two_pressure_'
    'difference_from_ground_layer',
    'Absolute_vorticity_isobaric',
    'Pressure_reduced_to_MSL_msl',
    'Dew_point_temperature_height_above_ground')
gfsdata.add_lonlat()

# Restrict the request to a lat/lon box (135W-60W, 15N-65N)
gfsdata.lonlat_box(west=-135, east=-60, south=15, north=65)
cmap=colormap, transform=ccrs.PlateCarree()) cax = plt.subplot(gs[1]) cbar = plt.colorbar(contourfill, cax=cax, orientation='horizontal', extend='max', extendrect=True) # Latest GFS Dataset cat = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/grib/' 'NCEP/GFS/Global_0p5deg/latest.xml') #cat.datasets = [GFS_Global_0p5deg_20201128_1200.grib2] best_ds = list(cat.datasets.values())[0] ncss = NCSS(best_ds.access_urls['NetcdfSubset']) #NetCDF subset service object now = datetime.utcnow() data_hght = queryData(ncss, "height") data_wind = queryData(ncss, "wind") latitudes = data_hght.variables['lat'][:] longitudes = data_hght.variables['lon'][:] heights250hPa = data_hght.variables['Geopotential_height_isobaric'][:] # Smooth the 250-hPa heights using a gaussian filter from scipy.ndimage hgt_250, longitudes = cutil.add_cyclic_point(heights250hPa, coord=longitudes) Z_250 = ndimage.gaussian_filter(hgt_250[0, 0, :, :], sigma=3, order=0) u250 = (units(data_wind.variables['u-component_of_wind_isobaric'].units) *
class ForecastModel(object):
    """
    An object for querying and holding forecast model information for
    use within the pvlib library.

    Simplifies use of siphon library on a THREDDS server.

    Parameters
    ----------
    model_type: string
        UNIDATA category in which the model is located.
    model_name: string
        Name of the UNIDATA forecast model.
    set_type: string
        Model dataset type. One of 'best', 'latest' or 'full'.

    Attributes
    ----------
    access_url: string
        URL specifying the dataset from which data will be retrieved.
    base_tds_url : string
        The top level server address
    catalog_url : string
        The url path of the catalog to parse.
    data: pd.DataFrame
        Data returned from the query.
    data_format: string
        Format of the forecast data being requested from UNIDATA.
    dataset: Dataset
        Object containing information used to access forecast data.
    dataframe_variables: list
        Model variables that are present in the data.
    datasets_list: list
        List of all available datasets.
    fm_models: Dataset
        TDSCatalog object containing all available
        forecast models from UNIDATA.
    fm_models_list: list
        List of all available forecast models from UNIDATA.
    latitude: list
        A list of floats containing latitude values.
    location: Location
        A pvlib Location object containing geographic quantities.
    longitude: list
        A list of floats containing longitude values.
    lbox: boolean
        Indicates the use of a location bounding box.
    ncss: NCSS object
        NCSS
    model_name: string
        Name of the UNIDATA forecast model.
    model: Dataset
        A dictionary of Dataset object, whose keys are the name of the
        dataset's name.
    model_url: string
        The url path of the dataset to parse.
    modelvariables: list
        Common variable names that correspond to queryvariables.
    query: NCSS query object
        NCSS object used to complete the forecast data retrieval.
    queryvariables: list
        Variables that are used to query the THREDDS Data Server.
    time: DatetimeIndex
        Time range.
    variables: dict
        Defines the variables to obtain from the weather
        model and how they should be renamed to common variable names.
    units: dict
        Dictionary containing the units of the standard variables
        and the model specific variables.
    vert_level: float or integer
        Vertical altitude for query data.
    """

    access_url_key = 'NetcdfSubset'
    catalog_url = 'http://thredds.ucar.edu/thredds/catalog.xml'
    base_tds_url = catalog_url.split('/thredds/')[0]
    data_format = 'netcdf'
    vert_level = 100000

    units = {
        'temp_air': 'C',
        'wind_speed': 'm/s',
        'ghi': 'W/m^2',
        'ghi_raw': 'W/m^2',
        'dni': 'W/m^2',
        'dhi': 'W/m^2',
        'total_clouds': '%',
        'low_clouds': '%',
        'mid_clouds': '%',
        'high_clouds': '%'}

    def __init__(self, model_type, model_name, set_type):
        self.model_type = model_type
        self.model_name = model_name
        self.set_type = set_type
        self.catalog = TDSCatalog(self.catalog_url)
        self.fm_models = TDSCatalog(
            self.catalog.catalog_refs[model_type].href)
        self.fm_models_list = sorted(self.fm_models.catalog_refs.keys())

        try:
            model_url = self.fm_models.catalog_refs[model_name].href
        except ParseError:
            raise ParseError(self.model_name + ' model may be unavailable.')

        try:
            self.model = TDSCatalog(model_url)
        except HTTPError:
            # retry the same request once before giving up
            try:
                self.model = TDSCatalog(model_url)
            except HTTPError:
                raise HTTPError(self.model_name +
                                ' model may be unavailable.')

        self.datasets_list = list(self.model.datasets.keys())
        self.set_dataset()

    def __repr__(self):
        return '{}, {}'.format(self.model_name, self.set_type)

    def set_dataset(self):
        '''
        Retrieves the designated dataset, creates NCSS object, and
        creates a NCSS query object.

        Raises
        ------
        ValueError
            If set_type is not one of 'best', 'latest' or 'full', or if
            the catalog has no dataset whose name starts with that label.
        '''
        if self.set_type not in ('best', 'latest', 'full'):
            # previously an unknown set_type fell through silently and
            # left self.dataset unset (or stale)
            raise ValueError("set_type must be 'best', 'latest' or 'full', "
                             "got {!r}".format(self.set_type))

        keys = list(self.model.datasets.keys())
        # the first word of each dataset name identifies the set type
        labels = [item.split()[0].lower() for item in keys]
        self.dataset = self.model.datasets[keys[labels.index(self.set_type)]]

        self.access_url = self.dataset.access_urls[self.access_url_key]
        self.ncss = NCSS(self.access_url)
        self.query = self.ncss.query()

    def set_query_latlon(self):
        '''
        Sets the NCSS query location latitude and longitude.

        If both self.latitude and self.longitude are lists, a bounding
        box query is made from their first two elements; otherwise a
        point query is made.
        '''
        if (isinstance(self.longitude, list) and
                isinstance(self.latitude, list)):
            self.lbox = True
            # lonlat_box expects west, east, south, north: longitudes
            # first, then latitudes
            self.query.lonlat_box(self.longitude[0], self.longitude[1],
                                  self.latitude[0], self.latitude[1])
        else:
            self.lbox = False
            self.query.lonlat_point(self.longitude, self.latitude)

    def set_location(self, time, latitude, longitude):
        '''
        Sets the location for the query.

        Parameters
        ----------
        time: datetime or DatetimeIndex
            Time range of the query. Its time zone, if any, becomes the
            time zone of the Location.
        latitude: float
            The latitude value.
        longitude: float
            The longitude value.
        '''
        if isinstance(time, datetime.datetime):
            tzinfo = time.tzinfo
        else:
            tzinfo = time.tz

        if tzinfo is None:
            self.location = Location(latitude, longitude)
        else:
            self.location = Location(latitude, longitude, tz=tzinfo)

    def get_data(self, latitude, longitude, start, end,
                 vert_level=None, query_variables=None,
                 close_netcdf_data=True):
        """
        Submits a query to the UNIDATA servers using Siphon NCSS and
        converts the netcdf data to a pandas DataFrame.

        Parameters
        ----------
        latitude: float
            The latitude value.
        longitude: float
            The longitude value.
        start: datetime or timestamp
            The start time.
        end: datetime or timestamp
            The end time.
        vert_level: None, float or integer
            Vertical altitude of interest.
        query_variables: None or list
            If None, uses the values of self.variables.
        close_netcdf_data: bool
            Controls if the temporary netcdf data file should be closed.
            Set to False to access the raw data.

        Returns
        -------
        forecast_data : DataFrame
            column names are the weather model's variable names.
        """
        if vert_level is not None:
            self.vert_level = vert_level

        if query_variables is None:
            self.query_variables = list(self.variables.values())
        else:
            self.query_variables = query_variables

        self.latitude = latitude
        self.longitude = longitude
        self.set_query_latlon()  # modifies self.query
        self.set_location(start, latitude, longitude)

        self.start = start
        self.end = end
        self.query.time_range(self.start, self.end)

        self.query.vertical_level(self.vert_level)
        self.query.variables(*self.query_variables)
        self.query.accept(self.data_format)

        self.netcdf_data = self.ncss.get_data(self.query)

        # might be better to go to xarray here so that we can handle
        # higher dimensional data for more advanced applications
        self.data = self._netcdf2pandas(self.netcdf_data,
                                        self.query_variables)

        if close_netcdf_data:
            self.netcdf_data.close()

        return self.data

    def process_data(self, data, **kwargs):
        """
        Defines the steps needed to convert raw forecast data
        into processed forecast data. Most forecast models implement
        their own version of this method which also call this one.

        Parameters
        ----------
        data: DataFrame
            Raw forecast data

        Returns
        -------
        data: DataFrame
            Processed forecast data.
        """
        data = self.rename(data)
        return data

    def get_processed_data(self, *args, **kwargs):
        """
        Get and process forecast data.

        Parameters
        ----------
        *args: positional arguments
            Passed to get_data
        **kwargs: keyword arguments
            Passed to get_data and process_data

        Returns
        -------
        data: DataFrame
            Processed forecast data
        """
        return self.process_data(self.get_data(*args, **kwargs), **kwargs)

    def rename(self, data, variables=None):
        """
        Renames the columns according the variable mapping.

        Parameters
        ----------
        data: DataFrame
        variables: None or dict
            Maps common names to model names. If None, uses
            self.variables

        Returns
        -------
        data: DataFrame
            Renamed data.
        """
        if variables is None:
            variables = self.variables
        # the mapping is stored as common -> model; invert it for rename
        return data.rename(columns={y: x for x, y in variables.items()})

    def _netcdf2pandas(self, netcdf_data, query_variables):
        """
        Transforms data from netcdf to pandas DataFrame.

        Parameters
        ----------
        netcdf_data: netcdf
            Data returned from UNIDATA NCSS query.
        query_variables: list
            The variables requested.

        Returns
        -------
        pd.DataFrame
            One column per requested variable, indexed by self.time.
        """
        # set self.time; the time variable is named either 'time' or
        # 'time1' depending on the data returned
        try:
            time_var = netcdf_data.variables['time']
        except KeyError:
            time_var = netcdf_data.variables['time1']
        self.set_time(time_var)

        data_dict = {key: values[:].squeeze()
                     for key, values in netcdf_data.variables.items()
                     if key in query_variables}

        return pd.DataFrame(data_dict, index=self.time)

    def set_time(self, time):
        '''
        Converts time data into a pandas date object and stores it in
        self.time, localized to the time zone of self.location.

        Parameters
        ----------
        time: netcdf
            Contains time information.
        '''
        times = num2date(time[:].squeeze(), time.units)
        self.time = pd.DatetimeIndex(pd.Series(times), tz=self.location.tz)

    def cloud_cover_to_ghi_linear(self, cloud_cover, ghi_clear, offset=35,
                                  **kwargs):
        """
        Convert cloud cover to GHI using a linear relationship.

        0% cloud cover returns ghi_clear.

        100% cloud cover returns offset*ghi_clear.

        Parameters
        ----------
        cloud_cover: numeric
            Cloud cover in %.
        ghi_clear: numeric
            GHI under clear sky conditions.
        offset: numeric
            Determines the minimum GHI, in % of ghi_clear.
        kwargs
            Not used.

        Returns
        -------
        ghi: numeric
            Estimated GHI.

        References
        ----------
        Larson et. al. "Day-ahead forecasting of solar power output from
        photovoltaic plants in the American Southwest" Renewable Energy
        91, 11-20 (2016).
        """
        offset = offset / 100.
        cloud_cover = cloud_cover / 100.
        ghi = (offset + (1 - offset) * (1 - cloud_cover)) * ghi_clear
        return ghi

    def cloud_cover_to_irradiance_clearsky_scaling(self, cloud_cover,
                                                   method='linear',
                                                   **kwargs):
        """
        Estimates irradiance from cloud cover in the following steps:

        1. Determine clear sky GHI using Ineichen model and
           climatological turbidity.
        2. Estimate cloudy sky GHI using a function of
           cloud_cover e.g.
           :py:meth:`~ForecastModel.cloud_cover_to_ghi_linear`
        3. Estimate cloudy sky DNI using the DISC model.
        4. Calculate DHI from DNI and GHI.

        Parameters
        ----------
        cloud_cover : Series
            Cloud cover in %.
        method : str
            Method for converting cloud cover to GHI.
            'linear' is currently the only option.
        **kwargs
            Passed to the method that does the conversion

        Returns
        -------
        irrads : DataFrame
            Estimated GHI, DNI, and DHI.

        Raises
        ------
        ValueError
            If method is not 'linear'.
        """
        solpos = self.location.get_solarposition(cloud_cover.index)
        cs = self.location.get_clearsky(cloud_cover.index, model='ineichen',
                                        solar_position=solpos)

        method = method.lower()
        if method == 'linear':
            ghi = self.cloud_cover_to_ghi_linear(cloud_cover, cs['ghi'],
                                                 **kwargs)
        else:
            raise ValueError('invalid method argument')

        dni = disc(ghi, solpos['zenith'], cloud_cover.index)['dni']
        # closure equation: GHI = DHI + DNI * cos(zenith)
        dhi = ghi - dni * np.cos(np.radians(solpos['zenith']))

        irrads = pd.DataFrame({'ghi': ghi, 'dni': dni, 'dhi': dhi}).fillna(0)
        return irrads

    def cloud_cover_to_transmittance_linear(self, cloud_cover, offset=0.75,
                                            **kwargs):
        """
        Convert cloud cover to atmospheric transmittance using a linear
        model.

        0% cloud cover returns offset.

        100% cloud cover returns 0.

        Parameters
        ----------
        cloud_cover : numeric
            Cloud cover in %.
        offset : numeric
            Determines the maximum transmittance.
        kwargs
            Not used.

        Returns
        -------
        transmittance : numeric
            Estimated atmospheric transmittance.
        """
        # scale by the caller's offset (previously hard-coded to 0.75)
        transmittance = ((100.0 - cloud_cover) / 100.0) * offset
        return transmittance

    def cloud_cover_to_irradiance_liujordan(self, cloud_cover, **kwargs):
        """
        Estimates irradiance from cloud cover in the following steps:

        1. Determine transmittance using a function of cloud cover e.g.
           :py:meth:`~ForecastModel.cloud_cover_to_transmittance_linear`
        2. Calculate GHI, DNI, DHI using the
           :py:func:`pvlib.irradiance.liujordan` model

        Parameters
        ----------
        cloud_cover : Series
        **kwargs
            Passed to cloud_cover_to_transmittance_linear.

        Returns
        -------
        irradiance : DataFrame
            Columns include ghi, dni, dhi
        """
        # in principle, get_solarposition could use the forecast
        # pressure, temp, etc., but the cloud cover forecast is not
        # accurate enough to justify using these minor corrections
        solar_position = self.location.get_solarposition(cloud_cover.index)
        dni_extra = extraradiation(cloud_cover.index)
        airmass = self.location.get_airmass(cloud_cover.index)

        transmittance = self.cloud_cover_to_transmittance_linear(cloud_cover,
                                                                 **kwargs)

        irrads = liujordan(solar_position['apparent_zenith'],
                           transmittance, airmass['airmass_absolute'],
                           dni_extra=dni_extra)
        irrads = irrads.fillna(0)

        return irrads

    def cloud_cover_to_irradiance(self, cloud_cover, how='clearsky_scaling',
                                  **kwargs):
        """
        Convert cloud cover to irradiance. A wrapper method.

        Parameters
        ----------
        cloud_cover : Series
        how : str
            Selects the method for conversion. Can be one of
            clearsky_scaling or liujordan.
        **kwargs
            Passed to the selected method.

        Returns
        -------
        irradiance : DataFrame
            Columns include ghi, dni, dhi

        Raises
        ------
        ValueError
            If how is not a recognized method name.
        """
        how = how.lower()
        if how == 'clearsky_scaling':
            irrads = self.cloud_cover_to_irradiance_clearsky_scaling(
                cloud_cover, **kwargs)
        elif how == 'liujordan':
            irrads = self.cloud_cover_to_irradiance_liujordan(
                cloud_cover, **kwargs)
        else:
            raise ValueError('invalid how argument')

        return irrads

    def kelvin_to_celsius(self, temperature):
        """
        Converts Kelvin to celsius.

        Parameters
        ----------
        temperature: numeric

        Returns
        -------
        temperature: numeric
        """
        return temperature - 273.15

    def isobaric_to_ambient_temperature(self, data):
        """
        Calculates temperature from isobaric temperature.

        Parameters
        ----------
        data: DataFrame
            Must contain columns pressure, temperature_iso,
            temperature_dew_iso. Input temperature in K.

        Returns
        -------
        temperature : Series
            Temperature in K
        """
        P = data['pressure'] / 100.0
        Tiso = data['temperature_iso']
        Td = data['temperature_dew_iso'] - 273.15

        # saturation water vapor pressure
        e = 6.11 * 10**((7.5 * Td) / (Td + 273.3))

        # saturation water vapor mixing ratio
        w = 0.622 * (e / (P - e))

        T = Tiso - ((2.501 * 10.**6) / 1005.7) * w

        return T

    def uv_to_speed(self, data):
        """
        Computes wind speed from wind components.

        Parameters
        ----------
        data : DataFrame
            Must contain the columns 'wind_speed_u' and 'wind_speed_v'.

        Returns
        -------
        wind_speed : Series
        """
        wind_speed = np.sqrt(data['wind_speed_u']**2 + data['wind_speed_v']**2)

        return wind_speed

    def gust_to_speed(self, data, scaling=1/1.4):
        """
        Computes standard wind speed from gust.
        Very approximate and location dependent.

        Parameters
        ----------
        data : DataFrame
            Must contain the column 'wind_speed_gust'.
        scaling : numeric
            Factor the gust speed is multiplied by.

        Returns
        -------
        wind_speed : Series
        """
        wind_speed = data['wind_speed_gust'] * scaling

        return wind_speed
class ForecastModel(object):
    '''
    An object for holding forecast model information for use within the
    pvlib library.

    Simplifies use of siphon library on a THREDDS server.

    Parameters
    ----------
    model_type: string
        UNIDATA category in which the model is located.
    model_name: string
        Name of the UNIDATA forecast model.
    set_type: string
        Model dataset type.

    Attributes
    ----------
    access_url: string
        URL specifying the dataset from which data will be retrieved.
    base_tds_url : string
        The top level server address
    catalog_url : string
        The url path of the catalog to parse.
    columns: list
        List of headers used to create the data DataFrame.
    data: pd.DataFrame
        Data returned from the query.
    data_format: string
        Format of the forecast data being requested from UNIDATA.
    dataset: Dataset
        Object containing information used to access forecast data.
    dataframe_variables: list
        Model variables that are present in the data.
    datasets_list: list
        List of all available datasets.
    fm_models: Dataset
        Object containing all available forecast models.
    fm_models_list: list
        List of all available forecast models from UNIDATA.
    latitude: list
        A list of floats containing latitude values.
    location: Location
        A pvlib Location object containing geographic quantities.
    longitude: list
        A list of floats containing longitude values.
    lbox: boolean
        Indicates the use of a location bounding box.
    ncss: NCSS object
        NCSS
    model_name: string
        Name of the UNIDATA forecast model.
    model: Dataset
        A dictionary of Dataset object, whose keys are the name of the
        dataset's name.
    model_url: string
        The url path of the dataset to parse.
    modelvariables: list
        Common variable names that correspond to queryvariables.
    query: NCSS query object
        NCSS object used to complete the forecast data retrieval.
    queryvariables: list
        Variables that are used to query the THREDDS Data Server.
    rad_type: dictionary
        Dictionary labeling the method used for calculating radiation
        values.
    time: datetime
        Time range specified for the NCSS query.
    utctime: DatetimeIndex
        Time range in UTC.
    var_stdnames: dictionary
        Dictionary containing the standard names of the variables in the
        query, where the keys are the common names.
    var_units: dictionary
        Dictionary containing the units of the variables in the query,
        where the keys are the common names.
    variables: dictionary
        Dictionary that translates model specific variables to
        common named variables.
    vert_level: float or integer
        Vertical altitude for query data.
    wind_type: string
        Quantity that was used to calculate wind_speed.
    zenith: numpy.array
        Solar zenith angles for the given time range.
    '''

    access_url_key = 'NetcdfSubset'
    catalog_url = 'http://thredds.ucar.edu/thredds/catalog.xml'
    base_tds_url = catalog_url.split('/thredds/')[0]
    data_format = 'netcdf'
    vert_level = 100000
    columns = np.array(['temperature',
                        'wind_speed',
                        'total_clouds',
                        'low_clouds',
                        'mid_clouds',
                        'high_clouds',
                        'dni',
                        'dhi',
                        'ghi', ])

    def __init__(self, model_type, model_name, set_type):
        self.model_type = model_type
        self.model_name = model_name
        self.set_type = set_type
        self.catalog = TDSCatalog(self.catalog_url)
        self.fm_models = TDSCatalog(
            self.catalog.catalog_refs[model_type].href)
        self.fm_models_list = sorted(self.fm_models.catalog_refs.keys())

        try:
            model_url = self.fm_models.catalog_refs[model_name].href
        except ParseError:
            raise ParseError(self.model_name + ' model may be unavailable.')

        try:
            self.model = TDSCatalog(model_url)
        except HTTPError:
            raise HTTPError(self.model_name + ' model may be unavailable.')

        self.datasets_list = list(self.model.datasets.keys())
        self.set_dataset()

    def set_dataset(self):
        '''
        Retrieves the designated dataset, creates NCSS object, and
        initiates a NCSS query.
        '''
        keys = list(self.model.datasets.keys())
        # the first word of each dataset name identifies the set type
        labels = [item.split()[0].lower() for item in keys]
        if self.set_type == 'best':
            self.dataset = self.model.datasets[keys[labels.index('best')]]
        elif self.set_type == 'latest':
            self.dataset = self.model.datasets[keys[labels.index('latest')]]
        elif self.set_type == 'full':
            self.dataset = self.model.datasets[keys[labels.index('full')]]

        self.access_url = self.dataset.access_urls[self.access_url_key]
        self.ncss = NCSS(self.access_url)
        self.query = self.ncss.query()

    def set_query_latlon(self):
        '''
        Sets the NCSS query location latitude and longitude.

        A list-valued self.longitude selects a bounding box query built
        from the first two elements of longitude and latitude; otherwise
        a point query is made.
        '''
        if isinstance(self.longitude, list):
            self.lbox = True
            # lonlat_box expects west, east, south, north: longitudes
            # first, then latitudes
            self.query.lonlat_box(self.longitude[0], self.longitude[1],
                                  self.latitude[0], self.latitude[1])
        else:
            self.lbox = False
            self.query.lonlat_point(self.longitude, self.latitude)

    def set_query_time(self):
        '''
        Sets the NCSS query time from self.utctime: a single time when it
        holds one entry, otherwise the range from its first to its last
        entry.
        '''
        utc_times = pd.to_datetime(self.utctime)
        if len(self.utctime) == 1:
            self.query.time(utc_times[0])
        else:
            self.query.time_range(utc_times[0], utc_times[-1])

    def set_location(self, time):
        '''
        Sets self.location from self.latitude and self.longitude, using
        the time zone of the given time when it has one.

        Parameters
        ----------
        time: datetime or DatetimeIndex
            Time range of the query.
        '''
        if isinstance(time, datetime.datetime):
            tzinfo = time.tzinfo
        else:
            tzinfo = time.tz

        if tzinfo is None:
            self.location = Location(self.latitude, self.longitude)
        else:
            self.location = Location(self.latitude, self.longitude,
                                     tz=tzinfo)

    def get_query_data(self, latitude, longitude, time, vert_level=None,
                       variables=None):
        '''
        Submits a query to the UNIDATA servers using siphon NCSS and
        converts the netcdf data to a pandas DataFrame.

        Parameters
        ----------
        latitude: list
            A list of floats containing latitude values.
        longitude: list
            A list of floats containing longitude values.
        time: pd.datetimeindex
            Time range of interest.
        vert_level: float or integer
            Vertical altitude of interest.
        variables: dictionary
            Variables and common names being queried.

        Returns
        -------
        pd.DataFrame
        '''
        if vert_level is not None:
            self.vert_level = vert_level
        if variables is not None:
            self.variables = variables
            self.modelvariables = list(self.variables.keys())
            self.queryvariables = [self.variables[key]
                                   for key in self.modelvariables]
            self.columns = self.modelvariables
            self.dataframe_variables = self.modelvariables

        self.latitude = latitude
        self.longitude = longitude
        self.set_query_latlon()
        self.set_location(time)

        self.utctime = localize_to_utc(time, self.location)
        self.set_query_time()

        self.query.vertical_level(self.vert_level)
        self.query.variables(*self.queryvariables)
        self.query.accept(self.data_format)
        netcdf_data = self.ncss.get_data(self.query)

        # make sure the netcdf handle is released even if processing fails
        try:
            # the time variable is named either 'time' or 'time1'
            try:
                time_var = netcdf_data.variables['time']
            except KeyError:
                time_var = netcdf_data.variables['time1']
            self.set_time(time_var)

            self.data = self.netcdf2pandas(netcdf_data)

            self.set_variable_units(netcdf_data)
            self.set_variable_stdnames(netcdf_data)
            # compare by value; `is` on a string literal tests identity
            if self.__class__.__name__ == 'HRRR':
                self.calc_temperature(netcdf_data)
            self.convert_temperature()
            self.calc_wind(netcdf_data)
            self.calc_radiation(netcdf_data)

            self.data = self.data.tz_convert(self.location.tz)
        finally:
            netcdf_data.close()

        return self.data

    def netcdf2pandas(self, data):
        '''
        Transforms data from netcdf to pandas DataFrame.

        Currently only supports one-dimensional netcdf data. For a
        bounding box query an empty DataFrame with the expected columns
        and index is returned.

        Parameters
        ----------
        data: netcdf
            Data returned from UNIDATA NCSS query.

        Returns
        -------
        pd.DataFrame
        '''
        if self.lbox:
            return pd.DataFrame(columns=self.columns, index=self.utctime)

        # one-dimensional data
        data_dict = {
            var: pd.Series(data[self.variables[var]][:].squeeze(),
                           index=self.utctime)
            for var in self.dataframe_variables}
        return pd.DataFrame(data_dict, columns=self.columns)

    def set_time(self, time):
        '''
        Converts time data into pandas date objects, stored in self.time
        (in the time zone of self.location) and self.utctime.

        Parameters
        ----------
        time: netcdf
            Contains time information.
        '''
        times = num2date(time[:].squeeze(), time.units)
        self.time = pd.DatetimeIndex(pd.Series(times), tz='UTC')
        self.time = self.time.tz_convert(self.location.tz)
        self.utctime = localize_to_utc(self.time, self.location.tz)

    def set_variable_units(self, data):
        '''
        Extracts variable unit information from netcdf data.

        Parameters
        ----------
        data: netcdf
            Contains queried variable information.
        '''
        self.var_units = {var: data[self.variables[var]].units
                          for var in self.variables}

    def set_variable_stdnames(self, data):
        '''
        Extracts standard names from netcdf data. Variables without a
        standard_name attribute fall back to their common name.

        Parameters
        ----------
        data: netcdf
            Contains queried variable information.
        '''
        self.var_stdnames = {}
        for var in self.variables:
            try:
                self.var_stdnames[var] = \
                    data[self.variables[var]].standard_name
            except AttributeError:
                self.var_stdnames[var] = var

    def calc_radiation(self, data, cloud_type='total_clouds'):
        '''
        Determines shortwave radiation values if they are missing from
        the model data.

        Parameters
        ----------
        data: netcdf
            Query data formatted in netcdf format.
        cloud_type: string
            Type of cloud cover to use for calculating radiation values.
        '''
        rad_names = ('dni', 'dhi', 'ghi')
        self.rad_type = {}
        if not self.lbox and cloud_type in self.modelvariables:
            cloud_prct = self.data[cloud_type]
            solpos = get_solarposition(self.time, self.location)
            self.zenith = np.array(solpos.zenith.tz_convert('UTC'))
            if self.model_name == 'HRRR_ESRL':
                # HRRR_ESRL is the only model with the
                # correct equation of time.
                for rad in rad_names:
                    if rad in self.modelvariables:
                        self.data[rad] = pd.Series(
                            data[self.variables[rad]][:].squeeze(),
                            index=self.time)
                        self.rad_type[rad] = 'forecast'
            else:
                # evaluate the model once and reuse it for all three
                # components
                irrads = liujordan(self.zenith, cloud_prct)
                for rad in rad_names:
                    self.rad_type[rad] = 'liujordan'
                    self.data[rad] = irrads[rad]

        for var in rad_names:
            # only touch columns that exist, and assign the result back
            # rather than filling a temporary in place
            if var in self.data:
                self.data[var] = self.data[var].fillna(0)
                self.var_units[var] = '$W m^{-2}$'

    def convert_temperature(self):
        '''
        Converts Kelvin to celsius.
        '''
        if ('Temperature_surface' in self.queryvariables or
                'Temperature_isobaric' in self.queryvariables):
            self.data['temperature'] -= 273.15
            self.var_units['temperature'] = 'C'

    def calc_temperature(self, data):
        '''
        Calculates temperature from isobaric temperature and stores it in
        the 'temperature' column of self.data.

        Parameters
        ----------
        data: netcdf
            Query data in netcdf format.
        '''
        P = data['Pressure_surface'][:].squeeze() / 100.0
        Tiso = data['Temperature_isobaric'][:].squeeze()
        Td = data['Dewpoint_temperature_isobaric'][:].squeeze() - 273.15
        # saturation water vapor pressure
        e = 6.11 * 10**((7.5 * Td) / (Td + 273.3))
        # saturation water vapor mixing ratio
        w = 0.622 * (e / (P - e))

        T = Tiso - ((2.501 * 10.**6) / 1005.7) * w

        self.data['temperature'] = T

    def calc_wind(self, data):
        '''
        Computes wind speed.

        In some cases only gust wind speed is available. The wind_type
        attribute will indicate the type of wind speed that is present.
        Nothing is written when neither the wind components nor the gust
        speed were queried.

        Parameters
        ----------
        data: netcdf
            Query data in netcdf format.
        '''
        if self.lbox:
            return

        wind_data = None
        if ('u-component_of_wind_isobaric' in self.queryvariables and
                'v-component_of_wind_isobaric' in self.queryvariables):
            wind_data = np.sqrt(
                data['u-component_of_wind_isobaric'][:].squeeze()**2 +
                data['v-component_of_wind_isobaric'][:].squeeze()**2)
            self.wind_type = 'component'
        elif 'Wind_speed_gust_surface' in self.queryvariables:
            wind_data = data['Wind_speed_gust_surface'][:].squeeze()
            self.wind_type = 'gust'

        # wind_data stays None when no wind variable was queried
        if wind_data is not None and 'wind_speed' in self.data:
            self.data['wind_speed'] = pd.Series(wind_data, index=self.time)
            self.var_units['wind_speed'] = 'm/s'
import cartopy.feature as cfeature
import matplotlib.gridspec as gridspec
import matplotlib.pylab as plt
import metpy.calc as mpcalc
from metpy.units import units
from netCDF4 import num2date
import numpy as np
import scipy.ndimage as ndimage
from siphon.ncss import NCSS

#######################################
# Data Acquisition
# ----------------

# Connect to the NetCDF Subset Service for the NAM analysis file of
# 16 April 2016 (the 1800 file)
ncss = NCSS('https://www.ncei.noaa.gov/thredds/ncss/grid/namanl/'
            '201604/20160416/namanl_218_20160416_1800_000.grb')
now = datetime.utcnow()

# Build the query for the fixed analysis time: heights and wind components
# on isobaric levels, plus lat/lon fields, returned as netCDF
hgt = ncss.query()
hgt.time(datetime(2016, 4, 16, 18))
hgt.accept('netcdf')
hgt.variables('Geopotential_height_isobaric',
              'u-component_of_wind_isobaric',
              'v-component_of_wind_isobaric')
hgt.add_lonlat()

# Submit the request
ds = ncss.get_data(hgt)

lon = ds.variables['lon'][:]
lat = ds.variables['lat'][:]

# The time coordinate is whichever variable is named by the first dimension
# of the height field
times = ds.variables[
    ds.variables['Geopotential_height_isobaric'].dimensions[0]]
import cartopy.feature as cfeature
import cartopy.util as cutil
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import metpy.calc as mpcalc
from netCDF4 import num2date
import numpy as np
import scipy.ndimage as ndimage
from siphon.catalog import TDSCatalog
from siphon.ncss import NCSS

# Latest GFS dataset: take the first entry of the "latest" catalog
cat = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/grib/'
                 'NCEP/GFS/Global_0p5deg/latest.xml')
best_ds = list(cat.datasets.values())[0]
ncss = NCSS(best_ds.access_urls['NetcdfSubset'])

# Current UTC time, used as the requested valid time
now = datetime.utcnow()

# Build the query: heights and wind components on isobaric levels, plus
# lat/lon fields, returned as netCDF4
gfsdata = ncss.query()
gfsdata.time(now)
gfsdata.accept('netcdf4')
gfsdata.variables('Geopotential_height_isobaric',
                  'u-component_of_wind_isobaric',
                  'v-component_of_wind_isobaric')
gfsdata.add_lonlat()

# Set the lat/lon box for the data you want to pull in.
# lonlat_box(west_lon, east_lon, south_lat, north_lat): here the whole
# Northern Hemisphere
gfsdata.lonlat_box(west=0, east=360, south=0, north=90)

# Set desired level 50000 = 50000 Pa = 500 hPa
class ForecastModel(object):
    '''
    An object for holding forecast model information for use within the
    pvlib library.

    Simplifies use of siphon library on a THREDDS server.

    Parameters
    ----------
    model_type: string
        UNIDATA category in which the model is located.
    model_name: string
        Name of the UNIDATA forecast model.
    set_type: string
        Model dataset type.

    Attributes
    ----------
    access_url: string
        URL specifying the dataset from which data will be retrieved.
    base_tds_url : string
        The top level server address
    catalog_url : string
        The url path of the catalog to parse.
    columns: list
        List of headers used to create the data DataFrame.
    data: pd.DataFrame
        Data returned from the query.
    data_format: string
        Format of the forecast data being requested from UNIDATA.
    dataset: Dataset
        Object containing information used to access forecast data.
    dataframe_variables: list
        Model variables that are present in the data.
    datasets_list: list
        List of all available datasets.
    fm_models: Dataset
        Object containing all available forecast models.
    fm_models_list: list
        List of all available forecast models from UNIDATA.
    latitude: list
        A list of floats containing latitude values.
    location: Location
        A pvlib Location object containing geographic quantities.
    longitude: list
        A list of floats containing longitude values.
    lbox: boolean
        Indicates the use of a location bounding box.
    ncss: NCSS object
        NCSS
    model_name: string
        Name of the UNIDATA forecast model.
    model: Dataset
        A dictionary of Dataset object, whose keys are the name of the
        dataset's name.
    model_url: string
        The url path of the dataset to parse.
    modelvariables: list
        Common variable names that correspond to queryvariables.
    query: NCSS query object
        NCSS object used to complete the forecast data retrieval.
    queryvariables: list
        Variables that are used to query the THREDDS Data Server.
    rad_type: dictionary
        Dictionary labeling the method used for calculating radiation values.
    time: datetime
        Time range specified for the NCSS query.
    utctime: DatetimeIndex
        Time range in UTC.
    var_stdnames: dictionary
        Dictionary containing the standard names of the variables in the
        query, where the keys are the common names.
    var_units: dictionary
        Dictionary containing the units of the variables in the query,
        where the keys are the common names.
    variables: dictionary
        Dictionary that translates model specific variables to
        common named variables.
    vert_level: float or integer
        Vertical altitude for query data.
    wind_type: string
        Quantity that was used to calculate wind_speed.
    zenith: numpy.array
        Solar zenith angles for the given time range.
    '''

    access_url_key = 'NetcdfSubset'
    catalog_url = 'http://thredds.ucar.edu/thredds/catalog.xml'
    base_tds_url = catalog_url.split('/thredds/')[0]
    data_format = 'netcdf'
    vert_level = 100000
    columns = np.array([
        'temperature',
        'wind_speed',
        'total_clouds',
        'low_clouds',
        'mid_clouds',
        'high_clouds',
        'dni',
        'dhi',
        'ghi',
    ])

    def __init__(self, model_type, model_name, set_type):
        self.model_type = model_type
        self.model_name = model_name
        self.set_type = set_type
        self.catalog = TDSCatalog(self.catalog_url)
        self.fm_models = TDSCatalog(self.catalog.catalog_refs[model_type].href)
        self.fm_models_list = sorted(list(self.fm_models.catalog_refs.keys()))

        try:
            model_url = self.fm_models.catalog_refs[model_name].href
        except ParseError:
            raise ParseError(self.model_name + ' model may be unavailable.')

        try:
            self.model = TDSCatalog(model_url)
        except HTTPError:
            raise HTTPError(self.model_name + ' model may be unavailable.')

        self.datasets_list = list(self.model.datasets.keys())
        self.set_dataset()

    def set_dataset(self):
        '''
        Retrieves the designated dataset, creates NCSS object, and
        initiates a NCSS query.

        The dataset is selected by matching ``set_type`` ('best', 'latest'
        or 'full') against the lower-cased first word of each dataset name.
        '''
        keys = list(self.model.datasets.keys())
        labels = [item.split()[0].lower() for item in keys]
        if self.set_type in ('best', 'latest', 'full'):
            self.dataset = \
                self.model.datasets[keys[labels.index(self.set_type)]]

        self.access_url = self.dataset.access_urls[self.access_url_key]
        self.ncss = NCSS(self.access_url)
        self.query = self.ncss.query()

    def set_query_latlon(self):
        '''
        Sets the NCSS query location latitude and longitude.

        A list-valued longitude selects a bounding box, with
        ``longitude = [west, east]`` and ``latitude = [south, north]``;
        otherwise a single point is queried.
        '''
        if isinstance(self.longitude, list):
            self.lbox = True
            # Keyword arguments keep longitudes and latitudes in the right
            # slots regardless of the positional order siphon expects.
            self.query.lonlat_box(west=self.longitude[0],
                                  east=self.longitude[1],
                                  south=self.latitude[0],
                                  north=self.latitude[1])
        else:
            self.lbox = False
            self.query.lonlat_point(self.longitude, self.latitude)

    def set_query_time(self):
        '''
        Sets the NCSS query time range.

        as: single or range
        '''
        query_times = pd.to_datetime(self.utctime)
        if len(self.utctime) == 1:
            self.query.time(query_times[0])
        else:
            self.query.time_range(query_times[0], query_times[-1])

    def set_location(self, time):
        '''
        Sets the location for the query, carrying over the time zone of
        ``time`` when it has one.

        Parameters
        ----------
        time: datetime or DatetimeIndex
            Time range of the query.
        '''
        if isinstance(time, datetime.datetime):
            tzinfo = time.tzinfo
        else:
            tzinfo = time.tz

        if tzinfo is None:
            self.location = Location(self.latitude, self.longitude)
        else:
            self.location = Location(self.latitude, self.longitude, tz=tzinfo)

    def get_query_data(self, latitude, longitude, time,
                       vert_level=None, variables=None):
        '''
        Submits a query to the UNIDATA servers using siphon NCSS and
        converts the netcdf data to a pandas DataFrame.

        Parameters
        ----------
        latitude: list
            A list of floats containing latitude values.
        longitude: list
            A list of floats containing longitude values.
        time: pd.datetimeindex
            Time range of interest.
        vert_level: float or integer
            Vertical altitude of interest.
        variables: dictionary
            Variables and common names being queried.

        Returns
        -------
        pd.DataFrame
        '''
        if vert_level is not None:
            self.vert_level = vert_level
        if variables is not None:
            self.variables = variables
            self.modelvariables = list(self.variables.keys())
            self.queryvariables = [self.variables[key]
                                   for key in self.modelvariables]
            self.columns = self.modelvariables
            self.dataframe_variables = self.modelvariables

        self.latitude = latitude
        self.longitude = longitude
        self.set_query_latlon()
        self.set_location(time)

        self.utctime = localize_to_utc(time, self.location)
        self.set_query_time()

        self.query.vertical_level(self.vert_level)
        self.query.variables(*self.queryvariables)
        self.query.accept(self.data_format)
        netcdf_data = self.ncss.get_data(self.query)

        # Release the netCDF handle even when post-processing fails.
        try:
            # Only the variable lookup is guarded, so a KeyError raised
            # inside set_time is not mistaken for a missing 'time' variable.
            try:
                time_var = netcdf_data.variables['time']
            except KeyError:
                time_var = netcdf_data.variables['time1']
            self.set_time(time_var)

            self.data = self.netcdf2pandas(netcdf_data)

            self.set_variable_units(netcdf_data)
            self.set_variable_stdnames(netcdf_data)
            # Compare by value; `is` only tests object identity.
            if self.__class__.__name__ == 'HRRR':
                self.calc_temperature(netcdf_data)
            self.convert_temperature()
            self.calc_wind(netcdf_data)
            self.calc_radiation(netcdf_data)

            self.data = self.data.tz_convert(self.location.tz)
        finally:
            netcdf_data.close()

        return self.data

    def netcdf2pandas(self, data):
        '''
        Transforms data from netcdf to pandas DataFrame.

        Currently only supports one-dimensional netcdf data; for bounding
        box queries an empty DataFrame with the expected columns and index
        is returned.

        Parameters
        ----------
        data: netcdf
            Data returned from UNIDATA NCSS query.

        Returns
        -------
        pd.DataFrame
        '''
        if not self.lbox:
            # one-dimensional data
            data_dict = {}
            for var in self.dataframe_variables:
                data_dict[var] = pd.Series(
                    data[self.variables[var]][:].squeeze(),
                    index=self.utctime)
            return pd.DataFrame(data_dict, columns=self.columns)
        else:
            return pd.DataFrame(columns=self.columns, index=self.utctime)

    def set_time(self, time):
        '''
        Converts time data into a pandas date object and stores it in
        ``self.time`` (location time zone) and ``self.utctime``.

        Parameters
        ----------
        time: netcdf
            Contains time information.
        '''
        times = num2date(time[:].squeeze(), time.units)
        self.time = pd.DatetimeIndex(pd.Series(times), tz='UTC')
        self.time = self.time.tz_convert(self.location.tz)
        self.utctime = localize_to_utc(self.time, self.location.tz)

    def set_variable_units(self, data):
        '''
        Extracts variable unit information from netcdf data.

        Parameters
        ----------
        data: netcdf
            Contains queried variable information.
        '''
        self.var_units = {}
        for var in self.variables:
            self.var_units[var] = data[self.variables[var]].units

    def set_variable_stdnames(self, data):
        '''
        Extracts standard names from netcdf data, falling back to the
        common name when a variable has no ``standard_name`` attribute.

        Parameters
        ----------
        data: netcdf
            Contains queried variable information.
        '''
        self.var_stdnames = {}
        for var in self.variables:
            try:
                self.var_stdnames[var] = \
                    data[self.variables[var]].standard_name
            except AttributeError:
                self.var_stdnames[var] = var

    def calc_radiation(self, data, cloud_type='total_clouds'):
        '''
        Determines shortwave radiation values if they are missing from
        the model data.

        Parameters
        ----------
        data: netcdf
            Query data formatted in netcdf format.
        cloud_type: string
            Type of cloud cover to use for calculating
            radiation values.
        '''
        self.rad_type = {}
        if not self.lbox and cloud_type in self.modelvariables:
            cloud_prct = self.data[cloud_type]
            solpos = get_solarposition(self.time, self.location)
            self.zenith = np.array(solpos.zenith.tz_convert('UTC'))
            for rad in ['dni', 'dhi', 'ghi']:
                # Compare by value; `is` only tests object identity.
                if self.model_name == 'HRRR_ESRL':
                    # HRRR_ESRL is the only model with the
                    # correct equation of time.
                    if rad in self.modelvariables:
                        self.data[rad] = pd.Series(
                            data[self.variables[rad]][:].squeeze(),
                            index=self.time)
                        self.rad_type[rad] = 'forecast'
                        self.data[rad].fillna(0, inplace=True)
                else:
                    # NOTE(review): the inner loop re-derives all three
                    # components on every outer iteration; kept as found
                    # because the intended nesting should be confirmed.
                    for rad in ['dni', 'dhi', 'ghi']:
                        self.rad_type[rad] = 'liujordan'
                        self.data[rad] = \
                            liujordan(self.zenith, cloud_prct)[rad]
                        self.data[rad].fillna(0, inplace=True)

            for var in ['dni', 'dhi', 'ghi']:
                self.data[var].fillna(0, inplace=True)
                self.var_units[var] = '$W m^{-2}$'

    def convert_temperature(self):
        '''
        Converts Kelvin to celsius.

        Only applied when a surface or isobaric temperature was queried.
        '''
        if 'Temperature_surface' in self.queryvariables or \
                'Temperature_isobaric' in self.queryvariables:
            self.data['temperature'] -= 273.15
            self.var_units['temperature'] = 'C'

    def calc_temperature(self, data):
        '''
        Calculates temperature from isobaric temperature, surface pressure
        and isobaric dewpoint temperature.

        The result is stored in ``self.data['temperature']`` without any
        offset applied to the isobaric temperature, so it is in the same
        scale as the queried field; ``convert_temperature`` performs the
        conversion to degrees C.

        Parameters
        ----------
        data: netcdf
            Query data in netcdf format.
        '''
        P = data['Pressure_surface'][:].squeeze() / 100.0
        Tiso = data['Temperature_isobaric'][:].squeeze()
        Td = data['Dewpoint_temperature_isobaric'][:].squeeze() - 273.15
        # vapor pressure from dewpoint, then mixing ratio
        e = 6.11 * 10**((7.5 * Td) / (Td + 273.3))
        w = 0.622 * (e / (P - e))

        T = Tiso - ((2.501 * 10.**6) / 1005.7) * w

        self.data['temperature'] = T

    def calc_wind(self, data):
        '''
        Computes wind speed.

        In some cases only gust wind speed is available. The wind_type
        attribute will indicate the type of wind speed that is present.
        When neither the wind components nor the gust speed were queried,
        the data is left untouched.

        Parameters
        ----------
        data: netcdf
            Query data in netcdf format.
        '''
        if not self.lbox:
            wind_data = None
            if 'u-component_of_wind_isobaric' in self.queryvariables and \
                    'v-component_of_wind_isobaric' in self.queryvariables:
                wind_data = np.sqrt(
                    data['u-component_of_wind_isobaric'][:].squeeze()**2 +
                    data['v-component_of_wind_isobaric'][:].squeeze()**2)
                self.wind_type = 'component'
            elif 'Wind_speed_gust_surface' in self.queryvariables:
                wind_data = data['Wind_speed_gust_surface'][:].squeeze()
                self.wind_type = 'gust'

            # Without this guard an unassigned wind_data would raise
            # UnboundLocalError when no wind variable was queried.
            if wind_data is not None and 'wind_speed' in self.data:
                self.data['wind_speed'] = pd.Series(wind_data,
                                                    index=self.time)
                self.var_units['wind_speed'] = 'm/s'
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
import metpy.calc as mpcalc
from metpy.units import units
from netCDF4 import num2date
import numpy as np
import scipy.ndimage as ndimage
from siphon.ncss import NCSS

########################################
# Set up access to the data

# Create NCSS object to access the NetcdfSubset.
# The file name is built from `dt` (the 18 UTC file with the `_003` suffix).
base_url = 'https://www.ncei.noaa.gov/thredds/ncss/grid/gfs-g4-anl-files/'
dt = datetime(2016, 8, 22, 18)
ncss = NCSS('{}{dt:%Y%m}/{dt:%Y%m%d}/gfsanl_4_{dt:%Y%m%d}_'
            '{dt:%H}00_003.grb2'.format(base_url, dt=dt))

# Create lat/lon box for location you want to get data for.
# Keyword arguments make the box edges explicit.
query = ncss.query()
query.lonlat_box(north=50, south=30, east=-80, west=-115)
# The requested time (21 UTC) is three hours after `dt`.
query.time(datetime(2016, 8, 22, 21))

# Request geopotential height and both wind components on one level
# (100000 is passed as the vertical level).
query.variables('Geopotential_height_isobaric',
                'u-component_of_wind_isobaric',
                'v-component_of_wind_isobaric')
query.vertical_level(100000)
data = ncss.get_data(query)

# Pull out variables you want to use
height_var = data.variables['Geopotential_height_isobaric']
u_wind_var = data.variables['u-component_of_wind_isobaric']
def get_ensemble_point(point, variables=None, start=None, end=None):
    """
    Retrieves the latest ("best") ensemble forecast valid at a single point
    from the Unidata THREDDS server using the Unidata siphon library.

    Requires:
        point -> A tuple of (lat, lon) of the point we are trying to retrieve
        variables -> A list of variables we want to retrieve.  Defaults to
                ['Temperature_height_above_ground_ens'] when None.
                Check this page for a full list:
                http://thredds.ucar.edu/thredds/metadata/grib/NCEP/GEFS/Global_1p0deg_Ensemble/members/Best?metadata=variableMap
        start -> A datetime object of the earliest time to look for an
                ensemble initialization; when None, the current UTC time
                minus 12 hours (evaluated at call time)
        end -> The last time for which we want ensemble forecast output;
                when None, the current UTC time plus 48 hours (evaluated
                at call time)

    Returns:
        A dictionary with one item being the list of valid times in the
        data ('times') and the rest of the items being numpy arrays of
        nTimes x nEnsmems for each variable requested
    """
    # Import the Siphon utilities
    from siphon.catalog import TDSCatalog
    from siphon.ncss import NCSS

    # Defaults are resolved here rather than in the signature: a default
    # expression is evaluated once at definition time, which would freeze
    # "now" at import and share one list object between calls.
    if variables is None:
        variables = ['Temperature_height_above_ground_ens']
    now = datetime.utcnow()
    if start is None:
        start = now - timedelta(hours=12)
    if end is None:
        end = now + timedelta(hours=48)

    # In Siphon, we connect to a thredds catalog.  Here's the address for the GEFS
    catalog = 'http://thredds.ucar.edu/thredds/catalog/grib/NCEP/GEFS/Global_1p0deg_Ensemble/members/catalog.xml'
    best_model = TDSCatalog(catalog)

    # We select a specific dataset in this catalog, in this case the "best"
    # (most recent) ensemble run.
    # NOTE(review): selection is by position (third dataset); verify this
    # still maps to the "best" dataset if the catalog layout changes.
    best_ds = list(best_model.datasets.values())[2]
    ncss = NCSS(best_ds.access_urls['NetcdfSubset'])

    # Here we format our subsetting query.  We specify the exact point we
    # want, the time range, and the variables we are requesting.  We're also
    # going to retrieve the data in a netcdf-like format
    query = ncss.query()
    query.lonlat_point(point[1], point[0])
    query.time_range(start, end)
    query.variables(*variables)
    query.accept('netcdf')

    # Actually get the data
    data = ncss.get_data(query)

    try:
        # Format our output into a dictionary
        output = {}
        for v in variables:
            # After the squeeze, this is a nTimes x nEns array
            output[v] = np.squeeze(data.variables[v][:])

        # Also, add times
        # The 'time' variable is hours since "time_coverage_start"
        # Get this in datetime format
        raw_hours = list(np.squeeze(data.variables['time'][:]))
        init_time = datetime.strptime(str(data.time_coverage_start),
                                      '%Y-%m-%dT%H:%M:%SZ')
    finally:
        # Everything needed has been copied out; release the dataset.
        data.close()

    output['times'] = [init_time + timedelta(hours=int(x)) for x in raw_hours]

    # Return a dictionary
    return output
class TestNCSS(object):
    """Exercise NCSS query validation and parsing of each response format."""

    server = 'http://thredds.ucar.edu/thredds/ncss/'
    urlPath = 'grib/NCEP/GFS/Global_0p5deg/GFS_Global_0p5deg_20150612_1200.grib2'

    @recorder.use_cassette('ncss_test_metadata')
    def setup(self):
        """Create the client and a valid point query shared by the tests."""
        self.ncss = NCSS(self.server + self.urlPath)
        valid_time = datetime(2015, 6, 12, 15, 0, 0)
        point_query = self.ncss.query().lonlat_point(-105, 40)
        self.nq = point_query.time(valid_time)
        self.nq.variables('Temperature_isobaric', 'Relative_humidity_isobaric')

    def test_good_query(self):
        """A well-formed query passes validation."""
        assert self.ncss.validate_query(self.nq)

    def test_bad_query(self):
        """Adding an unknown variable makes the query invalid."""
        self.nq.variables('foo')
        assert not self.ncss.validate_query(self.nq)

    def test_empty_query(self):
        """A query with nothing set is invalid and does not yield a set."""
        res = self.ncss.validate_query(self.ncss.query())
        assert not res
        assert not isinstance(res, set)

    def test_bad_query_no_vars(self):
        """Removing every variable makes the query invalid."""
        self.nq.var.clear()
        assert not self.ncss.validate_query(self.nq)

    @recorder.use_cassette('ncss_gfs_xml_point')
    def test_xml_point(self):
        """XML point responses expose the variables and the coordinates."""
        self.nq.accept('xml')
        parsed = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in parsed
        assert 'Relative_humidity_isobaric' in parsed
        assert parsed['lat'][0] == 40
        assert parsed['lon'][0] == -105

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_csv_point(self):
        """CSV point responses expose the variables and the coordinates."""
        self.nq.accept('csv')
        parsed = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in parsed
        assert 'Relative_humidity_isobaric' in parsed
        assert parsed['lat'][0] == 40
        assert parsed['lon'][0] == -105

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_unit_handler_csv(self):
        """The tuple unit handler attaches units to CSV values."""
        self.nq.accept('csv')
        self.ncss.unit_handler = tuple_unit_handler
        parsed = self.ncss.get_data(self.nq)

        temperature = parsed['Temperature_isobaric']
        assert len(temperature) == 2
        assert temperature[1] == 'K'

        humidity = parsed['Relative_humidity_isobaric']
        assert len(humidity) == 2
        assert humidity[1] == '%'

    @recorder.use_cassette('ncss_gfs_xml_point')
    def test_unit_handler_xml(self):
        """The tuple unit handler attaches units to XML values."""
        self.nq.accept('xml')
        self.ncss.unit_handler = tuple_unit_handler
        parsed = self.ncss.get_data(self.nq)

        temperature = parsed['Temperature_isobaric']
        assert len(temperature) == 2
        assert temperature[1] == 'K'

        humidity = parsed['Relative_humidity_isobaric']
        assert len(humidity) == 2
        assert humidity[1] == '%'

    @recorder.use_cassette('ncss_gfs_netcdf_point')
    def test_netcdf_point(self):
        """netCDF point responses carry the variables and the coordinates."""
        self.nq.accept('netcdf')
        dataset = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in dataset.variables
        assert 'Relative_humidity_isobaric' in dataset.variables
        assert dataset.variables['latitude'][0] == 40
        assert dataset.variables['longitude'][0] == -105

    @recorder.use_cassette('ncss_gfs_netcdf4_point')
    def test_netcdf4_point(self):
        """netCDF4 point responses carry the variables and the coordinates."""
        self.nq.accept('netcdf4')
        dataset = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in dataset.variables
        assert 'Relative_humidity_isobaric' in dataset.variables
        assert dataset.variables['latitude'][0] == 40
        assert dataset.variables['longitude'][0] == -105

    @recorder.use_cassette('ncss_gfs_vertical_level')
    def test_vertical_level(self):
        """Restricting to one vertical level returns the expected value."""
        csv_query = self.nq.accept('csv')
        csv_query.vertical_level(50000)
        parsed = self.ncss.get_data(self.nq)

        expected = np.array([263.40])
        np.testing.assert_almost_equal(parsed['Temperature_isobaric'],
                                       expected, 2)

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_raw_csv(self):
        """The raw CSV payload starts with the expected header bytes."""
        self.nq.accept('csv')
        raw = self.ncss.get_data_raw(self.nq)
        assert raw.startswith(b'date,lat')

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_unknown_mime(self):
        """Under response_context, get_data hands back the raw bytes."""
        self.nq.accept('csv')
        with response_context():
            payload = self.ncss.get_data(self.nq)
            assert payload.startswith(b'date,lat')
for j in range(latitude.shape[0]): _, _, dx[j, i] = g.inv(longitude[j, i], latitude[j, i], longitude[j, i + 1], latitude[j, i + 1]) dx[:, i + 1] = dx[:, i] xdiff_sign = np.sign(longitude[0, 1] - longitude[0, 0]) ydiff_sign = np.sign(latitude[1, 0] - latitude[0, 0]) return xdiff_sign * dx * units.meter, ydiff_sign * dy * units.meter ###################################### # Set up access to the data # Create NCSS object to access the NetcdfSubset ncss = NCSS( 'https://nomads.ncdc.noaa.gov/thredds/ncss/grid/gfs-004-anl/201608/20160822/' 'gfsanl_4_20160822_1800_003.grb2') # Create lat/lon box for location you want to get data for query = ncss.query() query.lonlat_box(north=50, south=30, east=-80, west=-115) query.time(datetime(2016, 8, 22, 21)) # Request data for geopotential height query.variables('Geopotential_height', 'U-component_of_wind', 'V-component_of_wind') query.vertical_level(100000) data = ncss.get_data(query) # Pull out variables you want to use height_var = data.variables['Geopotential_height']
from netCDF4 import num2date
import numpy as np
from metpy.units import units
import scipy.ndimage as ndimage
from siphon.catalog import TDSCatalog
from siphon.ncss import NCSS

# =============================================================================
# RETRIEVE MODEL DATA
# Catalog URLs are defined for RAP, HRRR and GFS; only the GFS catalog is
# opened by the statements below.
# =============================================================================
RAP = 'http://thredds-jetstream.unidata.ucar.edu/thredds/catalog/grib/NCEP/RAP/CONUS_20km/latest.xml'
HRRR= 'http://thredds-jetstream.unidata.ucar.edu/thredds/catalog/grib/NCEP/HRRR/CONUS_2p5km/latest.xml'
GFS = 'http://thredds-jetstream.unidata.ucar.edu/thredds/catalog/grib/NCEP/GFS/Global_0p5deg/latest.xml'
# Take the first dataset of the "latest" GFS catalog and open its
# NetcdfSubset endpoint.
DATA = TDSCatalog(GFS)
BEST_DATA = list(DATA.datasets.values())[0]
NCSS_DATA = NCSS(BEST_DATA.access_urls['NetcdfSubset'])
NOW = datetime.utcnow()
# One module-level query object, pinned to the current UTC time and netCDF4
# output; the function below adds to this same object.
LATEST_DATA = NCSS_DATA.query().time(NOW).accept('netcdf4')

# =============================================================================
# UPPER-AIR VARIABLES
# =============================================================================
# NOTE(review): Python identifiers cannot begin with a digit, so the three
# `def` names below are syntax errors as written, and the first two have no
# body at all -- these need renaming/implementing before the file can run.
# 250: JET STREAM, GEOPOTENTIAL HEIGHT, POTENTIAL VORTICITY, IRROTATIONAL WIND
def 250hPa_GFS_jet_stream_SLP(lon_west, lon_east, lat_south, lat_north):
def 250hPa_GFS_jet_stream_jet_dyn(lon_west, lon_east, lat_south, lat_north):
# 500: VORTICITY, GEOPOTENTIAL HEIGHT, VORTICITY ADVECTION
def 500hPa_GFS_vorticity(lon_west, lon_east, lat_south, lat_north):
    # Request heights and both wind components plus lon/lat arrays.
    LATEST_DATA.variables('Geopotential_height_isobaric',
                          'u-component_of_wind_isobaric',
                          'v-component_of_wind_isobaric').add_lonlat()
    # Box edges are passed positionally in west, east, south, north order.
    LATEST_DATA.lonlat_box(lon_west, lon_east, lat_south, lat_north)
    # 50000 is passed as the vertical level (presumably Pa, i.e. 500 hPa,
    # matching the section heading -- confirm).
    LATEST_DATA.vertical_level(50000)
from ipywidgets import interact_manual
import matplotlib.pyplot as plt
import metpy.calc as mcalc
from metpy.units import units
from netCDF4 import num2date
import numpy as np
from siphon.ncss import NCSS

#######################################
# **Getting the data**
#
# In this example, NARR reanalysis data for 18 UTC 04 April 1987 from the National Centers
# for Environmental Information (https://nomads.ncdc.noaa.gov) will be used.

# Link to NetCDF subset service for the NARR-A file of 1987-04-04 18 UTC
ncss = NCSS('https://nomads.ncdc.noaa.gov/thredds/ncss/grid/narr-a/198704/19870404/'
            'narr-a_221_19870404_1800_000.grb')

# Bring in needed data: every time step in the file, five variables, plus
# lon/lat arrays.
modeldata = ncss.query().all_times()
modeldata.variables('Geopotential_height', 'u_wind', 'v_wind', 'Temperature',
                    'Specific_humidity').add_lonlat()

# Set the lat/lon box for the data you want to pull in.
# The values below are passed positionally as
# lonlat_box(west_lon, east_lon, south_lat, north_lat), i.e. longitudes
# -140..-60 and latitudes 16..60.
# NOTE(review): positional order depends on the installed siphon version;
# keyword arguments (west=, east=, south=, north=) would be unambiguous.
modeldata.lonlat_box(-140, -60, 16, 60)

# Actually getting the data
data = ncss.get_data(modeldata)
def radar_plus_obs(station, my_datetime, radar_title=None, bb=None,
                   station_radius=75000., station_layout=simple_layout,
                   field='reflectivity', vmin=None, vmax=None,
                   sweep=0):
    """
    Plot one radar sweep on a map with surface station observations on top.

    Parameters
    ----------
    station, my_datetime
        Passed to ``get_radar_from_aws`` to fetch the radar volume.
    radar_title : str, optional
        Prefix of the plot title; defaults to ``'Area '``.
    bb : dict, optional
        Bounding box with keys ``'north'``, ``'south'``, ``'east'`` and
        ``'west'``.  When None, it is derived from the min/max of the
        radar gate latitudes and longitudes.
    station_radius : float
        Passed to ``filter_data`` as ``radius`` to thin the stations.
    station_layout
        Layout object whose ``plot`` method draws the station data.
    field : str
        Radar field to plot.
    vmin, vmax
        Color limits passed to ``plot_ppi_map``.
    sweep : int
        Index of the sweep to plot; its first ray also sets the time used
        for the surface observation query.

    Returns
    -------
    tuple
        ``(display, time_at_start_of_radar)``: the radar map display and
        the time of the first ray of the sweep.
    """
    if radar_title is None:
        radar_title = 'Area '

    radar = get_radar_from_aws(station, my_datetime)

    # Lets get some geographical context
    if bb is None:
        lats = radar.gate_latitude
        lons = radar.gate_longitude

        min_lon = lons['data'].min()
        min_lat = lats['data'].min()
        max_lat = lats['data'].max()
        max_lon = lons['data'].max()
        bb = {'north': max_lat, 'south': min_lat,
              'east': max_lon, 'west': min_lon}
    else:
        min_lon = bb['west']
        min_lat = bb['south']
        max_lon = bb['east']
        max_lat = bb['north']

    print('min_lat:', min_lat, ' min_lon:', min_lon,
          ' max_lat:', max_lat, ' max_lon:', max_lon)

    index_at_start = radar.sweep_start_ray_index['data'][sweep]
    time_at_start_of_radar = num2date(radar.time['data'][index_at_start],
                                      radar.time['units'])
    # The title time is always rendered in the US/Central zone.
    central = pytz.timezone('US/Central')
    local_time = central.fromutc(time_at_start_of_radar)
    fancy_date_string = local_time.strftime('%A %B %d at %I:%M %p %Z')
    print(fancy_date_string)

    # Surface observations valid at the sweep start, inside the same box.
    metar_cat = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/nws/metar/ncdecoded/catalog.xml?'
                           'dataset=nws/metar/ncdecoded/Metar_Station_Data_fc.cdmr')
    dataset = list(metar_cat.datasets.values())[0]
    ncss = NCSS(dataset.access_urls["NetcdfSubset"])

    query = ncss.query().accept('csv').time(time_at_start_of_radar)
    query.lonlat_box(north=max_lat, south=min_lat, east=max_lon, west=min_lon)
    query.variables('air_temperature', 'dew_point_temperature', 'inches_ALTIM',
                    'wind_speed', 'wind_from_direction', 'cloud_area_fraction',
                    'weather')
    data = ncss.get_data(query)

    lats = data['latitude'][:]
    lons = data['longitude'][:]
    tair = data['air_temperature'][:]
    dewp = data['dew_point_temperature'][:]
    slp = (data['inches_ALTIM'][:] * units('inHg')).to('mbar')

    # Convert wind to components
    u, v = mpcalc.get_wind_components(data['wind_speed'] * units.knot,
                                      data['wind_from_direction'] * units.deg)

    # Need to handle missing (NaN) and convert to proper code
    cloud_cover = 8 * data['cloud_area_fraction']
    cloud_cover[np.isnan(cloud_cover)] = 9
    # `np.int` was only an alias of the builtin and has been removed from
    # NumPy; the builtin `int` is the exact equivalent.
    cloud_cover = cloud_cover.astype(int)

    # For some reason these come back as bytes instead of strings
    stid = [s.decode() for s in data['station']]

    # The text weather observations are not converted to WMO codes here,
    # so no 'present_weather' entry is added to sfc_data.
    sfc_data = {'latitude': lats, 'longitude': lons,
                'air_temperature': tair, 'dew_point_temperature': dewp,
                'eastward_wind': u, 'northward_wind': v,
                'cloud_coverage': cloud_cover,
                'air_pressure_at_sea_level': slp}

    fig = plt.figure(figsize=(10, 8))
    display = pyart.graph.RadarMapDisplayCartopy(radar)
    lat_0 = display.loc[0]
    lon_0 = display.loc[1]

    # Set our Projection
    projection = cartopy.crs.Mercator(central_longitude=lon_0,
                                      min_latitude=min_lat,
                                      max_latitude=max_lat)

    # Call our function to reduce data
    # NOTE(review): sort_key names 'present_weather', which sfc_data does not
    # contain -- confirm filter_data tolerates a missing key.
    filter_data(sfc_data, projection, radius=station_radius,
                sort_key='present_weather')
    print(sweep)
    display.plot_ppi_map(
        field, sweep, colorbar_flag=True,
        title=radar_title + ' area ' + field + ' \n' + fancy_date_string,
        projection=projection,
        min_lon=min_lon, max_lon=max_lon, min_lat=min_lat, max_lat=max_lat,
        vmin=vmin, vmax=vmax)

    # Mark the radar
    display.plot_point(lon_0, lat_0, label_text='Radar')

    # Get the current axes and plot some lat and lon lines
    gl = display.ax.gridlines(draw_labels=True,
                              linewidth=2, color='gray', alpha=0.5,
                              linestyle='--')
    gl.xlabels_top = False
    gl.ylabels_right = False

    # Make the station plot
    stationplot = StationPlot(display.ax, sfc_data['longitude'],
                              sfc_data['latitude'],
                              transform=cartopy.crs.PlateCarree(),
                              fontsize=12)
    station_layout.plot(stationplot, sfc_data)

    return display, time_at_start_of_radar
%matplotlib inline # <headingcell level=2> # Extract HRRR data using Unidata's Siphon package # <codecell> # Resolve the latest HRRR dataset from siphon.catalog import TDSCatalog latest_hrrr = TDSCatalog('http://thredds-jumbo.unidata.ucar.edu/thredds/catalog/grib/HRRR/CONUS_3km/surface/latest.xml') hrrr_ds = list(latest_hrrr.datasets.values())[0] # Set up access via NCSS from siphon.ncss import NCSS ncss = NCSS(hrrr_ds.access_urls['NetcdfSubset']) # Create a query to ask for all times in netcdf4 format for # the Temperature_surface variable, with a bounding box query = ncss.query() # <codecell> dap_url = hrrr_ds.access_urls['OPENDAP'] # <codecell> query.all_times().accept('netcdf4').variables('u-component_of_wind_height_above_ground', 'v-component_of_wind_height_above_ground') query.lonlat_box(45, 41., -63, -71.5)