Example #1
import datetime

import metpy.calc as mpcalc
import numpy as np
from metpy.units import units
from siphon.catalog import TDSCatalog
from siphon.ncss import NCSS


def get_obs(ts, mybb):
    # Catalog URL copied from the browser URL box
    metar_cat_url = ('http://thredds.ucar.edu/thredds/catalog/nws/metar/ncdecoded/'
                     'catalog.xml?dataset=nws/metar/ncdecoded/Metar_Station_Data_fc.cdmr')
    # parse the xml
    metar_cat = TDSCatalog(metar_cat_url)
    # what datasets are here? only one "dataset" in this catalog
    dataset = list(metar_cat.datasets.values())[0]
    ncss_url = dataset.access_urls["NetcdfSubset"]
    ncss = NCSS(ncss_url)

    query = ncss.query().accept('csv').time(ts - datetime.timedelta(minutes=1))
    query.lonlat_box(**mybb)
    query.variables('air_temperature', 'dew_point_temperature', 'inches_ALTIM',
                    'wind_speed', 'wind_from_direction', 'cloud_area_fraction', 'weather')

    try:
        data = ncss.get_data(query)
        lats = data['latitude'][:]
        lons = data['longitude'][:]
        tair = data['air_temperature'][:]
        dewp = data['dew_point_temperature'][:]
        slp = (data['inches_ALTIM'][:] * units('inHg')).to('mbar')

        # Convert wind to components (older MetPy exposed this as get_wind_components)
        u, v = mpcalc.wind_components(data['wind_speed'] * units.knot,
                                      data['wind_from_direction'] * units.deg)

        # Convert fractional cloud cover to oktas; code 9 marks missing (NaN) data
        cloud_cover = 8 * data['cloud_area_fraction']
        cloud_cover[np.isnan(cloud_cover)] = 9
        cloud_cover = cloud_cover.astype(int)  # np.int was removed in NumPy 1.24

        # For some reason these come back as bytes instead of strings
        stid = [s.decode() for s in data['station']]

        # Convert the text weather observations to WMO codes we can map to symbols;
        # to_code is assumed to be a helper defined elsewhere for this mapping
        if data['weather'].dtype != bool:
            wx_text = [s.decode('ascii') for s in data['weather']]
            wx_codes = np.array(list(to_code(wx_text)))
        else:
            wx_codes = np.array([0] * len(data['weather']))

        sfc_data = {'latitude': lats, 'longitude': lons,
                    'air_temperature': tair, 'dew_point_temperature': dewp, 'eastward_wind': u,
                    'northward_wind': v, 'cloud_coverage': cloud_cover,
                    'air_pressure_at_sea_level': slp, 'present_weather': wx_codes}

        have_obs = True
    except Exception:
        # Any failure in the request or parsing leaves us without observations
        have_obs = False
        sfc_data = {}

    return sfc_data, have_obs
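
A minimal usage sketch for get_obs; the bounding-box keys follow siphon's lonlat_box keywords, and the values here are arbitrary assumptions:

bb = {'west': -105.5, 'east': -104.5, 'south': 39.5, 'north': 40.5}
sfc_data, have_obs = get_obs(datetime.datetime.utcnow(), bb)
if have_obs:
    print('Retrieved', len(sfc_data['latitude']), 'stations')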
Example #2
from datetime import datetime

import numpy as np
from metpy.calc import dewpoint_from_relative_humidity
from metpy.units import units
from siphon.catalog import TDSCatalog
from siphon.ncss import NCSS


def get_sounding(source, lat, long):
    # 'source' is unused for now; only the NCSS pathway has been tested.
    best_gfs = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/grib/NCEP/GFS/Global_0p5deg/' +
                          'catalog.xml?dataset=grib/NCEP/GFS/Global_0p5deg/Best')
    best_ds = list(best_gfs.datasets.values())[0]

    ncss = NCSS(best_ds.access_urls['NetcdfSubset'])
    query = ncss.query()
    query.lonlat_point(long, lat).time(datetime.utcnow())
    query.accept('netcdf4')
    query.variables('Temperature_isobaric', 'Relative_humidity_isobaric',
                    'u-component_of_wind_isobaric', 'v-component_of_wind_isobaric')

    data = ncss.get_data(query)

    temp = data.variables['Temperature_isobaric']
    temp_vals = temp[:].squeeze() * units.kelvin
    relh = data.variables['Relative_humidity_isobaric']
    relh_values = relh[:] / 100  # percent -> dimensionless fraction
    # Older MetPy exposed this as dewpoint_rh
    td = dewpoint_from_relative_humidity(temp_vals, relh_values)
    td_vals = td[:].squeeze()
    # The pressure coordinate name is dataset-specific; 'isobaric3' matches this dataset
    press = data.variables['isobaric3']
    press_vals = press[:].squeeze()

    u_wind = data.variables['u-component_of_wind_isobaric']
    u_wind_vals = u_wind[:].squeeze()

    v_wind = data.variables['v-component_of_wind_isobaric']
    v_wind_vals = v_wind[:].squeeze()

    # Put temp, dewpoint, pressure, u/v winds into numpy arrays and reorder
    t = np.array(temp_vals)[::-1]
    td = np.array(td_vals)[::-1]
    p = np.array(press_vals)[::-1]
    u = np.array(u_wind_vals)[::-1]
    v = np.array(v_wind_vals)[::-1]

    # Change units for a proper skew-T
    p = (p * units.pascals).to('mbar')
    t = (t * units.kelvin).to('degC')
    td = td * units.degC
    u = (u * units('m/s')).to('knot')
    v = (v * units('m/s')).to('knot')

    return t, td, p, u, v, lat, long, str(datetime.utcnow())[:-7]
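
A minimal usage sketch; the source argument is currently ignored by the function, and the point used here is an arbitrary assumption:

t, td, p, u, v, lat, long, valid_time = get_sounding('ncss', 40.0, -105.0)
print(valid_time, p[0], t[0])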
Example #3
import numpy as np
from netCDF4 import num2date
from siphon.catalog import get_latest_access_url
from siphon.ncss import NCSS


def get_data(lon_w, lon_e, lat_s, lat_n, variable):
    """Request GFS data over a lon/lat box for a single variable.

    TODO:
        Add reset, change colors of map, variable selection, model selection, lat/long validator

    Note: variable_dict is assumed to be a helper, defined elsewhere, that maps
    a friendly variable name to the dataset's variable name.
    """
    cat_url = 'http://thredds-jumbo.unidata.ucar.edu/thredds/catalog/grib/NCEP/GFS/Global_0p25deg/catalog.xml'
    latest_gfs = get_latest_access_url(cat_url, 'NetcdfSubset')
    ncss = NCSS(latest_gfs)

    query = ncss.query()
    query.lonlat_box(west=lon_w, east=lon_e, south=lat_s, north=lat_n).all_times()
    query.accept('netcdf4')
    query.variables(variable_dict(variable))
    data = ncss.get_data(query)
    var1 = data.variables[variable_dict(variable)]

    # Find the time dimension; this only works if the dimension name
    # contains 'time' or the variable has a single dimension
    time_name = None
    for dim in var1.dimensions:
        if 'time' in dim:
            time_name = dim
    if time_name is None:
        raise ValueError("Couldn't find a time dimension for " + var1.name)
    time_1d = data.variables[time_name]
    lat_1d = data.variables['lat']
    lon_1d = data.variables['lon']

    # Reduce the dimensions of the data
    lat_1d = lat_1d[:].squeeze()
    lon_1d = lon_1d[:].squeeze()

    # Convert the number of hours since the reference time to an actual date
    time_val = num2date(time_1d[:].squeeze(), time_1d.units)

    # Combine latitude and longitudes
    lon_2d, lat_2d = np.meshgrid(lon_1d, lat_1d)

    # Flatten() combines all the lists from meshgrid into one list
    full_lat_1d = lat_2d.flatten()
    full_lon_1d = lon_2d.flatten()

    # Create one list that pairs longitudes and latitudes
    lonlat_list = list(zip(full_lon_1d, full_lat_1d))  # zip is lazy in Python 3

    return lon_2d, lat_2d, var1, time_val, lonlat_list
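
A minimal usage sketch, assuming variable_dict can resolve 'temperature' to a valid GFS variable name:

lon_2d, lat_2d, var1, time_val, lonlat_list = get_data(-110, -90, 30, 45, 'temperature')
print(time_val, lat_2d.shape)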
Example #4
import netCDF4
import numpy as np
from scipy import ndimage
from siphon.catalog import TDSCatalog
from siphon.ncss import NCSS


def get_closest_gfs(time, level, field):
    """
    Retrieve the current best 0.25 deg GFS model for a given field, level, and time.

    time : datetime object
    level : level of the results (in Pa, e.g. 50000 for 500 hPa)
    field : CF field to retrieve
    """

    # Get the catalog and pull out the 'Best' GFS entry
    catalog = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/grib/NCEP/GFS/Global_0p25deg/catalog.xml')
    best_gfs = list(catalog.datasets.values())[1]  # index 1 is the 'Best' dataset in this catalog

    # Using NCSS, build a query and fetch the data
    ncss = NCSS(best_gfs.access_urls['NetcdfSubset'])
    query = ncss.query()
    query.lonlat_box(north=90, south=10, east=360, west=160)
    query.vertical_level(level)
    query.time(time)
    query.accept('netcdf4')
    query.variables(field)
    data = ncss.get_data(query)

    # Pull out the variables we will use
    lat_var = data.variables['lat']
    lon_var = data.variables['lon']
    data_var = data.variables[field]

    # Find the correct time variable among the variable's coordinates
    time_var = None
    for coord in data_var.coordinates.split():
        if 'time' in coord:
            time_var = data.variables[coord]
            break
    if time_var is None:
        raise ValueError("Couldn't find a time coordinate for " + field)

    # Convert number of hours since the reference time into an actual date
    time_vals = netCDF4.num2date(time_var[:].squeeze(), time_var.units)

    # Combine 1D latitude and longitudes into a 2D grid of locations
    lon_2d, lat_2d = np.meshgrid(lon_var[:], lat_var[:])

    # Filter the data to smooth it out a bit
    data_var = ndimage.gaussian_filter(data_var[:][0][0], sigma=1.5, order=0)

    return time_vals, lat_2d, lon_2d, data_var
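
A minimal usage sketch; the 500 hPa level is given in Pa to match the dataset's vertical coordinate, and the field name is an assumption:

from datetime import datetime

time_vals, lat_2d, lon_2d, data_var = get_closest_gfs(
    datetime.utcnow(), 50000, 'Temperature_isobaric')
print(time_vals, data_var.shape)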
Example #5
import pandas as pd
from netCDF4 import num2date
from pint import UnitRegistry
from siphon.ncss import NCSS


def retrieve_point_forecast(ds, lat, lon, var, ensemble):
    '''
    ds: a siphon dataset object
    lat: latitude of the point of interest
    lon: longitude of the point of interest
    var: model variable name to extract
    ensemble: True/False indicator if the ds object contains ensemble data

    Given a siphon dataset object, retrieve the forecast variable for the
    given coordinates.

    If the object is from an ensemble, the variables object has an
    additional (ensemble-member) dimension.
    '''

    ncss = NCSS(ds.access_urls['NetcdfSubset'])
    query = ncss.query()
    query.lonlat_point(lon, lat)
    query.all_times()
    query.variables(var).accept('netcdf')

    data = ncss.get_data(query)
    temps = data.variables[var]
    time = data.variables['time']
    time_vals = num2date(time[:].squeeze(), time.units)
    ureg = UnitRegistry()

    if ensemble:
        ensemble_temp_series = []
        num_ens = temps.shape[2]  # size of the ensemble-member dimension
        for i in range(num_ens):
            temp_vals = ((temps[:, :, i, :].squeeze() * ureg.kelvin)
                         .to(ureg.degF))
            temp_series = pd.Series(temp_vals, index=time_vals)
            ensemble_temp_series.append(temp_series)
        return ensemble_temp_series
    else:
        temp_vals = (temps[:, :, 0].squeeze() * ureg.kelvin).to(ureg.degF)
        temp_series = pd.Series(temp_vals, index=time_vals)
        return temp_series
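
A minimal usage sketch, assuming best_ds is a siphon Dataset with an NCSS access URL (e.g. pulled from a TDSCatalog as in the earlier examples) and that the variable name matches the model:

forecast = retrieve_point_forecast(best_ds, 40.0, -105.0,
                                   'Temperature_isobaric', ensemble=False)
print(forecast.head())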
Example #6
class ForecastModel(object):
    '''
    An object for holding forecast model information for use within the 
    pvlib library.

    Simplifies use of siphon library on a THREDDS server.

    Parameters
    ----------
    model_type: string
        UNIDATA category in which the model is located.
    model_name: string
        Name of the UNIDATA forecast model.
    set_type: string
        Model dataset type.

    Attributes
    ----------
    access_url: string
        URL specifying the dataset from which data will be retrieved.
    base_tds_url : string
        The top level server address
    catalog_url : string
        The url path of the catalog to parse.
    columns: list
        List of headers used to create the data DataFrame.
    data: pd.DataFrame
        Data returned from the query.
    data_format: string
        Format of the forecast data being requested from UNIDATA.
    dataset: Dataset
        Object containing information used to access forecast data.
    dataframe_variables: list
        Model variables that are present in the data.
    datasets_list: list
        List of all available datasets.
    fm_models: Dataset
        Object containing all available forecast models.
    fm_models_list: list
        List of all available forecast models from UNIDATA.
    latitude: list
        A list of floats containing latitude values.
    location: Location
        A pvlib Location object containing geographic quantities.
    longitude: list
        A list of floats containing longitude values.
    lbox: boolean
        Indicates the use of a location bounding box.
    ncss: NCSS object
        NCSS object used to access the data.
    model_name: string
        Name of the UNIDATA forecast model.
    model: Dataset
        A dictionary of Dataset objects, keyed by dataset name.
    model_url: string
        The url path of the dataset to parse.
    modelvariables: list
        Common variable names that correspond to queryvariables.
    query: NCSS query object
        NCSS query object used to complete the forecast data retrieval.
    queryvariables: list
        Variables that are used to query the THREDDS Data Server.
    rad_type: dictionary
        Dictionary labeling the method used for calculating radiation values.
    time: datetime
        Time range specified for the NCSS query.
    utctime: DatetimeIndex
        Time range in UTC.
    var_stdnames: dictionary
        Dictionary containing the standard names of the variables in the
        query, where the keys are the common names.
    var_units: dictionary
        Dictionary containing the units of the variables in the query,
        where the keys are the common names.
    variables: dictionary
        Dictionary that translates model-specific variable names to
        common variable names.
    vert_level: float or integer
        Vertical altitude for query data.
    wind_type: string
        Quantity that was used to calculate wind_speed.
    zenith: numpy.array
        Solar zenith angles for the given time range.
    '''

    access_url_key = 'NetcdfSubset'
    catalog_url = 'http://thredds.ucar.edu/thredds/catalog.xml'
    base_tds_url = catalog_url.split('/thredds/')[0]
    data_format = 'netcdf'
    vert_level = 100000
    columns = np.array(['temperature',
                        'wind_speed',
                        'total_clouds',
                        'low_clouds',
                        'mid_clouds',
                        'high_clouds',
                        'dni',
                        'dhi',
                        'ghi', ])

    def __init__(self, model_type, model_name, set_type):
        self.model_type = model_type
        self.model_name = model_name
        self.set_type = set_type
        self.catalog = TDSCatalog(self.catalog_url)
        self.fm_models = TDSCatalog(self.catalog.catalog_refs[model_type].href)
        self.fm_models_list = sorted(list(self.fm_models.catalog_refs.keys()))
        
        try:
            model_url = self.fm_models.catalog_refs[model_name].href
        except ParseError:
            raise ParseError(self.model_name + ' model may be unavailable.')

        try:
            self.model = TDSCatalog(model_url)
        except HTTPError:
            raise HTTPError(self.model_name + ' model may be unavailable.')

        self.datasets_list = list(self.model.datasets.keys())
        self.set_dataset()


    def set_dataset(self):
        '''
        Retrieves the designated dataset, creates an NCSS object, and
        initiates an NCSS query.

        '''
        keys = list(self.model.datasets.keys())
        labels = [item.split()[0].lower() for item in keys]
        if self.set_type == 'best':
            self.dataset = self.model.datasets[keys[labels.index('best')]]
        elif self.set_type == 'latest':
            self.dataset = self.model.datasets[keys[labels.index('latest')]]
        elif self.set_type == 'full':
            self.dataset = self.model.datasets[keys[labels.index('full')]]

        self.access_url = self.dataset.access_urls[self.access_url_key]
        self.ncss = NCSS(self.access_url)
        self.query = self.ncss.query()        

    def set_query_latlon(self):
        '''
        Sets the NCSS query location latitude and longitude.

        '''
        if isinstance(self.longitude, list):
            self.lbox = True
            # lonlat_box expects west, east, south, north
            self.query.lonlat_box(self.longitude[0], self.longitude[1],
                                  self.latitude[0], self.latitude[1])
        else:
            self.lbox = False
            self.query.lonlat_point(self.longitude, self.latitude)

    def set_query_time(self):
        '''
        Sets the NCSS query time, as either a single time or a time range.
        '''
        if len(self.utctime) == 1:
            self.query.time(pd.to_datetime(self.utctime)[0])
        else:
            self.query.time_range(pd.to_datetime(self.utctime)[0], 
                pd.to_datetime(self.utctime)[-1])
    
    def set_location(self, time):
        '''
        Sets the location for the query.

        Parameters
        ----------
        time: datetime or DatetimeIndex
            Time range of the query.
        '''
        if isinstance(time, datetime.datetime):
            tzinfo = time.tzinfo
        else:
            tzinfo = time.tz

        if tzinfo is None:
            self.location = Location(self.latitude, self.longitude)
        else:
            self.location = Location(self.latitude, self.longitude, tz=tzinfo)

    def get_query_data(self, latitude, longitude, time, vert_level=None, 
        variables=None):
        '''
        Submits a query to the UNIDATA servers using siphon NCSS and 
        converts the netcdf data to a pandas DataFrame.

        Parameters
        ----------
        latitude: list
            A list of floats containing latitude values.
        longitude: list
            A list of floats containing longitude values.
        time: pd.datetimeindex
            Time range of interest.
        vert_level: float or integer
            Vertical altitude of interest.
        variables: dictionary
            Variables and common names being queried.

        Returns
        -------
        pd.DataFrame
        '''
        if vert_level is not None:
            self.vert_level = vert_level
        if variables is not None:
            self.variables = variables
            self.modelvariables = list(self.variables.keys())
            self.queryvariables = [self.variables[key]
                                   for key in self.modelvariables]
            self.columns = self.modelvariables
            self.dataframe_variables = self.modelvariables
        

        self.latitude = latitude
        self.longitude = longitude
        self.set_query_latlon()
        self.set_location(time)

        self.utctime = localize_to_utc(time, self.location)
        self.set_query_time()

        self.query.vertical_level(self.vert_level)
        self.query.variables(*self.queryvariables)
        self.query.accept(self.data_format)
        netcdf_data = self.ncss.get_data(self.query)

        try:
            time_var = 'time'
            self.set_time(netcdf_data.variables[time_var])
        except KeyError:
            time_var = 'time1'
            self.set_time(netcdf_data.variables[time_var])

        self.data = self.netcdf2pandas(netcdf_data)

        self.set_variable_units(netcdf_data)
        self.set_variable_stdnames(netcdf_data)
        if self.__class__.__name__ == 'HRRR':
            self.calc_temperature(netcdf_data)
        self.convert_temperature()
        self.calc_wind(netcdf_data)
        self.calc_radiation(netcdf_data)

        self.data = self.data.tz_convert(self.location.tz)

        netcdf_data.close()        

        return self.data       

    def netcdf2pandas(self, data):
        '''
        Transforms data from netcdf  to pandas DataFrame.

        Currently only supports one-dimensional netcdf data.

        Parameters
        ----------
        data: netcdf
            Data returned from UNIDATA NCSS query.

        Returns
        -------
        pd.DataFrame
        '''
        if not self.lbox:
            ''' one-dimensional data '''
            data_dict = {}
            for var in self.dataframe_variables:
                data_dict[var] = pd.Series(
                    data[self.variables[var]][:].squeeze(), index=self.utctime)
            return pd.DataFrame(data_dict, columns=self.columns)
        else:
            return pd.DataFrame(columns=self.columns, index=self.utctime)

    def set_time(self, time):
        '''
        Converts time data into a pandas date object.

        Parameters
        ----------
        time: netcdf
            Contains time information.

        Returns
        -------
        pandas.DatetimeIndex
        '''
        times = num2date(time[:].squeeze(), time.units)
        self.time = pd.DatetimeIndex(pd.Series(times), tz='UTC')
        self.time = self.time.tz_convert(self.location.tz)
        self.utctime = localize_to_utc(self.time, self.location.tz)

    def set_variable_units(self, data):
        '''
        Extracts variable unit information from netcdf data.

        Parameters
        ----------
        data: netcdf
            Contains queried variable information.

        '''
        self.var_units = {}
        for var in self.variables:
            self.var_units[var] = data[self.variables[var]].units

    def set_variable_stdnames(self, data):
        '''
        Extracts standard names from netcdf data.

        Parameters
        ----------
        data: netcdf
            Contains queried variable information.

        '''
        self.var_stdnames = {}
        for var in self.variables:
            try:
                self.var_stdnames[var] = \
                    data[self.variables[var]].standard_name
            except AttributeError:
                self.var_stdnames[var] = var

    def calc_radiation(self, data, cloud_type='total_clouds'):
        '''
        Determines shortwave radiation values if they are missing from 
        the model data.

        Parameters
        ----------
        data: netcdf
            Query data formatted in netcdf format.
        cloud_type: string
            Type of cloud cover to use for calculating radiation values.
        '''
        self.rad_type = {}
        if not self.lbox and cloud_type in self.modelvariables:           
            cloud_prct = self.data[cloud_type]
            solpos = get_solarposition(self.time, self.location)
            self.zenith = np.array(solpos.zenith.tz_convert('UTC'))
            for rad in ['dni','dhi','ghi']:
                if self.model_name == 'HRRR_ESRL':
                    # HRRR_ESRL is the only model with the 
                    # correct equation of time.
                    if rad in self.modelvariables:
                        self.data[rad] = pd.Series(
                            data[self.variables[rad]][:].squeeze(), 
                            index=self.time)
                        self.rad_type[rad] = 'forecast'
                        self.data[rad].fillna(0, inplace=True)
                else:
                    # No forecast radiation available; fall back to Liu-Jordan
                    self.rad_type[rad] = 'liujordan'
                    self.data[rad] = liujordan(self.zenith, cloud_prct)[rad]
                    self.data[rad].fillna(0, inplace=True)

            for var in ['dni', 'dhi', 'ghi']:
                self.data[var].fillna(0, inplace=True)
                self.var_units[var] = '$W m^{-2}$'

    def convert_temperature(self):
        '''
        Converts Kelvin to Celsius.

        '''
        if 'Temperature_surface' in self.queryvariables or 'Temperature_isobaric' in self.queryvariables:
            self.data['temperature'] -= 273.15
            self.var_units['temperature'] = 'C'

    def calc_temperature(self, data):
        '''
        Calculates temperature (in degrees C) from isobaric temperature.

        Parameters
        ----------
        data: netcdf
            Query data in netcdf format.
        '''
        P = data['Pressure_surface'][:].squeeze() / 100.0  # Pa -> hPa
        Tiso = data['Temperature_isobaric'][:].squeeze()
        Td = data['Dewpoint_temperature_isobaric'][:].squeeze() - 273.15  # K -> C

        # saturation water vapor pressure (Magnus formula)
        e = 6.11 * 10**((7.5 * Td) / (Td + 273.3))

        # saturation water vapor mixing ratio
        w = 0.622 * (e / (P - e))

        # remove the latent heat contribution (L/cp * w)
        T = Tiso - ((2.501 * 10.**6) / 1005.7) * w

        self.data['temperature'] = T

    def calc_wind(self, data):
        '''
        Computes wind speed. 

        In some cases only gust wind speed is available. The wind_type 
        attribute will indicate the type of wind speed that is present.

        Parameters
        ----------
        data: netcdf
            Query data in netcdf format.
        '''
        if not self.lbox:
            wind_data = None
            if ('u-component_of_wind_isobaric' in self.queryvariables and
                    'v-component_of_wind_isobaric' in self.queryvariables):
                wind_data = np.sqrt(
                    data['u-component_of_wind_isobaric'][:].squeeze()**2 +
                    data['v-component_of_wind_isobaric'][:].squeeze()**2)
                self.wind_type = 'component'
            elif 'Wind_speed_gust_surface' in self.queryvariables:
                wind_data = data['Wind_speed_gust_surface'][:].squeeze()
                self.wind_type = 'gust'

            # Only store the result if wind data was actually computed
            if 'wind_speed' in self.data and wind_data is not None:
                self.data['wind_speed'] = pd.Series(wind_data, index=self.time)
                self.var_units['wind_speed'] = 'm/s'
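
A minimal usage sketch, assuming pandas is imported as pd and that the catalog, model, and variable names below still exist on the UNIDATA server (these values are assumptions, not part of the class):

fm = ForecastModel('Forecast Model Data', 'GFS Half Degree Forecast', 'best')
variables = {'temperature': 'Temperature_isobaric'}  # assumed mapping
data = fm.get_query_data(40.0, -105.0,
                         pd.DatetimeIndex([pd.Timestamp.now(tz='UTC')]),
                         variables=variables)
print(data.head())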
Example #7
class TestNCSS(object):
    server = 'http://thredds.ucar.edu/thredds/ncss/'
    urlPath = 'grib/NCEP/GFS/Global_0p5deg/GFS_Global_0p5deg_20150612_1200.grib2'

    @recorder.use_cassette('ncss_test_metadata')
    def setup(self):
        dt = datetime(2015, 6, 12, 15, 0, 0)
        self.ncss = NCSS(self.server + self.urlPath)
        self.nq = self.ncss.query().lonlat_point(-105, 40).time(dt)
        self.nq.variables('Temperature_isobaric', 'Relative_humidity_isobaric')

    def test_good_query(self):
        assert self.ncss.validate_query(self.nq)

    def test_bad_query(self):
        self.nq.variables('foo')
        assert not self.ncss.validate_query(self.nq)

    def test_bad_query_no_vars(self):
        self.nq.var.clear()
        assert not self.ncss.validate_query(self.nq)

    @recorder.use_cassette('ncss_gfs_xml_point')
    def test_xml_point(self):
        self.nq.accept('xml')
        xml_data = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in xml_data
        assert 'Relative_humidity_isobaric' in xml_data
        assert xml_data['lat'][0] == 40
        assert xml_data['lon'][0] == -105

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_csv_point(self):
        self.nq.accept('csv')
        csv_data = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in csv_data
        assert 'Relative_humidity_isobaric' in csv_data
        assert csv_data['lat'][0] == 40
        assert csv_data['lon'][0] == -105

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_unit_handler_csv(self):
        self.nq.accept('csv')
        self.ncss.unit_handler = tuple_unit_handler
        csv_data = self.ncss.get_data(self.nq)

        temp = csv_data['Temperature_isobaric']
        assert len(temp) == 2
        assert temp[1] == 'K'

        relh = csv_data['Relative_humidity_isobaric']
        assert len(relh) == 2
        assert relh[1] == '%'

    @recorder.use_cassette('ncss_gfs_xml_point')
    def test_unit_handler_xml(self):
        self.nq.accept('xml')
        self.ncss.unit_handler = tuple_unit_handler
        xml_data = self.ncss.get_data(self.nq)

        temp = xml_data['Temperature_isobaric']
        assert len(temp) == 2
        assert temp[1] == 'K'

        relh = xml_data['Relative_humidity_isobaric']
        assert len(relh) == 2
        assert relh[1] == '%'

    @recorder.use_cassette('ncss_gfs_netcdf_point')
    def test_netcdf_point(self):
        self.nq.accept('netcdf')
        nc = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in nc.variables
        assert 'Relative_humidity_isobaric' in nc.variables
        assert nc.variables['latitude'][0] == 40
        assert nc.variables['longitude'][0] == -105

    @recorder.use_cassette('ncss_gfs_netcdf4_point')
    def test_netcdf4_point(self):
        self.nq.accept('netcdf4')
        nc = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in nc.variables
        assert 'Relative_humidity_isobaric' in nc.variables
        assert nc.variables['latitude'][0] == 40
        assert nc.variables['longitude'][0] == -105

    @recorder.use_cassette('ncss_gfs_vertical_level')
    def test_vertical_level(self):
        self.nq.accept('csv').vertical_level(50000)
        csv_data = self.ncss.get_data(self.nq)

        assert str(csv_data['Temperature_isobaric'])[:6] == '263.39'

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_raw_csv(self):
        self.nq.accept('csv')
        csv_data = self.ncss.get_data_raw(self.nq)

        assert csv_data.startswith(b'date,lat')

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_unknown_mime(self):
        self.nq.accept('csv')
        with response_context():
            csv_data = self.ncss.get_data(self.nq)
            assert csv_data.startswith(b'date,lat')
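
The unit-handler tests above rely on a tuple_unit_handler defined elsewhere in the test module. A plausible sketch, inferred from the assertions (each value comes back paired with its unit string):

import numpy as np

def tuple_unit_handler(data, units=None):
    # Return the parsed values together with their unit string, so that
    # csv_data['Temperature_isobaric'][1] == 'K' holds in the tests
    return np.array(data), units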
Example #8
class TestNCSS(object):
    """Test NCSS queries and response parsing."""

    server = 'http://thredds.ucar.edu/thredds/ncss/'
    urlPath = 'grib/NCEP/GFS/Global_0p5deg/GFS_Global_0p5deg_20150612_1200.grib2'

    @recorder.use_cassette('ncss_test_metadata')
    def setup(self):
        """Set up for tests with a default valid query."""
        dt = datetime(2015, 6, 12, 15, 0, 0)
        self.ncss = NCSS(self.server + self.urlPath)
        self.nq = self.ncss.query().lonlat_point(-105, 40).time(dt)
        self.nq.variables('Temperature_isobaric', 'Relative_humidity_isobaric')

    def test_good_query(self):
        """Test that a good query is properly validated."""
        assert self.ncss.validate_query(self.nq)

    def test_bad_query(self):
        """Test that a query with an unknown variable is invalid."""
        self.nq.variables('foo')
        assert not self.ncss.validate_query(self.nq)

    def test_empty_query(self):
        """Test that an empty query is invalid."""
        query = self.ncss.query()
        res = self.ncss.validate_query(query)
        assert not res
        assert not isinstance(res, set)

    def test_bad_query_no_vars(self):
        """Test that a query without variables is invalid."""
        self.nq.var.clear()
        assert not self.ncss.validate_query(self.nq)

    @recorder.use_cassette('ncss_gfs_xml_point')
    def test_xml_point(self):
        """Test parsing XML point returns."""
        self.nq.accept('xml')
        xml_data = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in xml_data
        assert 'Relative_humidity_isobaric' in xml_data
        assert xml_data['lat'][0] == 40
        assert xml_data['lon'][0] == -105

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_csv_point(self):
        """Test parsing CSV point returns."""
        self.nq.accept('csv')
        csv_data = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in csv_data
        assert 'Relative_humidity_isobaric' in csv_data
        assert csv_data['lat'][0] == 40
        assert csv_data['lon'][0] == -105

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_unit_handler_csv(self):
        """Test unit-handling from CSV returns."""
        self.nq.accept('csv')
        self.ncss.unit_handler = tuple_unit_handler
        csv_data = self.ncss.get_data(self.nq)

        temp = csv_data['Temperature_isobaric']
        assert len(temp) == 2
        assert temp[1] == 'K'

        relh = csv_data['Relative_humidity_isobaric']
        assert len(relh) == 2
        assert relh[1] == '%'

    @recorder.use_cassette('ncss_gfs_xml_point')
    def test_unit_handler_xml(self):
        """Test unit-handling from XML returns."""
        self.nq.accept('xml')
        self.ncss.unit_handler = tuple_unit_handler
        xml_data = self.ncss.get_data(self.nq)

        temp = xml_data['Temperature_isobaric']
        assert len(temp) == 2
        assert temp[1] == 'K'

        relh = xml_data['Relative_humidity_isobaric']
        assert len(relh) == 2
        assert relh[1] == '%'

    @recorder.use_cassette('ncss_gfs_netcdf_point')
    def test_netcdf_point(self):
        """Test handling of netCDF point returns."""
        self.nq.accept('netcdf')
        nc = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in nc.variables
        assert 'Relative_humidity_isobaric' in nc.variables
        assert nc.variables['latitude'][0] == 40
        assert nc.variables['longitude'][0] == -105

    @recorder.use_cassette('ncss_gfs_netcdf4_point')
    def test_netcdf4_point(self):
        """Test handling of netCDF4 point returns."""
        self.nq.accept('netcdf4')
        nc = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in nc.variables
        assert 'Relative_humidity_isobaric' in nc.variables
        assert nc.variables['latitude'][0] == 40
        assert nc.variables['longitude'][0] == -105

    @recorder.use_cassette('ncss_gfs_vertical_level')
    def test_vertical_level(self):
        """Test data return from a single vertical level is correct."""
        self.nq.accept('csv').vertical_level(50000)
        csv_data = self.ncss.get_data(self.nq)

        np.testing.assert_almost_equal(csv_data['Temperature_isobaric'], np.array([263.40]), 2)

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_raw_csv(self):
        """Test CSV point return from a GFS request."""
        self.nq.accept('csv')
        csv_data = self.ncss.get_data_raw(self.nq)

        assert csv_data.startswith(b'date,lat')

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_unknown_mime(self):
        """Test handling of unknown mimetypes."""
        self.nq.accept('csv')
        with response_context():
            csv_data = self.ncss.get_data(self.nq)
            assert csv_data.startswith(b'date,lat')
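
Both test classes also assume module-level setup that is not shown. get_recorder is siphon's real testing helper; response_context and tuple_unit_handler are local helpers defined in the test module. A plausible sketch of that setup:

from datetime import datetime

import numpy as np

from siphon.ncss import NCSS
from siphon.testing import get_recorder

recorder = get_recorder(__file__)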
Example #9
class ForecastModel(object):
    """
    An object for querying and holding forecast model information for
    use within the pvlib library.

    Simplifies use of siphon library on a THREDDS server.

    Parameters
    ----------
    model_type: string
        UNIDATA category in which the model is located.
    model_name: string
        Name of the UNIDATA forecast model.
    set_type: string
        Model dataset type.

    Attributes
    ----------
    access_url: string
        URL specifying the dataset from which data will be retrieved.
    base_tds_url : string
        The top level server address
    catalog_url : string
        The url path of the catalog to parse.
    data: pd.DataFrame
        Data returned from the query.
    data_format: string
        Format of the forecast data being requested from UNIDATA.
    dataset: Dataset
        Object containing information used to access forecast data.
    dataframe_variables: list
        Model variables that are present in the data.
    datasets_list: list
        List of all available datasets.
    fm_models: Dataset
        TDSCatalog object containing all available
        forecast models from UNIDATA.
    fm_models_list: list
        List of all available forecast models from UNIDATA.
    latitude: list
        A list of floats containing latitude values.
    location: Location
        A pvlib Location object containing geographic quantities.
    longitude: list
        A list of floats containing longitude values.
    lbox: boolean
        Indicates the use of a location bounding box.
    ncss: NCSS object
        NCSS object used to access the data.
    model_name: string
        Name of the UNIDATA forecast model.
    model: Dataset
        A dictionary of Dataset objects, keyed by dataset name.
    model_url: string
        The url path of the dataset to parse.
    modelvariables: list
        Common variable names that correspond to queryvariables.
    query: NCSS query object
        NCSS query object used to complete the forecast data retrieval.
    queryvariables: list
        Variables that are used to query the THREDDS Data Server.
    time: DatetimeIndex
        Time range.
    variables: dict
        Defines the variables to obtain from the weather
        model and how they should be renamed to common variable names.
    units: dict
        Dictionary containing the units of the standard variables
        and the model specific variables.
    vert_level: float or integer
        Vertical altitude for query data.
    """

    access_url_key = 'NetcdfSubset'
    catalog_url = 'http://thredds.ucar.edu/thredds/catalog.xml'
    base_tds_url = catalog_url.split('/thredds/')[0]
    data_format = 'netcdf'
    vert_level = 100000

    units = {
        'temp_air': 'C',
        'wind_speed': 'm/s',
        'ghi': 'W/m^2',
        'ghi_raw': 'W/m^2',
        'dni': 'W/m^2',
        'dhi': 'W/m^2',
        'total_clouds': '%',
        'low_clouds': '%',
        'mid_clouds': '%',
        'high_clouds': '%'}

    def __init__(self, model_type, model_name, set_type):
        self.model_type = model_type
        self.model_name = model_name
        self.set_type = set_type
        self.catalog = TDSCatalog(self.catalog_url)
        self.fm_models = TDSCatalog(self.catalog.catalog_refs[model_type].href)
        self.fm_models_list = sorted(list(self.fm_models.catalog_refs.keys()))

        try:
            model_url = self.fm_models.catalog_refs[model_name].href
        except ParseError:
            raise ParseError(self.model_name + ' model may be unavailable.')

        try:
            self.model = TDSCatalog(model_url)
        except HTTPError:
            try:
                self.model = TDSCatalog(model_url)
            except HTTPError:
                raise HTTPError(self.model_name + ' model may be unavailable.')

        self.datasets_list = list(self.model.datasets.keys())
        self.set_dataset()

    def __repr__(self):
        return '{}, {}'.format(self.model_name, self.set_type)

    def set_dataset(self):
        '''
        Retrieves the designated dataset, creates NCSS object, and
        creates a NCSS query object.
        '''

        keys = list(self.model.datasets.keys())
        labels = [item.split()[0].lower() for item in keys]
        if self.set_type == 'best':
            self.dataset = self.model.datasets[keys[labels.index('best')]]
        elif self.set_type == 'latest':
            self.dataset = self.model.datasets[keys[labels.index('latest')]]
        elif self.set_type == 'full':
            self.dataset = self.model.datasets[keys[labels.index('full')]]

        self.access_url = self.dataset.access_urls[self.access_url_key]
        self.ncss = NCSS(self.access_url)
        self.query = self.ncss.query()

    def set_query_latlon(self):
        '''
        Sets the NCSS query location latitude and longitude.
        '''

        if (isinstance(self.longitude, list) and
            isinstance(self.latitude, list)):
            self.lbox = True
            # lonlat_box expects west, east, south, north
            self.query.lonlat_box(self.longitude[0], self.longitude[1],
                                  self.latitude[0], self.latitude[1])
        else:
            self.lbox = False
            self.query.lonlat_point(self.longitude, self.latitude)

    def set_location(self, time, latitude, longitude):
        '''
        Sets the location for the query.

        Parameters
        ----------
        time: datetime or DatetimeIndex
            Time range of the query.
        '''
        if isinstance(time, datetime.datetime):
            tzinfo = time.tzinfo
        else:
            tzinfo = time.tz

        if tzinfo is None:
            self.location = Location(latitude, longitude)
        else:
            self.location = Location(latitude, longitude, tz=tzinfo)

    def get_data(self, latitude, longitude, start, end,
                 vert_level=None, query_variables=None,
                 close_netcdf_data=True):
        """
        Submits a query to the UNIDATA servers using Siphon NCSS and
        converts the netcdf data to a pandas DataFrame.

        Parameters
        ----------
        latitude: float
            The latitude value.
        longitude: float
            The longitude value.
        start: datetime or timestamp
            The start time.
        end: datetime or timestamp
            The end time.
        vert_level: None, float or integer
            Vertical altitude of interest.
        query_variables: None or list
            If None, uses self.variables.
        close_netcdf_data: bool
            Controls if the temporary netcdf data file should be closed.
            Set to False to access the raw data.

        Returns
        -------
        forecast_data : DataFrame
            column names are the weather model's variable names.
        """
        if vert_level is not None:
            self.vert_level = vert_level

        if query_variables is None:
            self.query_variables = list(self.variables.values())
        else:
            self.query_variables = query_variables

        self.latitude = latitude
        self.longitude = longitude
        self.set_query_latlon()  # modifies self.query
        self.set_location(start, latitude, longitude)

        self.start = start
        self.end = end
        self.query.time_range(self.start, self.end)

        self.query.vertical_level(self.vert_level)
        self.query.variables(*self.query_variables)
        self.query.accept(self.data_format)

        self.netcdf_data = self.ncss.get_data(self.query)

        # might be better to go to xarray here so that we can handle
        # higher dimensional data for more advanced applications
        self.data = self._netcdf2pandas(self.netcdf_data, self.query_variables)

        if close_netcdf_data:
            self.netcdf_data.close()

        return self.data

    def process_data(self, data, **kwargs):
        """
        Defines the steps needed to convert raw forecast data
        into processed forecast data. Most forecast models implement
        their own version of this method which also call this one.

        Parameters
        ----------
        data: DataFrame
            Raw forecast data

        Returns
        -------
        data: DataFrame
            Processed forecast data.
        """
        data = self.rename(data)
        return data

    def get_processed_data(self, *args, **kwargs):
        """
        Get and process forecast data.

        Parameters
        ----------
        *args: positional arguments
            Passed to get_data
        **kwargs: keyword arguments
            Passed to get_data and process_data

        Returns
        -------
        data: DataFrame
            Processed forecast data
        """
        return self.process_data(self.get_data(*args, **kwargs), **kwargs)

    def rename(self, data, variables=None):
        """
        Renames the columns according the variable mapping.

        Parameters
        ----------
        data: DataFrame
        variables: None or dict
            If None, uses self.variables

        Returns
        -------
        data: DataFrame
            Renamed data.
        """
        if variables is None:
            variables = self.variables
        return data.rename(columns={y: x for x, y in variables.items()})

    def _netcdf2pandas(self, netcdf_data, query_variables):
        """
        Transforms data from netcdf to pandas DataFrame.

        Parameters
        ----------
        data: netcdf
            Data returned from UNIDATA NCSS query.
        query_variables: list
            The variables requested.

        Returns
        -------
        pd.DataFrame
        """
        # set self.time
        try:
            time_var = 'time'
            self.set_time(netcdf_data.variables[time_var])
        except KeyError:
            # some datasets name the time variable 'time1' instead of 'time'
            time_var = 'time1'
            self.set_time(netcdf_data.variables[time_var])

        data_dict = {key: data[:].squeeze() for key, data in
                     netcdf_data.variables.items() if key in query_variables}

        return pd.DataFrame(data_dict, index=self.time)

    def set_time(self, time):
        '''
        Converts time data into a pandas date object.

        Parameters
        ----------
        time: netcdf
            Contains time information.

        Returns
        -------
        pandas.DatetimeIndex
        '''
        times = num2date(time[:].squeeze(), time.units)
        self.time = pd.DatetimeIndex(pd.Series(times), tz=self.location.tz)

    def cloud_cover_to_ghi_linear(self, cloud_cover, ghi_clear, offset=35,
                                  **kwargs):
        """
        Convert cloud cover to GHI using a linear relationship.

        0% cloud cover returns ghi_clear.

        100% cloud cover returns offset*ghi_clear.

        Parameters
        ----------
        cloud_cover: numeric
            Cloud cover in %.
        ghi_clear: numeric
            GHI under clear sky conditions.
        offset: numeric
            Determines the minimum GHI.
        kwargs
            Not used.

        Returns
        -------
        ghi: numeric
            Estimated GHI.

        References
        ----------
        Larson et. al. "Day-ahead forecasting of solar power output from
        photovoltaic plants in the American Southwest" Renewable Energy
        91, 11-20 (2016).
        """

        offset = offset / 100.
        cloud_cover = cloud_cover / 100.
        ghi = (offset + (1 - offset) * (1 - cloud_cover)) * ghi_clear
        return ghi

    def cloud_cover_to_irradiance_clearsky_scaling(self, cloud_cover,
                                                   method='linear',
                                                   **kwargs):
        """
        Estimates irradiance from cloud cover in the following steps:

        1. Determine clear sky GHI using Ineichen model and
           climatological turbidity.
        2. Estimate cloudy sky GHI using a function of
           cloud_cover e.g.
           :py:meth:`~ForecastModel.cloud_cover_to_ghi_linear`
        3. Estimate cloudy sky DNI using the DISC model.
        4. Calculate DHI from GHI and DNI.

        Parameters
        ----------
        cloud_cover : Series
            Cloud cover in %.
        method : str
            Method for converting cloud cover to GHI.
            'linear' is currently the only option.
        **kwargs
            Passed to the method that does the conversion

        Returns
        -------
        irrads : DataFrame
            Estimated GHI, DNI, and DHI.
        """
        solpos = self.location.get_solarposition(cloud_cover.index)
        cs = self.location.get_clearsky(cloud_cover.index, model='ineichen',
                                        solar_position=solpos)

        method = method.lower()
        if method == 'linear':
            ghi = self.cloud_cover_to_ghi_linear(cloud_cover, cs['ghi'],
                                                 **kwargs)
        else:
            raise ValueError('invalid method argument')

        dni = disc(ghi, solpos['zenith'], cloud_cover.index)['dni']
        dhi = ghi - dni * np.cos(np.radians(solpos['zenith']))

        irrads = pd.DataFrame({'ghi': ghi, 'dni': dni, 'dhi': dhi}).fillna(0)
        return irrads

    def cloud_cover_to_transmittance_linear(self, cloud_cover, offset=0.75,
                                            **kwargs):
        """
        Convert cloud cover to atmospheric transmittance using a linear
        model.

        0% cloud cover returns offset.

        100% cloud cover returns 0.

        Parameters
        ----------
        cloud_cover : numeric
            Cloud cover in %.
        offset : numeric
            Determines the maximum transmittance.
        kwargs
            Not used.

        Returns
        -------
        transmittance : numeric
            Estimated atmospheric transmittance.
        """
        transmittance = ((100.0 - cloud_cover) / 100.0) * offset

        return transmittance

    def cloud_cover_to_irradiance_liujordan(self, cloud_cover, **kwargs):
        """
        Estimates irradiance from cloud cover in the following steps:

        1. Determine transmittance using a function of cloud cover e.g.
           :py:meth:`~ForecastModel.cloud_cover_to_transmittance_linear`
        2. Calculate GHI, DNI, DHI using the
           :py:func:`pvlib.irradiance.liujordan` model

        Parameters
        ----------
        cloud_cover : Series

        Returns
        -------
        irradiance : DataFrame
            Columns include ghi, dni, dhi
        """
        # in principle, get_solarposition could use the forecast
        # pressure, temp, etc., but the cloud cover forecast is not
        # accurate enough to justify using these minor corrections
        solar_position = self.location.get_solarposition(cloud_cover.index)
        dni_extra = extraradiation(cloud_cover.index)
        airmass = self.location.get_airmass(cloud_cover.index)

        transmittance = self.cloud_cover_to_transmittance_linear(cloud_cover,
                                                                 **kwargs)

        irrads = liujordan(solar_position['apparent_zenith'],
                           transmittance, airmass['airmass_absolute'],
                           dni_extra=dni_extra)
        irrads = irrads.fillna(0)

        return irrads

    def cloud_cover_to_irradiance(self, cloud_cover, how='clearsky_scaling',
                                  **kwargs):
        """
        Convert cloud cover to irradiance. A wrapper method.

        Parameters
        ----------
        cloud_cover : Series
        how : str
            Selects the method for conversion. Can be one of
            clearsky_scaling or liujordan.
        **kwargs
            Passed to the selected method.

        Returns
        -------
        irradiance : DataFrame
            Columns include ghi, dni, dhi
        """

        how = how.lower()
        if how == 'clearsky_scaling':
            irrads = self.cloud_cover_to_irradiance_clearsky_scaling(
                cloud_cover, **kwargs)
        elif how == 'liujordan':
            irrads = self.cloud_cover_to_irradiance_liujordan(
                cloud_cover, **kwargs)
        else:
            raise ValueError('invalid how argument')

        return irrads

    def kelvin_to_celsius(self, temperature):
        """
        Converts Kelvin to Celsius.

        Parameters
        ----------
        temperature: numeric

        Returns
        -------
        temperature: numeric
        """
        return temperature - 273.15

    def isobaric_to_ambient_temperature(self, data):
        """
        Calculates temperature from isobaric temperature.

        Parameters
        ----------
        data: DataFrame
            Must contain columns pressure, temperature_iso,
            temperature_dew_iso. Input temperature in K.

        Returns
        -------
        temperature : Series
            Temperature in K
        """

        P = data['pressure'] / 100.0
        Tiso = data['temperature_iso']
        Td = data['temperature_dew_iso'] - 273.15

        # saturation water vapor pressure
        e = 6.11 * 10**((7.5 * Td) / (Td + 273.3))

        # saturation water vapor mixing ratio
        w = 0.622 * (e / (P - e))

        T = Tiso - ((2.501 * 10.**6) / 1005.7) * w

        return T

    def uv_to_speed(self, data):
        """
        Computes wind speed from wind components.

        Parameters
        ----------
        data : DataFrame
            Must contain the columns 'wind_speed_u' and 'wind_speed_v'.

        Returns
        -------
        wind_speed : Series
        """
        wind_speed = np.sqrt(data['wind_speed_u']**2 + data['wind_speed_v']**2)

        return wind_speed

    def gust_to_speed(self, data, scaling=1/1.4):
        """
        Computes standard wind speed from gust.
        Very approximate and location dependent.

        Parameters
        ----------
        data : DataFrame
            Must contain the column 'wind_speed_gust'.
        scaling : numeric
            Factor multiplying the gust to estimate sustained wind speed.

        Returns
        -------
        wind_speed : Series
        """
        wind_speed = data['wind_speed_gust'] * scaling

        return wind_speed
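
The linear cloud-cover-to-GHI relationship above is easy to verify by hand; a short sketch with assumed inputs (offset=35, 50% cloud cover, 800 W/m^2 clear-sky GHI):

offset = 35 / 100.       # 0.35
cloud_cover = 50 / 100.  # 0.50
ghi_clear = 800.
ghi = (offset + (1 - offset) * (1 - cloud_cover)) * ghi_clear
print(ghi)               # (0.35 + 0.65 * 0.5) * 800 = 540.0 W/m^2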
Example #10
class ForecastModel(object):
    """
    An object for querying and holding forecast model information for
    use within the pvlib library.

    Simplifies use of siphon library on a THREDDS server.

    Parameters
    ----------
    model_type: string
        UNIDATA category in which the model is located.
    model_name: string
        Name of the UNIDATA forecast model.
    set_type: string
        Model dataset type.

    Attributes
    ----------
    access_url: string
        URL specifying the dataset from which data will be retrieved.
    base_tds_url : string
        The top level server address
    catalog_url : string
        The url path of the catalog to parse.
    data: pd.DataFrame
        Data returned from the query.
    data_format: string
        Format of the forecast data being requested from UNIDATA.
    dataset: Dataset
        Object containing information used to access forecast data.
    dataframe_variables: list
        Model variables that are present in the data.
    datasets_list: list
        List of all available datasets.
    fm_models: Dataset
        TDSCatalog object containing all available
        forecast models from UNIDATA.
    fm_models_list: list
        List of all available forecast models from UNIDATA.
    latitude: list
        A list of floats containing latitude values.
    location: Location
        A pvlib Location object containing geographic quantities.
    longitude: list
        A list of floats containing longitude values.
    lbox: boolean
        Indicates the use of a location bounding box.
    ncss: NCSS object
        NCSS object used to access the data.
    model_name: string
        Name of the UNIDATA forecast model.
    model: Dataset
        A dictionary of Dataset objects, keyed by dataset name.
    model_url: string
        The url path of the dataset to parse.
    modelvariables: list
        Common variable names that correspond to queryvariables.
    query: NCSS query object
        NCSS query object used to complete the forecast data retrieval.
    queryvariables: list
        Variables that are used to query the THREDDS Data Server.
    time: DatetimeIndex
        Time range.
    variables: dict
        Defines the variables to obtain from the weather
        model and how they should be renamed to common variable names.
    units: dict
        Dictionary containing the units of the standard variables
        and the model specific variables.
    vert_level: float or integer
        Vertical altitude for query data.
    """

    access_url_key = 'NetcdfSubset'
    catalog_url = 'https://thredds.ucar.edu/thredds/catalog.xml'
    base_tds_url = catalog_url.split('/thredds/')[0]
    data_format = 'netcdf'

    units = {
        'temp_air': 'C',
        'wind_speed': 'm/s',
        'ghi': 'W/m^2',
        'ghi_raw': 'W/m^2',
        'dni': 'W/m^2',
        'dhi': 'W/m^2',
        'total_clouds': '%',
        'low_clouds': '%',
        'mid_clouds': '%',
        'high_clouds': '%'
    }

    def __init__(self, model_type, model_name, set_type, vert_level=None):
        self.model_type = model_type
        self.model_name = model_name
        self.set_type = set_type
        self.connected = False
        self.vert_level = vert_level

    def connect_to_catalog(self):
        self.catalog = TDSCatalog(self.catalog_url)
        self.fm_models = TDSCatalog(
            self.catalog.catalog_refs[self.model_type].href)
        self.fm_models_list = sorted(list(self.fm_models.catalog_refs.keys()))

        try:
            model_url = self.fm_models.catalog_refs[self.model_name].href
        except ParseError:
            raise ParseError(self.model_name + ' model may be unavailable.')

        try:
            self.model = TDSCatalog(model_url)
        except HTTPError:
            try:
                self.model = TDSCatalog(model_url)
            except HTTPError:
                raise HTTPError(self.model_name + ' model may be unavailable.')

        self.datasets_list = list(self.model.datasets.keys())
        self.set_dataset()
        self.connected = True

    def __repr__(self):
        return '{}, {}'.format(self.model_name, self.set_type)

    def set_dataset(self):
        '''
        Retrieves the designated dataset, creates NCSS object, and
        creates a NCSS query object.
        '''

        keys = list(self.model.datasets.keys())
        labels = [item.split()[0].lower() for item in keys]
        if self.set_type == 'best':
            self.dataset = self.model.datasets[keys[labels.index('best')]]
        elif self.set_type == 'latest':
            self.dataset = self.model.datasets[keys[labels.index('latest')]]
        elif self.set_type == 'full':
            self.dataset = self.model.datasets[keys[labels.index('full')]]

        self.access_url = self.dataset.access_urls[self.access_url_key]
        self.ncss = NCSS(self.access_url)
        self.query = self.ncss.query()

    def set_query_time_range(self, start, end):
        """
        Parameters
        ----------
        start : datetime.datetime, pandas.Timestamp
            Must be tz-localized.
        end : datetime.datetime, pandas.Timestamp
            Must be tz-localized.

        Notes
        -----
        Assigns ``self.start``, ``self.end``. Modifies ``self.query``
        """
        self.start = pd.Timestamp(start)
        self.end = pd.Timestamp(end)
        if self.start.tz is None or self.end.tz is None:
            raise TypeError('start and end must be tz-localized')
        self.query.time_range(self.start, self.end)

    def set_query_latlon(self):
        '''
        Sets the NCSS query location latitude and longitude.
        '''

        if (isinstance(self.longitude, list)
                and isinstance(self.latitude, list)):
            self.lbox = True
            # west, east, south, north
            self.query.lonlat_box(self.longitude[0], self.longitude[1],
                                  self.latitude[0], self.latitude[1])
        else:
            self.lbox = False
            self.query.lonlat_point(self.longitude, self.latitude)

    def set_location(self, tz, latitude, longitude):
        '''
        Sets the location for the query.

        Parameters
        ----------
        tz: tzinfo
            Timezone of the query
        latitude: float
            Latitude of the query
        longitude: float
            Longitude of the query

        Notes
        -----
        Assigns ``self.location``.
        '''
        self.location = Location(latitude, longitude, tz=tz)

    def get_data(self,
                 latitude,
                 longitude,
                 start,
                 end,
                 vert_level=None,
                 query_variables=None,
                 close_netcdf_data=True,
                 **kwargs):
        """
        Submits a query to the UNIDATA servers using Siphon NCSS and
        converts the netcdf data to a pandas DataFrame.

        Parameters
        ----------
        latitude: float
            The latitude value.
        longitude: float
            The longitude value.
        start: datetime or timestamp
            The start time. Must be tz-localized.
        end: datetime or timestamp
            The end time. Must be tz-localized.
        vert_level: None, float or integer, default None
            Vertical altitude of interest.
        query_variables: None or list, default None
            If None, uses self.variables.
        close_netcdf_data: bool, default True
            Controls if the temporary netcdf data file should be closed.
            Set to False to access the raw data.
        **kwargs:
            Additional keyword arguments are silently ignored.

        Returns
        -------
        forecast_data : DataFrame
            column names are the weather model's variable names.
        """

        if not self.connected:
            self.connect_to_catalog()

        if vert_level is not None:
            self.vert_level = vert_level

        if query_variables is None:
            self.query_variables = list(self.variables.values())
        else:
            self.query_variables = query_variables

        self.set_query_time_range(start, end)

        self.latitude = latitude
        self.longitude = longitude
        self.set_query_latlon()  # modifies self.query
        self.set_location(self.start.tz, latitude, longitude)

        if self.vert_level is not None:
            self.query.vertical_level(self.vert_level)

        self.query.variables(*self.query_variables)
        self.query.accept(self.data_format)

        self.netcdf_data = self.ncss.get_data(self.query)

        # might be better to go to xarray here so that we can handle
        # higher dimensional data for more advanced applications
        self.data = self._netcdf2pandas(self.netcdf_data, self.query_variables,
                                        self.start, self.end)

        if close_netcdf_data:
            self.netcdf_data.close()

        return self.data

    def process_data(self, data, **kwargs):
        """
        Defines the steps needed to convert raw forecast data
        into processed forecast data. Most forecast models implement
        their own version of this method, which also calls this one.

        Parameters
        ----------
        data: DataFrame
            Raw forecast data

        Returns
        -------
        data: DataFrame
            Processed forecast data.
        """
        data = self.rename(data)
        return data

    def get_processed_data(self, *args, **kwargs):
        """
        Get and process forecast data.

        Parameters
        ----------
        *args: positional arguments
            Passed to get_data
        **kwargs: keyword arguments
            Passed to get_data and process_data

        Returns
        -------
        data: DataFrame
            Processed forecast data
        """
        return self.process_data(self.get_data(*args, **kwargs), **kwargs)
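
    # A minimal end-to-end sketch (assumes a concrete subclass, e.g. a
    # hypothetical GFS wrapper, that defines self.variables and its own
    # process_data):
    #   fm = GFS()
    #   start = pd.Timestamp.now(tz='UTC')
    #   end = start + pd.Timedelta(days=2)
    #   data = fm.get_processed_data(40.0, -105.0, start, end)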

    def rename(self, data, variables=None):
        """
        Renames the columns according to the variable mapping.

        Parameters
        ----------
        data: DataFrame
        variables: None or dict, default None
            If None, uses self.variables

        Returns
        -------
        data: DataFrame
            Renamed data.
        """
        if variables is None:
            variables = self.variables
        return data.rename(columns={y: x for x, y in variables.items()})
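
    # e.g. with variables = {'temp_air': 'Temperature_surface'}, the raw
    # column 'Temperature_surface' is renamed to 'temp_air'; the mapping is
    # inverted before being handed to DataFrame.rename.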

    def _netcdf2pandas(self, netcdf_data, query_variables, start, end):
        """
        Transforms data from netcdf to pandas DataFrame.

        Parameters
        ----------
        netcdf_data: netCDF4.Dataset
            Data returned from UNIDATA NCSS query.
        query_variables: list
            The variables requested.
        start: Timestamp
            The start time.
        end: Timestamp
            The end time.

        Returns
        -------
        pd.DataFrame
        """
        # set self.time
        try:
            time_var = 'time'
            self.set_time(netcdf_data.variables[time_var])
        except KeyError:
            # some models name the time coordinate 'time1' instead of 'time'
            time_var = 'time1'
            self.set_time(netcdf_data.variables[time_var])

        data_dict = {}
        for key, data in netcdf_data.variables.items():
            # this check accounts for the possibility of extra variables
            # being returned beyond those requested
            if key not in query_variables:
                continue
            squeezed = data[:].squeeze()

            # If the data is big endian, swap the byte order to make it
            # little endian
            if squeezed.dtype.byteorder == '>':
                squeezed = squeezed.byteswap().newbyteorder()
            if squeezed.ndim == 1:
                data_dict[key] = squeezed
            elif squeezed.ndim == 2:
                for num, data_level in enumerate(squeezed.T):
                    data_dict[key + '_' + str(num)] = data_level
            else:
                raise ValueError('cannot parse ndim > 2')
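        # e.g. a 2-D (time, level) variable named 'foo' with three levels
        # becomes columns 'foo_0', 'foo_1', 'foo_2' in the DataFrame below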

        data = pd.DataFrame(data_dict, index=self.time)
        # sometimes data is returned as hours since T0
        # where T0 is before start. Then the hours between
        # T0 and start are added *after* end. So sort and slice
        # to remove the garbage
        data = data.sort_index().loc[start:end]
        return data

    def set_time(self, time):
        '''
        Converts netCDF time values into a tz-aware pandas DatetimeIndex.

        Parameters
        ----------
        time: netcdf
            Contains time information.

        Notes
        -----
        Assigns ``self.time``.
        '''
        times = num2date(time[:].squeeze(),
                         time.units,
                         only_use_cftime_datetimes=False,
                         only_use_python_datetimes=True)
        self.time = pd.DatetimeIndex(pd.Series(times), tz=self.location.tz)

    def cloud_cover_to_ghi_linear(self,
                                  cloud_cover,
                                  ghi_clear,
                                  offset=35,
                                  **kwargs):
        """
        Convert cloud cover to GHI using a linear relationship.

        0% cloud cover returns ghi_clear.

        100% cloud cover returns offset*ghi_clear.

        Parameters
        ----------
        cloud_cover: numeric
            Cloud cover in %.
        ghi_clear: numeric
            GHI under clear sky conditions.
        offset: numeric, default 35
            Determines the minimum GHI.
        kwargs
            Not used.

        Returns
        -------
        ghi: numeric
            Estimated GHI.

        References
        ----------
        Larson et al. "Day-ahead forecasting of solar power output from
        photovoltaic plants in the American Southwest" Renewable Energy
        91, 11-20 (2016).
        """

        offset = offset / 100.
        cloud_cover = cloud_cover / 100.
        ghi = (offset + (1 - offset) * (1 - cloud_cover)) * ghi_clear
        return ghi
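
    # Worked example (default offset=35): cloud_cover=50 with
    # ghi_clear=1000 W/m^2 gives
    #   ghi = (0.35 + 0.65 * 0.5) * 1000 = 675 W/m^2
    # and cloud_cover=100 gives 0.35 * 1000 = 350 W/m^2.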

    def cloud_cover_to_irradiance_clearsky_scaling(self,
                                                   cloud_cover,
                                                   method='linear',
                                                   **kwargs):
        """
        Estimates irradiance from cloud cover in the following steps:

        1. Determine clear sky GHI using Ineichen model and
           climatological turbidity.
        2. Estimate cloudy sky GHI using a function of
           cloud_cover e.g.
           :py:meth:`~ForecastModel.cloud_cover_to_ghi_linear`
        3. Estimate cloudy sky DNI using the DISC model.
        4. Calculate DHI from DNI and GHI.

        Parameters
        ----------
        cloud_cover : Series
            Cloud cover in %.
        method : str, default 'linear'
            Method for converting cloud cover to GHI.
            'linear' is currently the only option.
        **kwargs
            Passed to the method that does the conversion.

        Returns
        -------
        irrads : DataFrame
            Estimated GHI, DNI, and DHI.
        """
        solpos = self.location.get_solarposition(cloud_cover.index)
        cs = self.location.get_clearsky(cloud_cover.index,
                                        model='ineichen',
                                        solar_position=solpos)

        method = method.lower()
        if method == 'linear':
            ghi = self.cloud_cover_to_ghi_linear(cloud_cover, cs['ghi'],
                                                 **kwargs)
        else:
            raise ValueError('invalid method argument')

        dni = disc(ghi, solpos['zenith'], cloud_cover.index)['dni']
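        # DHI follows from the closure relation GHI = DHI + DNI * cos(zenith)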
        dhi = ghi - dni * np.cos(np.radians(solpos['zenith']))

        irrads = pd.DataFrame({'ghi': ghi, 'dni': dni, 'dhi': dhi}).fillna(0)
        return irrads

    def cloud_cover_to_transmittance_linear(self,
                                            cloud_cover,
                                            offset=0.75,
                                            **kwargs):
        """
        Convert cloud cover to atmospheric transmittance using a linear
        model.

        0% cloud cover returns offset.

        100% cloud cover returns 0.

        Parameters
        ----------
        cloud_cover : numeric
            Cloud cover in %.
        offset : numeric, default 0.75
            Determines the maximum transmittance.
        kwargs
            Not used.

        Returns
        -------
        transmittance : numeric
            Estimated atmospheric transmittance.
        """
        transmittance = ((100.0 - cloud_cover) / 100.0) * offset

        return transmittance
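
    # Worked example (default offset=0.75): cloud_cover=40 gives
    #   transmittance = ((100.0 - 40) / 100.0) * 0.75 = 0.45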

    def cloud_cover_to_irradiance_liujordan(self, cloud_cover, **kwargs):
        """
        Estimates irradiance from cloud cover in the following steps:

        1. Determine transmittance using a function of cloud cover e.g.
           :py:meth:`~ForecastModel.cloud_cover_to_transmittance_linear`
        2. Calculate GHI, DNI, DHI using the
           :py:func:`pvlib.irradiance.liujordan` model

        Parameters
        ----------
        cloud_cover : Series
            Cloud cover in %.

        Returns
        -------
        irradiance : DataFrame
            Columns include ghi, dni, dhi
        """
        # in principle, get_solarposition could use the forecast
        # pressure, temp, etc., but the cloud cover forecast is not
        # accurate enough to justify using these minor corrections
        solar_position = self.location.get_solarposition(cloud_cover.index)
        dni_extra = get_extra_radiation(cloud_cover.index)
        airmass = self.location.get_airmass(cloud_cover.index)

        transmittance = self.cloud_cover_to_transmittance_linear(
            cloud_cover, **kwargs)

        irrads = liujordan(solar_position['apparent_zenith'],
                           transmittance,
                           airmass['airmass_absolute'],
                           dni_extra=dni_extra)
        irrads = irrads.fillna(0)

        return irrads

    def cloud_cover_to_irradiance(self,
                                  cloud_cover,
                                  how='clearsky_scaling',
                                  **kwargs):
        """
        Convert cloud cover to irradiance. A wrapper method.

        Parameters
        ----------
        cloud_cover : Series
            Cloud cover in %.
        how : str, default 'clearsky_scaling'
            Selects the method for conversion. Can be one of
            clearsky_scaling or liujordan.
        **kwargs
            Passed to the selected method.

        Returns
        -------
        irradiance : DataFrame
            Columns include ghi, dni, dhi
        """

        how = how.lower()
        if how == 'clearsky_scaling':
            irrads = self.cloud_cover_to_irradiance_clearsky_scaling(
                cloud_cover, **kwargs)
        elif how == 'liujordan':
            irrads = self.cloud_cover_to_irradiance_liujordan(
                cloud_cover, **kwargs)
        else:
            raise ValueError('invalid how argument')

        return irrads
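
    # A minimal usage sketch (hypothetical 'fm' instance; 'total_clouds' is
    # one of the cloud cover variables named in the units mapping above):
    #   irrads = fm.cloud_cover_to_irradiance(data['total_clouds'],
    #                                         how='clearsky_scaling')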

    def kelvin_to_celsius(self, temperature):
        """
        Converts temperature in kelvin to degrees Celsius.

        Parameters
        ----------
        temperature: numeric

        Returns
        -------
        temperature: numeric
        """
        return temperature - 273.15

    def isobaric_to_ambient_temperature(self, data):
        """
        Calculates ambient temperature from isobaric-level temperature
        and dew point.

        Parameters
        ----------
        data: DataFrame
            Must contain columns pressure, temperature_iso,
            temperature_dew_iso. Input temperature in K.

        Returns
        -------
        temperature : Series
            Temperature in K
        """

        P = data['pressure'] / 100.0  # Pa to hPa  # noqa: N806
        Tiso = data['temperature_iso']  # noqa: N806
        Td = data['temperature_dew_iso'] - 273.15  # K to deg C  # noqa: N806

        # saturation water vapor pressure (Magnus formula, e in hPa;
        # 237.3 is the standard Magnus constant, with Td in deg C)
        e = 6.11 * 10**((7.5 * Td) / (Td + 237.3))

        # saturation water vapor mixing ratio
        w = 0.622 * (e / (P - e))

        # latent-heat correction Lv/cp * w, with Lv = 2.501e6 J/kg and
        # cp = 1005.7 J/(kg K)
        temperature = Tiso - ((2.501 * 10.**6) / 1005.7) * w

        return temperature

    def uv_to_speed(self, data):
        """
        Computes wind speed from wind components.

        Parameters
        ----------
        data : DataFrame
            Must contain the columns 'wind_speed_u' and 'wind_speed_v'.

        Returns
        -------
        wind_speed : Series
        """
        wind_speed = np.sqrt(data['wind_speed_u']**2 + data['wind_speed_v']**2)

        return wind_speed
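
    # e.g. wind_speed_u=3 and wind_speed_v=4 (m/s) give
    # wind_speed = sqrt(3**2 + 4**2) = 5 m/s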

    def gust_to_speed(self, data, scaling=1 / 1.4):
        """
        Estimates sustained wind speed from wind gusts.
        Very approximate and location dependent.

        Parameters
        ----------
        data : DataFrame
            Must contain the column 'wind_speed_gust'.
        scaling : numeric, default 1/1.4
            Factor by which the gust is scaled down to estimate the
            sustained wind speed.

        Returns
        -------
        wind_speed : Series
        """
        wind_speed = data['wind_speed_gust'] * scaling

        return wind_speed
Example #11
0
from datetime import datetime

from netCDF4 import num2date
import numpy as np
import scipy.ndimage as ndimage
from siphon.ncss import NCSS

########################################
# Set up access to the data

# Create NCSS object to access the NetcdfSubset
base_url = 'https://www.ncei.noaa.gov/thredds/ncss/grid/gfs-g4-anl-files/'
dt = datetime(2016, 8, 22, 18)
ncss = NCSS('{}{dt:%Y%m}/{dt:%Y%m%d}/gfsanl_4_{dt:%Y%m%d}_'
            '{dt:%H}00_003.grb2'.format(base_url, dt=dt))

# Create lat/lon box for location you want to get data for
query = ncss.query()
query.lonlat_box(north=50, south=30, east=-80, west=-115)
query.time(datetime(2016, 8, 22, 21))

# Request data for geopotential height
query.variables('Geopotential_height_isobaric', 'u-component_of_wind_isobaric',
                'v-component_of_wind_isobaric')
query.vertical_level(100000)
data = ncss.get_data(query)
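# (get_data returns a netCDF4 Dataset here, accessed via .variables below)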

# Pull out variables you want to use
height_var = data.variables['Geopotential_height_isobaric']
u_wind_var = data.variables['u-component_of_wind_isobaric']
v_wind_var = data.variables['v-component_of_wind_isobaric']

# Find the name of the time coordinate
Example #12
0
from datetime import datetime

from scipy.ndimage import gaussian_filter
from siphon.ncss import NCSS

###########################
# **Get the data**
#
# This example uses data from the North American Mesoscale Model Analysis
# (https://nomads.ncdc.gov/) for 18 UTC 27 April 2011.

base_url = 'https://www.ncei.noaa.gov/thredds/ncss/grid/namanl/'
dt = datetime(2011, 4, 27)
ncss = NCSS('{}{dt:%Y%m}/{dt:%Y%m%d}/namanl_218_{dt:%Y%m%d}_'
            '1800_000.grb'.format(base_url, dt=dt))

# Query for required variables
namdata = ncss.query().all_times()
namdata.variables('Geopotential_height_isobaric',
                  'u-component_of_wind_isobaric',
                  'v-component_of_wind_isobaric',
                  'Temperature_isobaric',
                  'Relative_humidity_isobaric',
                  'Best_4_layer_lifted_index_layer_between_two_pressure_'
                  'difference_from_ground_layer',
                  'Absolute_vorticity_isobaric',
                  'Pressure_reduced_to_MSL_msl',
                  'Dew_point_temperature_height_above_ground'
                  ).add_lonlat()

# Set the lat/lon box for the data to pull in.
namdata.lonlat_box(-135, -60, 15, 65)
Example #13
0
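# Context sketch (assumptions, not part of the original excerpt): this test
# class relies on names defined elsewhere in siphon's test module, roughly:
#   from datetime import datetime
#   from nose.tools import eq_
#   from siphon.ncss import NCSS
#   from siphon.testing import get_recorder
#   recorder = get_recorder(__file__)
# tuple_unit_handler and response_context are assumed to be helpers defined
# in the same test module.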
class TestNCSS(object):
    server = 'http://thredds.ucar.edu/thredds/ncss/'
    urlPath = 'grib/NCEP/GFS/Global_0p5deg/GFS_Global_0p5deg_20150612_1200.grib2'

    @recorder.use_cassette('ncss_test_metadata')
    def setup(self):
        dt = datetime(2015, 6, 12, 15, 0, 0)
        self.ncss = NCSS(self.server + self.urlPath)
        self.nq = self.ncss.query().lonlat_point(-105, 40).time(dt)
        self.nq.variables('Temperature_isobaric', 'Relative_humidity_isobaric')

    def test_good_query(self):
        assert self.ncss.validate_query(self.nq)

    def test_bad_query(self):
        self.nq.variables('foo')
        assert not self.ncss.validate_query(self.nq)

    def test_bad_query_no_vars(self):
        self.nq.var.clear()
        assert not self.ncss.validate_query(self.nq)

    @recorder.use_cassette('ncss_gfs_xml_point')
    def test_xml_point(self):
        self.nq.accept('xml')
        xml_data = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in xml_data
        assert 'Relative_humidity_isobaric' in xml_data
        eq_(xml_data['lat'][0], 40)
        eq_(xml_data['lon'][0], -105)

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_csv_point(self):
        self.nq.accept('csv')
        csv_data = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in csv_data
        assert 'Relative_humidity_isobaric' in csv_data
        eq_(csv_data['lat'][0], 40)
        eq_(csv_data['lon'][0], -105)

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_unit_handler_csv(self):
        self.nq.accept('csv')
        self.ncss.unit_handler = tuple_unit_handler
        csv_data = self.ncss.get_data(self.nq)

        temp = csv_data['Temperature_isobaric']
        eq_(len(temp), 2)
        eq_(temp[1], 'K')

        relh = csv_data['Relative_humidity_isobaric']
        eq_(len(relh), 2)
        eq_(relh[1], '%')

    @recorder.use_cassette('ncss_gfs_xml_point')
    def test_unit_handler_xml(self):
        self.nq.accept('xml')
        self.ncss.unit_handler = tuple_unit_handler
        xml_data = self.ncss.get_data(self.nq)

        temp = xml_data['Temperature_isobaric']
        eq_(len(temp), 2)
        eq_(temp[1], 'K')

        relh = xml_data['Relative_humidity_isobaric']
        eq_(len(relh), 2)
        eq_(relh[1], '%')

    @recorder.use_cassette('ncss_gfs_netcdf_point')
    def test_netcdf_point(self):
        self.nq.accept('netcdf')
        nc = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in nc.variables
        assert 'Relative_humidity_isobaric' in nc.variables
        eq_(nc.variables['latitude'][0], 40)
        eq_(nc.variables['longitude'][0], -105)

    @recorder.use_cassette('ncss_gfs_netcdf4_point')
    def test_netcdf4_point(self):
        self.nq.accept('netcdf4')
        nc = self.ncss.get_data(self.nq)

        assert 'Temperature_isobaric' in nc.variables
        assert 'Relative_humidity_isobaric' in nc.variables
        eq_(nc.variables['latitude'][0], 40)
        eq_(nc.variables['longitude'][0], -105)

    @recorder.use_cassette('ncss_gfs_vertical_level')
    def test_vertical_level(self):
        self.nq.accept('csv').vertical_level(50000)
        csv_data = self.ncss.get_data(self.nq)

        eq_(str(csv_data['Temperature_isobaric'])[:6], '263.39')

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_raw_csv(self):
        self.nq.accept('csv')
        csv_data = self.ncss.get_data_raw(self.nq)

        assert csv_data.startswith(b'date,lat')

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_unknown_mime(self):
        self.nq.accept('csv')
        with response_context():
            csv_data = self.ncss.get_data(self.nq)
            assert csv_data.startswith(b'date,lat')
Example #14
0
from datetime import datetime

import numpy as np
import scipy.ndimage as ndimage
from siphon.catalog import TDSCatalog
from siphon.ncss import NCSS

# Latest GFS Dataset
cat = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/grib/'
                 'NCEP/GFS/Global_0p5deg/latest.xml')
best_ds = list(cat.datasets.values())[0]
ncss = NCSS(best_ds.access_urls['NetcdfSubset'])

# Set the time to current
now = datetime.utcnow()

# Query for Latest GFS Run
gfsdata_hght = ncss.query().time(now).accept('netcdf4')
gfsdata_hght.variables('Geopotential_height_isobaric').add_lonlat()

# Set the lat/lon box for the data you want to pull in.
# lonlat_box(west_lon, east_lon, south_lat, north_lat)
gfsdata_hght.lonlat_box(0, 360, 0, 90)

# Set desired level: 25000 Pa = 250 hPa
gfsdata_hght.vertical_level(25000)

# Actually getting the data
data_hght = ncss.get_data(gfsdata_hght)

# Query for Latest GFS Run
gfsdata_wind = ncss.query().time(now).accept('netcdf4')
gfsdata_wind.variables('u-component_of_wind_isobaric',
Example #15
0
# Extract HRRR data using Unidata's Siphon package

# Resolve the latest HRRR dataset
from siphon.catalog import TDSCatalog
latest_hrrr = TDSCatalog('http://thredds-jumbo.unidata.ucar.edu/thredds/catalog/grib/HRRR/CONUS_3km/surface/latest.xml')
hrrr_ds = list(latest_hrrr.datasets.values())[0]

# Set up access via NCSS
from siphon.ncss import NCSS
ncss = NCSS(hrrr_ds.access_urls['NetcdfSubset'])

# Create a query to ask for all times in netcdf4 format for the
# u and v wind components, with a bounding box
query = ncss.query()

# The dataset also exposes an OPeNDAP access URL (not used below)
dap_url = hrrr_ds.access_urls['OPENDAP']

query.all_times().accept('netcdf4').variables('u-component_of_wind_height_above_ground',
                                              'v-component_of_wind_height_above_ground')
query.lonlat_box(north=45, south=41., east=-63, west=-71.5)

# Get the raw bytes and write to a file.
data = ncss.get_data_raw(query)
with open('test_uv.nc', 'wb') as outf:
    outf.write(data)
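
# The raw bytes written above form a complete netCDF file, so the result can
# be reopened locally. A minimal sketch ('nc_local' is just an illustrative
# name; netCDF4 is already used elsewhere in these examples):
from netCDF4 import Dataset

nc_local = Dataset('test_uv.nc')
print(list(nc_local.variables))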

from datetime import datetime

import metpy.calc as mpcalc
from metpy.units import units
from netCDF4 import num2date
import numpy as np
import scipy.ndimage as ndimage
from siphon.ncss import NCSS

##################################
# Set up netCDF Subset Service link
dt = datetime(2016, 4, 16, 18)
base_url = 'https://www.ncei.noaa.gov/thredds/ncss/grid/namanl/'
ncss = NCSS('{}{dt:%Y%m}/{dt:%Y%m%d}/namanl_218_{dt:%Y%m%d}_'
            '{dt:%H}00_000.grb'.format(base_url, dt=dt))

# Data Query
hgt = ncss.query().time(dt)
hgt.variables('Geopotential_height_isobaric', 'u-component_of_wind_isobaric',
              'v-component_of_wind_isobaric').add_lonlat()

# Actually getting the data
data = ncss.get_data(hgt)

##################################
# Pull apart the data

# Get dimension names to pull appropriate variables
dtime = data.variables['Geopotential_height_isobaric'].dimensions[0]
dlev = data.variables['Geopotential_height_isobaric'].dimensions[1]
dlat = data.variables['Geopotential_height_isobaric'].dimensions[2]
dlon = data.variables['Geopotential_height_isobaric'].dimensions[3]