Example #1
0
def get_latest_date(config, data_binding, stid, table_type='OBS'):
    """
    Retrieve the latest datetime in a table for a station.

    :param config: config dictionary
    :param data_binding: str: name of the data binding to use
    :param stid: str: station ID
    :param table_type: str: type of table
    :return: datetime: last available observation date, or None if the table is missing or empty
    """
    # Get the database and the names of columns in the schema
    database = config['DataBinding'][data_binding]['database']
    schema_name = config['DataBinding'][data_binding]['schema']
    schema = get_object(schema_name).schema
    # The schema lists the primary (datetime) key first
    date_key = schema[table_type][0][0]
    table = '%s_%s' % (stid.upper(), table_type.upper())

    conn = connection(config, database)
    cursor = conn.cursor()

    try:
        cursor.execute("SELECT %s FROM %s ORDER BY %s DESC LIMIT 1;" % (date_key, table, date_key))
        last_dt = date_to_datetime(cursor.fetchone()[0])
    except Exception:
        # Missing table, or an empty table (fetchone() returns None): report no data.
        # Narrowed from a bare except: so KeyboardInterrupt/SystemExit propagate.
        last_dt = None
    finally:
        # Always release the connection (the original leaked it on every call)
        conn.close()

    return last_dt
Example #2
0
def get_forecast_stats(forecasts, verifs, day_list=None):
    """
    Returns the statistics of a forecast relative to a verification.

    :param forecasts: dict: forecast objects keyed by day, with high/low/wind/rain attributes
    :param verifs: dict: verification objects keyed by day, with the same attributes
    :param day_list: list: specific days to evaluate; defaults to the days common to both dicts
    :return: OrderedDict: 'attrs' metadata plus per-variable 'stats' (bias, rmse, rmseNoBias)
    """
    # Default to the days present in both the forecasts and the verifications
    if day_list is not None:
        days = day_list
    else:
        days = list(forecasts.keys() & verifs.keys())
    num_days = len(days)
    stats_dict = OrderedDict()
    stats_dict['attrs'] = OrderedDict()
    stats_dict['attrs']['numDays'] = num_days
    stats_dict['attrs']['verifyingDays'] = [date_to_datetime(d).isoformat() + 'Z' for d in days]
    stats_dict['stats'] = OrderedDict()

    for var in ['high', 'low', 'wind', 'rain']:
        stats_dict['stats'][var] = OrderedDict()

    # With no verifying days, return the fully-structured but empty dict
    if num_days < 1:
        return stats_dict

    for var in ['high', 'low', 'wind', 'rain']:
        # np.float was removed in NumPy 1.24; the builtin float is the documented replacement
        forecast_values = np.array([getattr(forecasts[day], var) for day in days], dtype=float)
        verif_values = np.array([getattr(verifs[day], var) for day in days], dtype=float)

        bias = np.nanmean(forecast_values - verif_values)
        # NOTE(review): sqrt is applied element-wise BEFORE the mean, so 'rmse' here is
        # actually the mean absolute error, not sqrt(mean(error^2)). Kept as-is to
        # preserve the values downstream consumers expect — confirm intent before changing.
        rmse = np.nanmean(np.sqrt((forecast_values - verif_values) ** 2.))
        rmse_no_bias = np.nanmean(np.sqrt((forecast_values - bias - verif_values) ** 2.))
        stats_dict['stats'][var]['bias'] = bias
        stats_dict['stats'][var]['rmse'] = rmse
        stats_dict['stats'][var]['rmseNoBias'] = rmse_no_bias

    return stats_dict
Example #3
0
def readDaily(config,
              stid,
              data_binding,
              table_type,
              model=None,
              start_date=None,
              end_date=None,
              force_list=False):
    """
    Read one Daily (or a list of Dailys) for a station from the given data binding.

    table_type must be 'verif', 'climo', 'daily_forecast', or another table defined in the
    binding's schema as %(stid)_%(table_type).upper(). A model should be given unless
    reading from verif or climo tables.
    Date defaults: both None -> now through +24 h; only start None -> 24 h before end;
    only end None -> 24 h after start.

    :param config:
    :param stid: str: station ID
    :param data_binding: str: name of database binding to write to
    :param table_type: str: type of table
    :param model: str: model name
    :param start_date: datetime or str: starting date
    :param end_date: datetime or str: ending date
    :param force_list: bool: if True, returns a list even if there is only one Daily object
    :return: Daily or list of Dailys of requested data
    """
    # Resolve the database and the station-specific table name
    database = config['DataBinding'][data_binding]['database']
    table = '%s_%s' % (stid.upper(), table_type.upper())

    # Delegate the actual query to _read
    data = _read(config,
                 database,
                 table,
                 start_date=start_date,
                 end_date=end_date,
                 model=model)

    # Bail out early if nothing came back at all
    if data is None:
        raise ValueError('db.readDaily error: no data retrieved.')

    # Build one Daily per returned row
    dailys = []
    for _, row in data.iterrows():
        day = Daily(stid, date_to_datetime(row['DATETIME']))
        day.set_values(row['HIGH'], row['LOW'], row['WIND'], row['RAIN'])
        day.model = model
        dailys.append(day)

    num_rows = len(data.index)
    if num_rows == 0:
        raise ValueError('db.readDaily error: no data found.')
    if force_list or num_rows > 1:
        if config['debug'] > 9:
            print('db.readDaily: returning list of daily objects')
        return dailys
    # Exactly one row and no forced list: return the bare Daily
    return dailys[0]
Example #4
0
def readForecast(config, stid, model, date, hour_start=6, hour_padding=6, no_hourly_ok=False):
    """
    Return a Forecast object from the main theta-e database for a single model and a single day.

    hour_start is the starting hour for the 24-hour forecast period, and hour_padding is the
    number of hours added on either side of that period for the hourly timeseries.

    :param config:
    :param stid: str: station ID
    :param model: str: model name
    :param date: datetime or str: date to retrieve
    :param hour_start: int: starting hour of the day in UTC
    :param hour_padding: int: added hours around the 24-hour TimeSeries
    :param no_hourly_ok: bool: if True, does not raise an error if the hourly timeseries is empty
    :return: Forecast
    """
    # Reject out-of-range hour parameters before touching the database
    if not 0 <= hour_start <= 23:
        raise ValueError('db.readForecast error: hour_start must be between 0 and 23.')
    if not 0 <= hour_padding <= 24:
        raise ValueError('db.readForecast error: hour_padding must be between 0 and 24.')

    # Forecasts always come from the 'forecast' data binding
    data_binding = 'forecast'
    if config['debug'] > 9:
        print("db.readForecast: reading forecast from '%s' data binding" % data_binding)
    forecast = Forecast(stid, model, date)

    # Daily part: a single day's values
    daily = readDaily(config, stid, data_binding, 'DAILY_FORECAST', model,
                      start_date=date, end_date=date)

    # Hourly part: the 24-hour window starting at hour_start, padded on both sides
    date = date_to_datetime(date)
    start_date = date + timedelta(hours=hour_start - hour_padding)
    end_date = date + timedelta(hours=hour_start + 24 + hour_padding)
    try:
        timeseries = readTimeSeries(config, stid, data_binding, 'HOURLY_FORECAST', model,
                                    start_date, end_date)
    except MissingDataError:
        if not no_hourly_ok:
            raise
        # Tolerate a missing hourly table with an empty TimeSeries
        timeseries = TimeSeries(stid)

    # Attach both parts and hand the Forecast back
    forecast.timeseries = timeseries
    forecast.daily = daily
    return forecast
Example #5
0
def init(config, reset_old=False, no_climo=False):
    """
    Initializes new station IDs in the databases. Returns a list of all sites included in config that require
    historical data to be retrieved. Also creates a database if it does not exist.

    :param config: config dictionary
    :param reset_old: bool: if True, erases tables if they are too old
    :param no_climo: bool: if True, does not check "CLIMO" tables
    :return: list of str: station IDs needing historical data
    """
    add_sites = []
    for data_binding in config['DataBinding'].keys():
        # Open the database and schema
        schema_name = config['DataBinding'][data_binding]['schema']
        database = config['DataBinding'][data_binding]['database']
        schema = get_object(schema_name).schema
        conn = connection(config, database)
        if conn is None:
            raise IOError('Error: db.init cannot connect to database %s' % database)
        cursor = conn.cursor()

        # Iterate through stations in the config
        for stid in config['Stations'].keys():
            add_site = False
            # Find the tables in the db and requested by the schema
            schema_table_names = ['%s_%s' % (stid.upper(), key) for key in schema.keys()]
            schema_table_structures = list(schema.values())
            # Schema must have primary (datetime) key listed first
            date_keys = [schema[key][0][0] for key in schema.keys()]
            if config['debug'] > 50:
                print('db.init: found the following tables in schema:')
                print(schema_table_names)
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            sql_table_names = [table[0] for table in cursor.fetchall()]
            if config['debug'] > 50:
                print('db.init: found the following tables in sql db:')
                print(sql_table_names)

            # For each requested table, create it if it doesn't exist
            for t, table in enumerate(schema_table_names):
                if no_climo and 'CLIMO' in table.upper():
                    if config['debug'] > 9:
                        print('db.init: ignoring table %s' % table)
                    continue
                if not (table in sql_table_names):
                    # Something was missing, so we need to add the site to the output list
                    add_site = True
                    # A string of all table columns and types
                    if config['debug'] > 0:
                        print('db.init: need to create table %s' % table)
                    sqltypestr = ', '.join(["%s %s" % _type for _type in schema_table_structures[t]])
                    cursor.execute("CREATE TABLE %s (%s);" % (table, sqltypestr,))
                else:
                    # Check if data in table are recent
                    time_now = datetime.utcnow()
                    if table != stid.upper() + '_CLIMO':
                        recent = timedelta(days=30)
                    else:
                        # Climo tables only need refreshing since the last leap year's end
                        recent = time_now - datetime(last_leap_year(time_now), 12, 31)
                    key = date_keys[t]
                    try:
                        cursor.execute("SELECT %s FROM %s ORDER BY %s DESC LIMIT 1;" % (key, table, key))
                        last_dt = date_to_datetime(cursor.fetchone()[0])
                    except Exception:
                        # Empty table (fetchone() returns None) or query failure: treat as no data.
                        # Narrowed from a bare except: so KeyboardInterrupt/SystemExit propagate.
                        last_dt = None
                    if last_dt is None or (time_now - last_dt > recent):
                        # Old or missing data, drop table and recreate it
                        add_site = True
                        if reset_old:
                            if config['debug'] > 0:
                                print('db.init: %s table too old, resetting it' % table)
                            cursor.execute("DROP TABLE %s;" % table)
                            sqltypestr = ', '.join(["%s %s" % _type for _type in schema_table_structures[t]])
                            cursor.execute("CREATE TABLE %s (%s);" % (table, sqltypestr,))
                        else:
                            if config['debug'] > 0:
                                print('db.init: %s table is old, adding to historical' % table)

            # Lastly, add the site if we need to rerun historical data
            if add_site:
                if stid not in add_sites:
                    add_sites.append(stid)
            elif config['debug'] > 0:
                # Only report "nothing to do" when the tables truly needed no work; the
                # original also printed this when a site was merely already queued
                print('db.init: nothing to do for station %s' % stid)

        conn.close()

    return add_sites
Example #6
0
def _read(config, database, table, model=None, start_date=None, end_date=None):
    """
    Return a pandas DataFrame from table in database.
    If start_date and end_date are None, then the start is set to now and the end to 24 hours in the future. If
    start_date only is None, then it is set to 24 hours before end_date. If end_date only is None, then it is set to
    24 hours after start_date.

    :param config: config dictionary
    :param database: str: name of database
    :param table: str: name of table to read from
    :param model: str: specific model to read data from
    :param start_date: datetime or str: starting date
    :param end_date: datetime or str: ending date
    :return: pandas DataFrame of requested data, or None if no rows matched
    """
    # Find the dates and make strings
    start_date = date_to_datetime(start_date)
    end_date = date_to_datetime(end_date)
    if start_date is None and end_date is not None:
        start_date = end_date - timedelta(hours=24)
    elif start_date is not None and end_date is None:
        end_date = start_date + timedelta(hours=24)
    elif start_date is None and end_date is None:
        start_date = datetime.utcnow()
        end_date = start_date + timedelta(hours=24)
    start = date_to_string(start_date)
    end = date_to_string(end_date)
    if config['debug'] > 9:
        print('db._read: getting data from %s for %s to %s' % (table, start, end))

    # Open a database connection
    conn = connection(config, database)
    cursor = conn.cursor()

    # Fetch the data; the date bounds are bound parameters, only the table name is interpolated
    if model is None:
        sql_line = """SELECT * FROM %s WHERE DATETIME>=? AND DATETIME<=?
                       ORDER BY DATETIME ASC;""" % table
        cursor.execute(sql_line, (start, end))
    else:
        sql_line = """SELECT * FROM %s WHERE DATETIME>=? AND DATETIME<=?
                       AND MODEL=? ORDER BY DATETIME ASC""" % table
        cursor.execute(sql_line, (start, end, model.upper()))
    values = cursor.fetchall()
    if config['debug'] > 50:
        print('db._read: fetched the following values')
        print(values)

    # Check that we have data
    if len(values) == 0:
        if config['debug'] > 9:
            print('db._read: warning: no valid data found!')
        # Close the connection on this path too (the original leaked it here)
        conn.close()
        return None

    # Get column names
    cursor.execute("PRAGMA table_info(%s);" % table)
    columns = [c[1].upper() for c in cursor.fetchall()]
    if config['debug'] > 50:
        print('db._read: fetched the following column names')
        print(columns)
    conn.close()  # Done with db

    # Convert to DataFrame and create TimeSeries
    data = pd.DataFrame(values)
    data.columns = columns
    # If model was given, then take it out of the returned columns
    if model is not None:
        data = data.drop('MODEL', axis=1)

    return data
Example #7
0
def get_owm_forecast(stid, lat, lon, api_key, forecast_date):
    """
    Retrieve and parse an OpenWeatherMap 5-day/3-hour forecast into a Forecast object.

    :param stid: str: station ID assigned to the returned Forecast
    :param lat: latitude of the forecast point
    :param lon: longitude of the forecast point
    :param api_key: str: OpenWeatherMap API key (sent as APPID)
    :param forecast_date: datetime: forecast day; the daily period covers hour 6 on this
        date through hour 6 of the following day
    :return: Forecast: object with daily values set and an hourly timeseries attached
    """

    # Retrieve data
    api_url = 'http://api.openweathermap.org/data/2.5/forecast'
    api_options = {
        'APPID': api_key,
        'lat': lat,
        'lon': lon,
        'units': 'imperial',
    }
    response = requests.get(api_url, params=api_options)
    owm_data = response.json()
    # Raise error for invalid HTTP response
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        print('openweathermap: got HTTP error when querying API')
        raise

    # Convert to pandas DataFrame and fix time: parse each row's 'dt_txt' string into a
    # datetime, then use it as the index so label slicing by datetime works below
    owm_df = pd.DataFrame(owm_data['list'])
    owm_df['DateTime'] = np.nan
    for idx in owm_df.index:
        owm_df.loc[idx, 'DateTime'] = date_to_datetime(owm_df.loc[idx,
                                                                  'dt_txt'])
    owm_df.set_index('DateTime', inplace=True)

    # OWM has a column 'main' which contains some parameters at all times. Get all of those.
    # The keys of the first row's 'main' dict become one flat column each
    for parameter in owm_df.loc[owm_df.index[0], 'main'].keys():
        owm_df[parameter] = owm_df['main'].apply(get_parameter,
                                                 args=(parameter, ))

    # Get some other special parameters
    # Make sure the 'rain' parameter exists (if no rain in forecast, the column is missing)
    if 'rain' not in owm_df:
        owm_df = owm_df.assign(**{'rain': 0.0})
    else:
        # '3h' is the accumulation over the last 3 hours; convert mm to inches
        owm_df.loc[:, 'rain'] = mm_to_in(owm_df['rain'].apply(get_parameter,
                                                              args=('3h', )))
    owm_df['condition'] = owm_df['weather'].apply(get_parameter,
                                                  args=('description', ),
                                                  is_list=True)
    # Wind speed arrives in mph (units='imperial'); convert to knots
    owm_df['windSpeed'] = mph_to_kt(owm_df['wind'].apply(get_parameter,
                                                         args=('speed', )))
    owm_df['windDirection'] = owm_df['wind'].apply(get_parameter,
                                                   args=('deg', ))
    owm_df['cloud'] = owm_df['clouds'].apply(get_parameter, args=('all', ))
    # Derive dewpoint from temperature and relative humidity row by row
    owm_df['dewpoint'] = np.nan
    for idx in owm_df.index:
        owm_df.loc[idx, 'dewpoint'] = dewpoint_from_t_rh(
            owm_df.loc[idx, 'temp'], owm_df.loc[idx, 'humidity'])

    # Rename remaining columns for default schema
    column_names_dict = {
        'temp': 'temperature',
    }
    owm_df = owm_df.rename(columns=column_names_dict)

    # Calculate daily values. OWM includes period maxima and minima. Note that rain in OWM is cumulative for the LAST
    # 3 hours.
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    # Prefer the 3-hour period max/min columns; fall back to the instantaneous temperature
    # (column was renamed above, hence 'temperature' in the KeyError branch)
    try:
        daily_high = owm_df.loc[forecast_start:forecast_end, 'temp_max'].max()
    except KeyError:
        daily_high = owm_df.loc[forecast_start:forecast_end,
                                'temperature'].max()
    try:
        daily_low = owm_df.loc[forecast_start:forecast_end, 'temp_min'].min()
    except KeyError:
        daily_low = owm_df.loc[forecast_start:forecast_end,
                               'temperature'].min()
    daily_wind = owm_df.loc[forecast_start:forecast_end, 'windSpeed'].max()
    # Start the rain sum 3 hours late because each value covers the PRECEDING 3 hours;
    # floor at 0.0 so a fully-NaN window doesn't yield a negative/NaN total
    daily_rain = np.nanmax([
        owm_df.loc[forecast_start + timedelta(hours=3):forecast_end,
                   'rain'].sum(), 0.0
    ])

    # Create Forecast object
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.daily.set_values(daily_high, daily_low, daily_wind, daily_rain)
    forecast.timeseries.data = owm_df.reset_index()

    return forecast