Example #1
def get_climo(config, stid, ghcn_stid, start_year=1980):
    """
    Get climatological values as a list of Daily objects for the station
    ghcn_stid, with a climatology starting in start_year. No end year is
    specified because the wind data record is limited.
    """

    # Retrieve the data
    print('climo: fetching data for GHCN station %s' % ghcn_stid)
    ghcn = get_ghcn_data(ghcn_stid)

    # For each variable, use groupby to get yearly climo
    if config['debug'] > 9:
        print('climo: grouping data into yearly climatology')
    aggregate = {'value': np.mean}
    ghcn_yearly = {}
    if config['debug'] > 9:
        print('climo: averaging for years since %d' % start_year)
    for var, df in ghcn.items():
        # Values come back as 'object' dtype; convert to floats
        df['value'] = df['value'].astype(str).astype(np.float64)
        # Remove any data before the start year
        df = df[df.index >= datetime(start_year, 1, 1)]
        ghcn_yearly[var] = df.groupby([df.index.month, df.index.day]).agg(aggregate)

    # Now we have dataframes with indices (month, day). We need to use the
    # nearest leap year to avoid confusion with Feb 29
    year = last_leap_year()
    # Create a list of Dailys
    dailys = []
    if config['debug'] > 50:
        print('climo: here are the values')
    for index, row in ghcn_yearly['TMAX'].iterrows():
        date = datetime(year, index[0], index[1])
        daily = Daily(stid, date)
        # Convert units: temperature from tenths of deg C to deg F, wind from
        # tenths of m/s to knots, rain from tenths of mm to inches
        daily.high = row['value'] / 10. * 9. / 5. + 32.
        daily.low = ghcn_yearly['TMIN'].loc[index, 'value'] / 10. * 9. / 5. + 32.
        daily.wind = ghcn_yearly['WSF2'].loc[index, 'value'] / 10. * 1.94384
        daily.rain = ghcn_yearly['PRCP'].loc[index, 'value'] / 254.
        if config['debug'] > 50:
            print('%s %0.0f/%0.0f/%0.0f/%0.2f' % (daily.date, daily.high, daily.low, daily.wind, daily.rain))
        dailys.append(daily)

    return dailys
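Note: the pattern above is a day-of-year climatology built with a pandas groupby on (month, day), then mapped onto a leap year so that Feb 29 gets a valid calendar date. A minimal, self-contained sketch of the same idea, with synthetic data in place of get_ghcn_data (names here are illustrative, not part of the project API):

import numpy as np
import pandas as pd
from datetime import datetime

# Synthetic daily TMAX in tenths of degrees C, the units GHCN uses
index = pd.date_range('1980-01-01', '2020-12-31', freq='D')
tmax = pd.DataFrame({'value': np.random.randn(len(index)) * 50. + 150.}, index=index)

# Average all years together for each (month, day) pair
climo = tmax.groupby([tmax.index.month, tmax.index.day])['value'].mean()

# Map each (month, day) onto a leap year so Feb 29 exists, and convert
# tenths of degrees C to degrees F
leap_year = 2020
for (month, day), value in climo.items():
    date = datetime(leap_year, month, day)
    high_f = value / 10. * 9. / 5. + 32.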
Example #2
def json_climo(config, stid, start_date):
    """
    Produce a dictionary of climatology values for the station, keyed by variable, covering dates from
    start_date through the present and formatted for JSON export; missing days are filled with None.
    """
    climo = OrderedDict()
    end_date = datetime.utcnow()
    variables = ['high', 'low', 'wind', 'rain']
    if config['debug'] > 9:
        print('web.json: retrieving climo for %s' % stid)
    dailys = []
    current_date = start_date
    while current_date <= end_date:
        climo_date = current_date.replace(year=last_leap_year())
        try:
            daily = readDaily(config,
                              stid,
                              'forecast',
                              'climo',
                              start_date=climo_date,
                              end_date=climo_date)
            daily.date = current_date
        except MissingDataError:  # missing climo data
            daily = Daily(stid, current_date)
            daily.set_values(np.nan, np.nan, np.nan, np.nan)
        dailys.append(daily)
        current_date += timedelta(days=1)
    for v in variables:
        climo[v.upper()] = [
            None if np.isnan(getattr(daily, v)) else getattr(daily, v)
            for daily in dailys
        ]
    climo['DATETIME'] = [daily.date.isoformat() + 'Z' for daily in dailys]

    return climo
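The None substitution matters because NaN is not valid JSON; Python's json module would otherwise emit the non-standard token NaN. A small sketch of the same substitution with made-up values:

import json
import numpy as np

values = [52.1, np.nan, 48.7]
# Replace NaN with None, which json serializes as null
safe = [None if np.isnan(v) else v for v in values]
print(json.dumps({'HIGH': safe}))  # {"HIGH": [52.1, null, 48.7]}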
Example #3
def main(config):
    """
    Main function. Runs the verification calculation.
    """

    data_binding = 'forecast'

    # Figure out which days we are verifying for: up to yesterday.
    time_now = datetime.utcnow() - timedelta(days=1, hours=6)
    end_date = datetime(time_now.year, time_now.month, time_now.day)
    print('calcVerification: calculating statistics through %s' % end_date)
    start_date = end_date - timedelta(days=31)

    # The directory and archive file
    db_dir = '%s/archive' % config['THETAE_ROOT']
    stats_file = '%s/theta-e-stats.json' % db_dir
    stats = OrderedDict()

    # Iterate over stations
    for stid in config['Stations'].keys():
        if config['debug'] > 9:
            print('calcVerification: calculating statistics for station %s' %
                  stid)

        # Load verification and climo data
        if config['debug'] > 50:
            print('calcVerification: loading verification and climo data')
        verification = readDaily(config,
                                 stid,
                                 data_binding,
                                 'verif',
                                 start_date=start_date,
                                 end_date=end_date)
        climo = []
        current_date = start_date
        while current_date <= end_date:
            climo_date = current_date.replace(year=last_leap_year())
            try:
                climo_day = readDaily(config,
                                      stid,
                                      data_binding,
                                      'climo',
                                      start_date=climo_date,
                                      end_date=climo_date)
                climo_day.date = current_date
            except ValueError:  # missing climo data
                climo_day = Daily(stid, current_date)
                climo_day.set_values(np.nan, np.nan, np.nan, np.nan)
            climo.append(climo_day)
            current_date += timedelta(days=1)

        # Get persistence and convert to dictionaries
        persistence = OrderedDict()
        for v in verification:
            persistence[date_to_string(v.date + timedelta(days=1))] = v
        verification = list_to_dict(verification)
        climo = list_to_dict(climo)

        stats[stid] = OrderedDict()
        for model in list(config['Models'].keys()):
            if config['debug'] > 50:
                print('calcVerification: loading forecast data for %s' % model)
            try:
                forecasts = readDaily(config,
                                      stid,
                                      data_binding,
                                      'daily_forecast',
                                      model=model,
                                      start_date=start_date + timedelta(days=1),
                                      end_date=end_date,
                                      force_list=True)
                forecasts = list_to_dict(forecasts)
            except ValueError:
                if config['debug'] > 9:
                    print(
                        'calcVerification warning: no data found for model %s at %s'
                        % (model, stid))
                continue
            verif_days = [
                d for d in forecasts.keys()
                if (d in verification.keys() and d in climo.keys()
                    and d in persistence.keys())
            ]

            # Get stats for each of the model, climo, and persistence. We do this for every model so that the skill
            # scores can be compared across different sets of available verification days for each model.
            if config['debug'] > 50:
                print('calcVerification: calculating statistics for %s' %
                      model)
            model_stats = get_forecast_stats(forecasts,
                                             verification,
                                             day_list=verif_days)
            climo_stats = get_forecast_stats(climo,
                                             verification,
                                             day_list=verif_days)
            persist_stats = get_forecast_stats(persistence,
                                               verification,
                                               day_list=verif_days)

            # Add in the skill scores: 1 - RMSE_forecast / RMSE_reference,
            # against both climatology and persistence, with and without bias
            skill_references = [
                ('skillClimo', 'rmse', climo_stats),
                ('skillClimoNoBias', 'rmseNoBias', climo_stats),
                ('skillPersist', 'rmse', persist_stats),
                ('skillPersistNoBias', 'rmseNoBias', persist_stats),
            ]
            for var in ['high', 'low', 'wind', 'rain']:
                for skill_name, rmse_key, reference_stats in skill_references:
                    try:
                        model_stats['stats'][var][skill_name] = 1. - (
                            model_stats['stats'][var][rmse_key] /
                            reference_stats['stats'][var]['rmse'])
                    except KeyError:
                        model_stats['stats'][var][skill_name] = None

            # Remove NaN (not valid in JSON) and add to the output dictionary
            replace_nan_in_dict(model_stats)
            stats[stid][model] = model_stats

    # Write the statistics to file
    with open(stats_file, 'w') as f:
        json.dump(stats, f)
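The skill scores above follow the standard form SS = 1 - RMSE_forecast / RMSE_reference, where the reference is climatology or persistence: 1 is a perfect forecast, 0 is no better than the reference, and negative values are worse. A self-contained sketch with made-up numbers (rmse here is a stand-in for whatever get_forecast_stats computes):

import numpy as np

def rmse(forecast, observed):
    # Root-mean-square error of a forecast against observations
    forecast, observed = np.asarray(forecast), np.asarray(observed)
    return np.sqrt(np.mean((forecast - observed) ** 2))

obs = [61., 58., 64., 60.]
model_fcst = [62., 57., 66., 59.]
climo_fcst = [59., 59., 59., 59.]

# About 0.52 here: the model forecast beats the climatology reference
skill = 1. - rmse(model_fcst, obs) / rmse(climo_fcst, obs)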
Example #4
def init(config, reset_old=False, no_climo=False):
    """
    Initializes new station IDs in the databases. Returns a list of all sites included in config that require historical
    data to be retrieved. Also creates a database if it does not exist.

    :param config: configuration dictionary
    :param reset_old: if True, erases tables if they are too old
    :param no_climo: if True, does not check "CLIMO" tables
    """
    add_sites = []
    for data_binding in config['DataBinding'].keys():
        # Open the database and schema
        schema_name = config['DataBinding'][data_binding]['schema']
        database = config['DataBinding'][data_binding]['database']
        schema = get_object(schema_name).schema
        conn = connection(config, database)
        if conn is None:
            raise IOError('Error: db.init cannot connect to database %s' % database)
        cursor = conn.cursor()

        # Iterate through stations in the config
        for stid in config['Stations'].keys():
            add_site = False
            # Find the tables in the db and requested by the schema
            schema_table_names = ['%s_%s' % (stid.upper(), key) for key in schema.keys()]
            schema_table_structures = list(schema.values())
            # Schema must have primary (datetime) key listed first
            date_keys = [schema[key][0][0] for key in schema.keys()]
            if config['debug'] > 50:
                print('db.init: found the following tables in schema:')
                print(schema_table_names)
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            sql_table_names = [table[0] for table in cursor.fetchall()]
            if config['debug'] > 50:
                print('db.init: found the following tables in sql db:')
                print(sql_table_names)

            # For each requested table, create it if it doesn't exist
            for t, table in enumerate(schema_table_names):
                if no_climo and 'CLIMO' in table.upper():
                    if config['debug'] > 9:
                        print('db.init: ignoring table %s' % table)
                    continue
                if table not in sql_table_names:
                    # Something was missing, so we need to add the site to the output list
                    add_site = True
                    # A string of all table columns and types
                    if config['debug'] > 0:
                        print('db.init: need to create table %s' % table)
                    sqltypestr = ', '.join(["%s %s" % _type for _type in schema_table_structures[t]])
                    cursor.execute("CREATE TABLE %s (%s);" % (table, sqltypestr,))
                else:
                    # Check if data in table are recent
                    time_now = datetime.utcnow()
                    if table != stid.upper() + '_CLIMO':
                        recent = timedelta(days=30)
                    else:
                        recent = time_now - datetime(last_leap_year(time_now), 12, 31)
                    key = date_keys[t]
                    try:
                        cursor.execute("SELECT %s FROM %s ORDER BY %s DESC LIMIT 1;" % (key, table, key))
                        last_dt = date_to_datetime(cursor.fetchone()[0])
                    except Exception:  # empty table or unparseable date
                        last_dt = None
                    if last_dt is None or (time_now - last_dt > recent):
                        # Data are old or missing; flag the site and, if requested, reset the table
                        add_site = True
                        if reset_old:
                            if config['debug'] > 0:
                                print('db.init: %s table too old, resetting it' % table)
                            cursor.execute("DROP TABLE %s;" % table)
                            sqltypestr = ', '.join(["%s %s" % _type for _type in schema_table_structures[t]])
                            cursor.execute("CREATE TABLE %s (%s);" % (table, sqltypestr,))
                        else:
                            if config['debug'] > 0:
                                print('db.init: %s table is old, adding to historical' % table)

            # Lastly, add the site if we need to rerun historical data
            if add_site and stid not in add_sites:
                add_sites.append(stid)
            elif config['debug'] > 0:
                print('db.init: nothing to do for station %s' % stid)

        conn.close()

    return add_sites
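The table bootstrap in db.init relies on sqlite_master to discover existing tables and a schema of (column, type) pairs to create the missing ones. A minimal sketch of that pattern against an in-memory database, with a made-up schema and station ID:

import sqlite3

# Toy schema in the shape db.init expects: each table maps to a list of
# (column name, SQL type) pairs, with the datetime key listed first
schema = {'DAILY_FORECAST': [('DateTime', 'TEXT'), ('high', 'REAL')]}
stid = 'KSEA'

conn = sqlite3.connect(':memory:')
cursor = conn.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
existing = [row[0] for row in cursor.fetchall()]

for key, columns in schema.items():
    table = '%s_%s' % (stid, key)
    if table not in existing:
        columns_sql = ', '.join('%s %s' % column for column in columns)
        cursor.execute('CREATE TABLE %s (%s);' % (table, columns_sql))
conn.close()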