Example #1
    def metadata(self):
        """
        Retrieve the metadata from Mesowest. Two calls are made to the Mesowest API.
        The first call is to get the networks in order to determine the `network` and
        `primary_provider`. The second call retrieves the metadata given the `config`
        parameters. The two dataframes are combined and inserted into the database
        given the :class:`~wxcb.database.Database` instance.
        """

        self._logger.info('Obtaining metadata from Mesowest')
        m = Meso(token=self.token)

        # get the networks associated with the metadata
        networks = m.networks()
        net = pd.DataFrame(networks['MNET'])
        net['MNET_ID'] = net['ID']

        # get metadata from Mesowest
        self._logger.debug('Obtaining metadata for {}'.format(self.config))
        meta = m.metadata(**self.config)

        # add the networks to the meta dataframe
        mdf = pd.DataFrame(meta['STATION'])
        mdf = pd.merge(mdf, net, on='MNET_ID')

        # pull out the data from Mesowest into the database format
        DF = pd.DataFrame()
        for c in self.conversion:
            DF[self.conversion[c]] = mdf[c]

        # fill in the network conversion
        for n in self.network_conversion:
            DF[self.network_conversion[n]] = mdf[n]

        # elevation is reported in feet, convert to meters
        DF['elevation'] = DF['elevation'].astype(float) / 3.28084

        # these are the reported lat/long's for the station that may get changed
        # down the road due to location errors
        DF['reported_lat'] = DF['latitude']
        DF['reported_long'] = DF['longitude']

        # calculate the UTM coordinates
        DF['utm_x'], DF['utm_y'], DF['utm_zone'] = zip(
            *DF.apply(utils.df_utm, axis=1))

        # add the source to the DF
        DF['source'] = 'mesowest'

        DF = DF.where((pd.notnull(DF)), None)

        # insert the dataframe into the database
        self.db.insert_data(DF, 'metadata', description='Mesowest metadata')
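
utils.df_utm is not shown in this example; a minimal sketch of what such a row-wise helper could look like, assuming the third-party utm package (the actual wxcb helper may differ):

import utm

def df_utm(row):
    """Hypothetical helper: return (utm_x, utm_y, utm_zone) for one metadata row."""
    easting, northing, zone_number, zone_letter = utm.from_latlon(
        float(row['latitude']), float(row['longitude']))
    return easting, northing, '{}{}'.format(zone_number, zone_letter)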
Example #2
x = latest['STATION'][0]
st_name = x['NAME']
temp = str(
    x['OBSERVATIONS']['air_temp_value_1']['value']) + u'\N{DEGREE SIGN}' + 'F'
wind = str(x['OBSERVATIONS']['wind_speed_value_1']['value']) + ' mph'

result = 'The current weather at ' + st_name + ' is ' + temp + ' with a sustained wind of ' + wind
print(result)

# I import Pretty Print to make the returned dictionary look, well, pretty.
pp = pprint.PrettyPrinter(indent=2)

# Instance a Meso object by passing in YOUR api_token
m = Meso(token='YOUR TOKEN')  # this token for testing only

# Here we retrieve only the stations in Larimer County, Colorado
stations = m.metadata(state='CO', county='Larimer')

# Calling variables() returns all possible sensor variables at stations
variables = m.variables()

# This returns a climatology for Denver from Apr 26 00Z to Apr 27 00Z
climate = m.climatology(stid='kden',
                        startclim='04260000',
                        endclim='04270000',
                        units='precip|in')

# Fetches the latest obs for Fort Collins airport within 30 min of Apr 26 18z
attime = m.attime(stid='kfnl', attime='201504261800', within='30')

# Or just get the latest observation within the last 15 minutes
latest = m.latest(stid='kfnl', within='15')
Example #3
# Make array of integers ("for" loop indices)
#stations = np.arange(0, np.size(mw_data['STATION']))

# Get metadata for all active stations within bounding box (UW WRF domain)
#all_mw_data = m.metadata(bbox=[np.min(lon1), np.min(lat1), np.max(lon1), np.max(lat1)], status='active')
#all_mw_data = m.metadata(state='WA')
#all_mw_data = m.metadata(stid=['ksea','kpuw'])
# All ASOS/AWOS stations in WA, OR, and ID in MesoWest
# Potential stations: https://www.faa.gov/air_traffic/weather/asos/

all_mw_data = m.metadata(stid=['KOKH','KAWO','KBLI','KPWT','KCLS','KDLS','KDEW',
'KORS','KELN','KEPH','KPAE','KFHR','KHQM','KKLS','KMWH','KOLM','KOMK','KEAT',
'KPSC','KNOW','KCLM','K0S9','KS40','KPUW','KPLU','KUIL','KRNT','KRLD','KBFI',
'KSEA','KSHN','KBVS','KGEG','KSFF','KSMP','K1S5','KTIW','KFCT','KVUO','KALW',
'K2S8','KS52','KYKM','KAST','KUAO','KBKE','KBDN','KBOK','KBNO','KCVO','KPDT',
'KEUG','K6S2','K4S1','KGCD','K3S8','KHRI','K77S','K4S2','KJSY','KLMT','KLGD',
'KLKV','K9S9','KS33','KMMV','KSLE','KMEH','KONP','KOTH','KONO','KHIO','KTTD',
'KPDX','K3S9','KRDM','KMFR','KRBG','KSPB','KSXT','KTMK','KBOI','K65S','KBYI',
'KEUL','KLLJ','KCOE','KDIJ','KSUN','KGIC','KIDA','KJER','KTWF','KLWS','KMYL',
'KMLP','KMAN','KPIH','KRXE','KSMN','KSZT'])

stations = []

# Make list of all station IDs 
for ind in np.arange(0, np.size(all_mw_data['STATION'])):
    stations.append(all_mw_data['STATION'][ind]['STID'])

# Create empty data frames
stats_all = pd.DataFrame() # statistics for each station
df_overall = pd.DataFrame() # data for each variable for each model
stats_overall = pd.DataFrame() # statistics for each variable for each model
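
The retrieval loop that fills these frames is not part of this excerpt; a minimal sketch of how it might start, assuming the m.timeseries() call shown in the other examples (the start/end values are placeholders):

start, end = '201504261800', '201504262300'  # placeholder YYYYMMDDhhmm strings
for stid in stations:
    ts = m.timeseries(stid=stid, start=start, end=end)
    if not ts or 'STATION' not in ts or not ts['STATION']:
        continue  # skip stations with no data in the window
    obs = ts['STATION'][0]['OBSERVATIONS']
    df_sta = pd.DataFrame(obs)
    df_sta.index = pd.to_datetime(df_sta['date_time'])
    # ...compute per-station statistics here and append them to stats_all...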
Example #4
x = latest['STATION'][0]
st_name = x['NAME']
temp = str(x['OBSERVATIONS']['air_temp_value_1']['value']) + u'\N{DEGREE SIGN}' + 'F'
wind = str(x['OBSERVATIONS']['wind_speed_value_1']['value']) + ' mph'

result = 'The current weather at ' + st_name + ' is ' + temp + ' with a sustained wind of ' + wind
print(result)

# I import Pretty Print to make the returned dictionary look, well, pretty.
pp = pprint.PrettyPrinter(indent=2)

# Instance a Meso object by passing in YOUR api_token
m = Meso(token='YOUR TOKEN') # this token for testing only

# Here we retrieve only the stations in Larimer County, Colorado
stations = m.metadata(state='CO', county='Larimer')

# Calling variables() returns all possible sensor variables at stations
variables = m.variables()

# This returns a climatology for Denver from Apr 26 00Z to Apr 27 00Z
climate = m.climatology(stid='kden', startclim='04260000', endclim='04270000', units='precip|in')

# Fetches the latest obs for Fort Collins airport within 30 min of Apr 26 18z
attime = m.attime(stid='kfnl', attime='201504261800', within='30')

# Or just get the latest observation within the last 15 minutes
latest = m.latest(stid='kfnl', within='15')

# Returns a time series from Fort Collins airport from Apr 26 18z to Apr 26 23z
time = m.timeseries(stid='kfnl', start='201504261800', end='201504262300')
import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import xarray as xr
import pickle
import os
import julian
import datetime
from datetime import timezone

import metpy.calc as mpcalc
from metpy.cbook import get_test_data
from metpy.plots import add_metpy_logo, SkewT
from metpy.units import units
from scipy.constants import convert_temperature
from MesoPy import Meso

key = 'b1c91e501782441d97ac056e2501b5b0'
m = Meso(token=key)
stations = m.metadata(stid='BBEC1')
print(stations)

####### USER INPUT ##################

## Your MesoWest token
# Email MesoWest API <*****@*****.**> to request an API token
m = Meso(token='your token goes here')

## Local Path and file name of output netcdf file
ncfilename = os.path.normpath('YOUR PATH HERE TEST.nc')

## Select Stations

#1) Manually
#sta_id = ['ksea','sno38','alp44','ksmp','keln']

#2) Select stations from lat lon box
stations = m.metadata(bbox=[-121.837006,47.214409,-121.015778,47.547306])
# Get Station sta_id
# Convert list to numpy array
N_sta_in = stations['SUMMARY']['NUMBER_OF_OBJECTS']
sta_id = np.empty(N_sta_in,dtype='|S10')
for n,x in enumerate(stations['STATION']):
    sta_id[n] = x['STID']

## Define variable names to extract
# uncomment below to see available variables
#print(m.variables())
#  Examples
Vars_ext = ['air_temp_set_1','wind_speed_set_1','wind_direction_set_1','weather_cond_code_set_1'] 


## Define Time period
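# A minimal sketch of what the time-period definition could look like, assuming
# MesoWest's YYYYMMDDhhmm (UTC) strings and the timeseries() call used in the
# earlier examples (the dates below are placeholders):

start_time = '201801010000'  # hypothetical start, YYYYMMDDhhmm UTC
end_time = '201801080000'    # hypothetical end, one week later

# Example retrieval (commented; Vars_ext holds the '_set_1' response keys,
# so the request variables would drop that suffix):
# data = m.timeseries(stid='ksea', start=start_time, end=end_time,
#                     vars='air_temp,wind_speed,wind_direction,weather_cond_code')
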
class WeatherDataGetter:
    def __init__(self,
                 API_KEY,
                 station_metadata_file,
                 wildfire_occurences_file,
                 wildfire_weather_file,
                 year_threshold,
                 state_specified=None,
                 station_radius_threshold=10):
        # Init meso
        self.meso = Meso(token=API_KEY)

        self.station_metadata = None
        # If file is not specified, retrieve data from API
        if os.path.isfile(station_metadata_file):
            self.station_metadata = pd.read_csv(station_metadata_file)
        else:
            self.station_metadata = self.get_station_metadata(
                station_metadata_file)

        # Class variables
        self.wildfires_df = pd.read_csv(wildfire_occurences_file)
        self.year_threshold = year_threshold
        self.state_specified = state_specified
        self.wildfire_weather_file = wildfire_weather_file
        self.station_radius_threshold = station_radius_threshold

        self.preprocess_wildfires()

    def get_5_nearest(self, lat, lon, n):
        """
        Takes position in the form of lat, lon and returns n nearest mesowest stations
        :param lat: latitude
        :param lon: longitude
        :param n: number of stations
        :return: STIDs of the n nearest stations within station_radius_threshold miles
        """

        all_points = self.station_metadata[['latitude', 'longitude']]
        self.station_metadata['distance'] = cdist([(lat, lon)], all_points).T

        n_smallest = self.station_metadata.nsmallest(n=n, columns='distance')
        n_smallest['miles'] = [
            haversine(lon, lat, row['longitude'], row['latitude'])
            for _, row in n_smallest.iterrows()
        ]

        n_smallest = n_smallest[
            n_smallest['miles'] <= self.station_radius_threshold]

        return n_smallest['STID'].tolist()

    def preprocess_wildfires(self):
        """
        Preprocesses wildfires database, filtering by year_threshold and state (if specified)
        :return: None
        """
        # Convert dates to datetimes. Add a year column
        self.wildfires_df['DISCOVERY_DATE'] = pd.to_datetime(
            self.wildfires_df['DISCOVERY_DATE'])
        self.wildfires_df['CONT_DATE'] = pd.to_datetime(
            self.wildfires_df['CONT_DATE'])
        self.wildfires_df['year'] = pd.DatetimeIndex(
            self.wildfires_df['DISCOVERY_DATE']).year

        # Filter to records from year_threshold onward and sort rows in descending order by date
        self.wildfires_df = self.wildfires_df[
            self.wildfires_df['DISCOVERY_DATE'].dt.year >=
            self.year_threshold].sort_values(by=['DISCOVERY_DATE'],
                                             ascending=False)

        # Filter to only a particular state
        if self.state_specified:
            self.wildfires_df = self.wildfires_df[self.wildfires_df['STATE'] ==
                                                  self.state_specified]

    def get_station_metadata(self, file_name):
        """
        Gets all station metadata from the MesoWest API and saves it to a file
        :param file_name: file where station data should be stored
        :return: DataFrame of station metadata
        """
        vars = ['air_temp', 'relative_humidity', 'wind_speed', 'precip_accum']
        metadata = self.meso.metadata(country='us',
                                      status='ACTIVE',
                                      var=vars,
                                      obrange='20110101, 20160101')

        out = []
        for i in range(len(metadata['STATION'])):
            try:
                out.append([
                    metadata['STATION'][i]['STID'],
                    metadata['STATION'][i]['LATITUDE'],
                    metadata['STATION'][i]['LONGITUDE']
                ])
            except KeyError:
                # Skip stations missing an STID or coordinates
                pass
        df = pd.DataFrame(out, columns=['STID', 'latitude', 'longitude'])
        df.to_csv(file_name)
        return df

    def get_weather_data(self):
        """
        Get weather data from stations near each wildfire
        :return: None
        """

        # TODO: Add last_n_days query functionality
        radius = '10'
        last_n_days = 7

        cnt = 0  # Keeps cnt of rows. Used to periodically write rows to file
        stations_data = []  # Stores data for a few stations till it is written to file
        header = True  # For dataframe header
        write_rows_threshold = 100

        columns = [
            'FOD_ID', 'STID', 'distance', 'date_time', 'air_temperature',
            'relative_humidity', 'wind_speed', 'precipitation'
        ]

        for index, row in self.wildfires_df.iterrows():
            if cnt % write_rows_threshold == 0:  # Every write_rows_threshold rows, write to file
                df = pd.DataFrame(stations_data, columns=columns)
                with open(self.wildfire_weather_file, 'a') as f:
                    df.to_csv(f, header=header)
                    header = False
                stations_data = []

            # Build the query time: the discovery date at 12:00, formatted as YYYYMMDDHHMM
            start = (row['DISCOVERY_DATE'] - timedelta(days=0)).replace(
                hour=12, minute=0).strftime("%Y%m%d%H%M")
            lat = row['LATITUDE']
            lon = row['LONGITUDE']
            days = [start]

            station_data = self.get_weather_data_api(lat,
                                                     lon,
                                                     radius,
                                                     row['FOD_ID'],
                                                     days,
                                                     stids=self.get_5_nearest(
                                                         lat=float(lat),
                                                         lon=float(lon),
                                                         n=5))
            print(cnt)
            if station_data:
                stations_data.extend(station_data)
            cnt += 1

    def get_weather_data_api(self, lat, lon, radius, wildfire_ID, days, stids):
        """
        Queries the MesoWest API for the given lat, lon, and radius, restricted to the given nearby stations
        :param lat: optional, if stid provided
        :param lon: optional, if stid provided
        :param radius: optional, if stid provided
        :param wildfire_ID:
        :param days: TODO: for n days. Currently will only query for a single day
        :param stids: STIDs
        :return: station data
        """
        radius_param = str(lat) + ',' + str(lon) + ',' + str(radius)
        vars = ['air_temp', 'relative_humidity', 'wind_speed', 'precip_accum']

        # Get data for a location within the specified radius
        data = []
        try:
            for day in days:
                day_data = self.meso.attime(radius=radius_param,
                                            attime=day,
                                            within=60,
                                            stid=stids,
                                            vars=vars)
                if not day_data:
                    return None

                data.append(day_data)
        except Exception as e:
            print(e)
            return

        if not data:
            return

        # Query data to generate station_df which would have the following columns:
        # STID, Date_time, Var1, Var2...
        # We could omit all stations which do not have all the values
        # Currently, keeping all stations irrespective of their missing values
        # Each occurrence of a date_time value is supposed to be a row.
        # Here, since we are only querying attime, we will have only 1 date_time field
        vars_data = [
            'date_time', 'air_temp_value_1', 'relative_humidity_value_1',
            'wind_speed_value_1', 'precip_accum_value_1'
        ]

        stations = [station['STID'] for station in data[0]['STATION']]
        station_data = []

        # We store the station IDs from the first day's data, then query the
        # other days' data for those stations and store it.
        # TODO: Optimize below for loop
        for station_id in stations:
            for day_idx in range(len(data)):
                if data[day_idx]:
                    temp = []
                    for station in data[day_idx]['STATION']:
                        if station['STID'] == station_id:
                            row = [
                                wildfire_ID, station['STID'],
                                station['DISTANCE']
                            ]
                            for var in vars_data:
                                if var == 'date_time':
                                    row.append(days[day_idx])
                                elif var in station['OBSERVATIONS']:
                                    row.append(
                                        station['OBSERVATIONS'][var]['value'])
                                else:
                                    row.append('')
                            temp.append(row)
                    station_data.extend(temp)

        return station_data
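
get_5_nearest() above relies on a haversine() helper that is not shown in this excerpt; a minimal sketch, assuming it returns the great-circle distance in miles for arguments (lon1, lat1, lon2, lat2):

from math import radians, sin, cos, asin, sqrt

def haversine(lon1, lat1, lon2, lat2):
    """Hypothetical helper: great-circle distance in miles between two points."""
    lon1, lat1, lon2, lat2 = map(radians, (float(lon1), float(lat1), float(lon2), float(lat2)))
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    return 2 * asin(sqrt(a)) * 3956  # mean Earth radius in miles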
Example #8
    diablo_obs, diablo_days = find_diablo_obs(data)

    #make scatter plot of the data
    plt.figure(i)
    plt.scatter(data['Dir'], data['Spd'], 1.5, 'k')
    plt.scatter(diablo_obs['Dir'], diablo_obs['Spd'], 3, 'm', edgecolors='m')
    plt.ylabel('Wind Speed (kts)')
    plt.xlabel('Wind Direction (deg)')
    plt.xticks([0, 45, 90, 135, 180, 225, 270, 315],
               ['0', '45', '90', '135', '180', '225', '270', '315'])
    plt.yticks([0, 5, 10, 15, 20, 25, 30, 35, 40],
               ['0', '5', '10', '15', '20', '25', '30', '35', '40'])
    plt.xlim([0, 360])
    plt.ylim([0, 40])
    plt.grid(True)
    plt.title(stn_ids[i].upper() + ' Scatter Plot: Diablo Obs = ' +
              str(len(diablo_obs)) + ' / Total Obs: ' + str(len(data)))
    plt.savefig('images/20180626/scatter_' + stn_ids[i] + '.pdf')
    plt.savefig('images/20180626/scatter_' + stn_ids[i] + '.png')
    plt.close()

    pickle.dump(diablo_obs,
                open('pickles/' + stn_ids[i] + '_diablo_obs.p', 'wb'))
    pickle.dump(diablo_days,
                open('pickles/' + stn_ids[i] + '_diablo_day.p', 'wb'))
    stations = m.metadata(stid=stn_ids[i])
    pickle.dump([
        stations['STATION'][0]['LATITUDE'],
        stations['STATION'][0]['LONGITUDE'], stations['STATION'][0]['ELEV_DEM']
    ], open('pickles/' + stn_ids[i] + 'meta.p', 'wb'))
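
find_diablo_obs() is not defined in this excerpt; a minimal sketch, assuming 'Diablo' observations are flagged by a simple northeasterly-direction and speed threshold (the thresholds and the DatetimeIndex are assumptions, not the original criteria):

def find_diablo_obs(data, dir_min=0.0, dir_max=90.0, spd_min=15.0):
    """Hypothetical filter: keep obs with NE-quadrant wind at or above an assumed speed threshold."""
    mask = (data['Dir'] >= dir_min) & (data['Dir'] <= dir_max) & (data['Spd'] >= spd_min)
    diablo_obs = data[mask]
    diablo_days = sorted(diablo_obs.index.normalize().unique())  # assumes a DatetimeIndex
    return diablo_obs, diablo_days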
Example #9
    #kbdistations = ['KABI']
    stationsDict = {'CDDT2': {'STID': 'CDDT2', 'NAME': 'CADDO'}}
    #stationsDict = {'KABI': {'STID': 'KABI', 'NAME': 'ABILENE'}}

    print(len(kbdistations))

    # Set default values for dictionary entries. These will later be overwritten if observations are available.
    for station in stationsDict:
        stationsDict[station]['RECENT_OBS'] = -99
        stationsDict[station]['MX_TEMP'] = -99
        stationsDict[station]['MW_PCP'] = 0
        stationsDict[station]['LONGITUDE'] = 0
        stationsDict[station]['LATITUDE'] = 0

    # Metadata request to determine if the station is current
    mwmetadata = m.metadata(stid=kbdistations, start=start, end=end)

    # precipitation query will return total precipitation over the last 24 hours
    start, end = '201712111800', '201712121200'
    mwprecipdata = m.precip(stid=kbdistations,
                            start=start,
                            end=end,
                            units='precip|in')
    #precip = m.precip(stid='CDDT2', start='201709261800',end='201711271200', units='precip|in')
    #print(mwprecipdata)

    # temperature query will return all observations over the last 24 hours
    # Add vars for air_temp_high_6_hour

    mwtempdata = m.timeseries(stid=kbdistations,
                              start=start,
                              end=end)
Example #10
    #         raise ValueError('"{}" not available for station {}!'.format(v, stid))

    # Get our times and measurements from the full returned dictionary
    date_time = response['STATION'][0]['OBSERVATIONS']['date_time']
    site_data = {
        v: response['STATION'][0]['OBSERVATIONS'][v + '_set_1']
        for v in varlist
    }
    df = pd.DataFrame(index=date_time, data=site_data)
    df.index = pd.to_datetime(df.index)
    return df


# Use MesoPy to get the ASOS metadata
m = Meso(token=TOKEN)
stations = m.metadata(bbox=[-123.021397, 37.03180, -120.173988, 38.810713])
N_sta_in = stations['SUMMARY']['NUMBER_OF_OBJECTS']

# Obtain the data using the function above, urllib, and the MesoWest API; dump it into pickle files for each station
for n, x in enumerate(stations['STATION']):
    if x['PERIOD_OF_RECORD']['start'] is not None:
        if x['PERIOD_OF_RECORD']['start'][0:4] == '1970':
            sta_id = x['STID']
            print(sta_id)
            print(str(n) + "/" + str(N_sta_in))
            first = x['PERIOD_OF_RECORD']['start'][0:4] + x['PERIOD_OF_RECORD'][
                'start'][5:7] + x['PERIOD_OF_RECORD']['start'][8:10] + '0000'
            last = x['PERIOD_OF_RECORD']['end'][0:4] + x['PERIOD_OF_RECORD'][
                'end'][5:7] + x['PERIOD_OF_RECORD']['end'][8:10] + '0000'
            if sta_id != 'KMCC' and sta_id != 'KMER' and sta_id != 'KMHR':
                df = load_hourly_asos(sta_id, '199701010000', last)
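
load_hourly_asos() is not shown here; a minimal sketch, assuming it wraps the MesoPy timeseries call and the same DataFrame conversion as the function fragment at the top of this example (the comment above suggests the original may use urllib directly instead):

def load_hourly_asos(stid, start, end,
                     varlist=('air_temp', 'wind_speed', 'wind_direction')):
    """Hypothetical loader: return hourly obs for one station as a pandas DataFrame."""
    response = m.timeseries(stid=stid, start=start, end=end, vars=','.join(varlist))
    obs = response['STATION'][0]['OBSERVATIONS']
    site_data = {v: obs[v + '_set_1'] for v in varlist}
    df = pd.DataFrame(index=pd.to_datetime(obs['date_time']), data=site_data)
    return df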