def metadata(self):
    """
    Fetch station metadata from Mesowest and store it in the database.

    The Mesowest API is queried twice. The first query lists the networks,
    which is what supplies the `network` and `primary_provider` values.
    The second query retrieves the station metadata selected by the
    `config` parameters. Both results are merged on the network ID,
    renamed to the database columns and inserted through the
    :class:`~wxcb.database.Database` instance.
    """

    self._logger.info('Obtaining metadata form Mesowest')

    client = Meso(token=self.token)

    # network listing; copy the ID under the key the station metadata uses
    # so that the two frames share a merge column
    network_df = pd.DataFrame(client.networks()['MNET'])
    network_df['MNET_ID'] = network_df['ID']

    # station metadata for the configured query
    self._logger.debug('Obtaining metadata for {}'.format(self.config))
    station_df = pd.DataFrame(client.metadata(**self.config)['STATION'])

    # attach the network information to every station
    merged = pd.merge(station_df, network_df, on='MNET_ID')

    # rename the Mesowest columns to the database columns, station fields
    # first and network fields second
    frame = pd.DataFrame()
    for source_col, db_col in self.conversion.items():
        frame[db_col] = merged[source_col]
    for source_col, db_col in self.network_conversion.items():
        frame[db_col] = merged[source_col]

    # elevation is reported in feet, convert to meters
    frame['elevation'] = frame['elevation'].astype(float) / 3.28084

    # keep the as-reported coordinates, since latitude/longitude may be
    # changed down the road due to location errors
    frame['reported_lat'] = frame['latitude']
    frame['reported_long'] = frame['longitude']

    # UTM coordinates, computed one row at a time
    utm_x, utm_y, utm_zone = zip(*frame.apply(utils.df_utm, axis=1))
    frame['utm_x'] = utm_x
    frame['utm_y'] = utm_y
    frame['utm_zone'] = utm_zone

    frame['source'] = 'mesowest'

    # replace missing values with None before handing off to the database
    frame = frame.where(frame.notnull(), None)

    self.db.insert_data(frame, 'metadata', description='Mesowest metadata')
# Build a one-line, human readable summary from the station record `x`.
st_name = x['NAME']
temp = ''.join((
    str(x['OBSERVATIONS']['air_temp_value_1']['value']),
    u'\N{DEGREE SIGN}',
    'F',
))
wind = ''.join((
    str(x['OBSERVATIONS']['wind_speed_value_1']['value']),
    ' mph',
))
result = ''.join((
    'The current weather at ',
    st_name,
    ' is ',
    temp,
    ' with a sustained wind of ',
    wind,
))
print(result)

# Pretty Print makes the returned dictionaries easier to read.
pp = pprint.PrettyPrinter(indent=2)

# Create a Meso object by passing in YOUR api_token.
m = Meso(token='YOUR TOKEN')  # this token for testing only

# Only the stations in Larimer County, Colorado.
stations = m.metadata(state='CO', county='Larimer')

# variables() returns all possible sensor variables at stations.
variables = m.variables()

# Climatology for Denver from Apr 26 00z to Apr 27 00z.
climate = m.climatology(
    stid='kden',
    startclim='04260000',
    endclim='04270000',
    units='precip|in',
)

# Latest obs for Fort Collins airport within 30 min of Apr 26 18z.
attime = m.attime(stid='kfnl', attime='201504261800', within='30')

# Or just the latest observation within the last 15 minutes.
latest = m.latest(stid='kfnl', within='15')
# Make array of integers ("for" loop indices)
#stations = np.arange(0, np.size(mw_data['STATION']))

# Get metadata for all active stations within bounding box (UW WRF domain)
#all_mw_data = m.metadata(bbox=[np.min(lon1), np.min(lat1), np.max(lon1), np.max(lat1)], status='active')
#all_mw_data = m.metadata(state='WA')
#all_mw_data = m.metadata(stid=['ksea','kpuw'])

# All ASOS/AWOS stations in WA, OR, ID in MesoWest
# Potential stations: https://www.faa.gov/air_traffic/weather/asos/
all_mw_data = m.metadata(stid=[
    'KOKH', 'KAWO', 'KBLI', 'KPWT', 'KCLS', 'KDLS', 'KDEW',
    'KORS', 'KELN', 'KEPH', 'KPAE', 'KFHR', 'KHQM', 'KKLS',
    'KMWH', 'KOLM', 'KOMK', 'KEAT', 'KPSC', 'KNOW', 'KCLM',
    'K0S9', 'KS40', 'KPUW', 'KPLU', 'KUIL', 'KRNT', 'KRLD',
    'KBFI', 'KSEA', 'KSHN', 'KBVS', 'KGEG', 'KSFF', 'KSMP',
    'K1S5', 'KTIW', 'KFCT', 'KVUO', 'KALW', 'K2S8', 'KS52',
    'KYKM', 'KAST', 'KUAO', 'KBKE', 'KBDN', 'KBOK', 'KBNO',
    'KCVO', 'KPDT', 'KEUG', 'K6S2', 'K4S1', 'KGCD', 'K3S8',
    'KHRI', 'K77S', 'K4S2', 'KJSY', 'KLMT', 'KLGD', 'KLKV',
    'K9S9', 'KS33', 'KMMV', 'KSLE', 'KMEH', 'KONP', 'KOTH',
    'KONO', 'KHIO', 'KTTD', 'KPDX', 'K3S9', 'KRDM', 'KMFR',
    'KRBG', 'KSPB', 'KSXT', 'KTMK', 'KBOI', 'K65S', 'KBYI',
    'KEUL', 'KLLJ', 'KCOE', 'KDIJ', 'KSUN', 'KGIC', 'KIDA',
    'KJER', 'KTWF', 'KLWS', 'KMYL', 'KMLP', 'KMAN', 'KPIH',
    'KRXE', 'KSMN', 'KSZT',
])

# List of all station IDs, in the order the API returned them
stations = [entry['STID'] for entry in all_mw_data['STATION']]

# Create empty data frames
stats_all = pd.DataFrame()      # statistics for each station
df_overall = pd.DataFrame()     # data for each variable for each model
stats_overall = pd.DataFrame()  # statistics for each variable for each model
# NOTE(review): `latest` has to exist before this point; it is assigned
# again further down in this fragment.
x = latest['STATION'][0]

# Build a one-line, human readable summary from the station record.
st_name = x['NAME']
temp = ''.join((
    str(x['OBSERVATIONS']['air_temp_value_1']['value']),
    u'\N{DEGREE SIGN}',
    'F',
))
wind = ''.join((
    str(x['OBSERVATIONS']['wind_speed_value_1']['value']),
    ' mph',
))
result = ''.join((
    'The current weather at ',
    st_name,
    ' is ',
    temp,
    ' with a sustained wind of ',
    wind,
))
print(result)

# Pretty Print makes the returned dictionaries easier to read.
pp = pprint.PrettyPrinter(indent=2)

# Create a Meso object by passing in YOUR api_token.
m = Meso(token='YOUR TOKEN')  # this token for testing only

# Only the stations in Larimer County, Colorado.
stations = m.metadata(state='CO', county='Larimer')

# variables() returns all possible sensor variables at stations.
variables = m.variables()

# Climatology for Denver from Apr 26 00z to Apr 27 00z.
climate = m.climatology(
    stid='kden',
    startclim='04260000',
    endclim='04270000',
    units='precip|in',
)

# Latest obs for Fort Collins airport within 30 min of Apr 26 18z.
attime = m.attime(stid='kfnl', attime='201504261800', within='30')

# Or just the latest observation within the last 15 minutes.
latest = m.latest(stid='kfnl', within='15')

# Time series from Fort Collins airport from Apr 26 18z to Apr 26 23z.
time = m.timeseries(stid='kfnl', start='201504261800', end='201504262300')
import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import xarray as xr
import pickle
import os
import julian
import datetime
from datetime import timezone
import metpy.calc as mpcalc
from metpy.cbook import get_test_data
from metpy.plots import add_metpy_logo, SkewT
from metpy.units import units
from scipy.constants import convert_temperature
from MesoPy import Meso

# MesoWest API token. Set the MESOWEST_TOKEN environment variable to supply
# your own token without editing this file.
# NOTE(review): the fallback below is a token committed to source control.
# It is kept only so existing runs keep working; rotate it and drop the
# fallback once MESOWEST_TOKEN is configured everywhere.
key = os.environ.get('MESOWEST_TOKEN', 'b1c91e501782441d97ac056e2501b5b0')
m = Meso(token=key)

# Look up and print the metadata of a single station.
stations = m.metadata(stid='BBEC1')
print(stations)
####### USER INPUT ##################

## Your MesoWest token
# Email MesoWest API <*****@*****.**> to request an API token
# The placeholder is a string literal so the file parses before it is
# edited; replace it with your own token.
m = Meso(token='your token goes here')

## Local path and file name of the output netCDF file
ncfilename = os.path.normpath('YOUR PATH HERE TEST.nc')

## Select stations
# 1) Manually
#sta_id = ['ksea','sno38','alp44','ksmp','keln']
# 2) Select stations from a lat/lon box
stations = m.metadata(bbox=[-121.837006,47.214409,-121.015778,47.547306])

# Get the station IDs (sta_id)
# Convert the list to a NumPy array of fixed-width byte strings; IDs longer
# than 10 characters would be truncated by the '|S10' dtype
N_sta_in = stations['SUMMARY']['NUMBER_OF_OBJECTS']
sta_id = np.empty(N_sta_in,dtype='|S10')
for n,x in enumerate(stations['STATION']):
    sta_id[n] = x['STID']

## Define variable names to extract
# uncomment below to see available variables
#print(m.variables())
# Examples
Vars_ext = ['air_temp_set_1','wind_speed_set_1','wind_direction_set_1','weather_cond_code_set_1']

## Define Time period
class WeatherDataGetter:
    """
    Collect MesoWest observations from stations near wildfire locations.

    Wildfire records are read from a CSV file and filtered by year and,
    optionally, by state. For every remaining wildfire the nearest stations
    are looked up and queried through the MesoWest API, and the resulting
    rows are appended to an output CSV file.
    """

    def __init__(self,
                 API_KEY,
                 station_metadata_file,
                 wildfire_occurences_file,
                 wildfire_weather_file,
                 year_threshold,
                 state_specified=None,
                 station_radius_threshold=10):
        """
        :param API_KEY: MesoWest API token
        :param station_metadata_file: CSV with station IDs and coordinates;
            read if it exists, otherwise created from the API
        :param wildfire_occurences_file: CSV with the wildfire records
        :param wildfire_weather_file: CSV the weather rows are appended to
        :param year_threshold: earliest discovery year that is kept
        :param state_specified: optional state the wildfires are limited to
        :param station_radius_threshold: largest accepted station distance,
            compared against the value returned by ``haversine``
        """
        # Init meso
        self.meso = Meso(token=API_KEY)

        # Use the cached station list when the file exists, otherwise
        # retrieve it from the API (which also writes the file)
        if os.path.isfile(station_metadata_file):
            self.station_metadata = pd.read_csv(station_metadata_file)
        else:
            self.station_metadata = self.get_station_metadata(
                station_metadata_file)

        # Class variables
        self.wildfires_df = pd.read_csv(wildfire_occurences_file)
        # the attribute keeps its existing spelling for compatibility
        self.year_thresold = year_threshold
        self.state_specified = state_specified
        self.wildfire_weather_file = wildfire_weather_file
        self.station_radius_threshold = station_radius_threshold

        self.preprocess_wildfires()

    def get_5_nearest(self, lat, lon, n):
        """
        Takes a position in the form of lat, lon and returns the n nearest
        mesowest stations that lie within ``station_radius_threshold``

        :param lat: latitude
        :param lon: longitude
        :param n: number of stations
        :return: list of station IDs, at most n long
        """
        all_points = self.station_metadata[['latitude', 'longitude']]

        # Candidates are ranked by Euclidean distance on the raw
        # latitude/longitude values; the result is stored on the shared
        # frame, as before
        self.station_metadata['distance'] = cdist([(lat, lon)],
                                                  all_points)[0]

        # Work on a copy so that adding a column never touches (or warns
        # about) the shared station frame
        n_smallest = self.station_metadata.nsmallest(
            n=n, columns='distance').copy()
        n_smallest['miles'] = [
            haversine(lon, lat, row['longitude'], row['latitude'])
            for _, row in n_smallest.iterrows()
        ]
        n_smallest = n_smallest[
            n_smallest['miles'] <= self.station_radius_threshold]
        return n_smallest['STID'].tolist()

    def preprocess_wildfires(self):
        """
        Preprocesses wildfires database, filtering by year_threshold and
        state (if specified)

        :return: None
        """
        # Convert dates to datetimes. Add a year column
        self.wildfires_df['DISCOVERY_DATE'] = pd.to_datetime(
            self.wildfires_df['DISCOVERY_DATE'])
        self.wildfires_df['CONT_DATE'] = pd.to_datetime(
            self.wildfires_df['CONT_DATE'])
        self.wildfires_df['year'] = pd.DatetimeIndex(
            self.wildfires_df['DISCOVERY_DATE']).year

        # Keep records from the threshold year onwards, newest first
        recent = (self.wildfires_df['DISCOVERY_DATE'].dt.year >=
                  self.year_thresold)
        self.wildfires_df = self.wildfires_df[recent].sort_values(
            by=['DISCOVERY_DATE'], ascending=False)

        # Filter to only a particular state
        if self.state_specified:
            self.wildfires_df = self.wildfires_df[
                self.wildfires_df['STATE'] == self.state_specified]

    def get_station_metadata(self, file_name):
        """
        Gets all stations metadata from mesowest API and saves it in a file

        :param file_name: file where station data should be stored
        :return: the station data, loaded back from ``file_name`` so that it
            matches what a later run reads from the cached file
        """
        variables = [
            'air_temp', 'relative_humidity', 'wind_speed', 'precip_accum'
        ]
        metadata = self.meso.metadata(country='us',
                                      status='ACTIVE',
                                      var=variables,
                                      obrange='20110101, 20160101')
        out = []
        for station in metadata['STATION']:
            try:
                out.append([
                    station['STID'], station['LATITUDE'],
                    station['LONGITUDE']
                ])
            except KeyError:
                # stations without an ID or coordinates cannot be used
                continue

        df = pd.DataFrame(out, columns=['STID', 'latitude', 'longitude'])
        df.to_csv(file_name)
        return pd.read_csv(file_name)

    def _append_rows_to_csv(self, rows, columns, header):
        """Append ``rows`` to the weather file, optionally with a header."""
        df = pd.DataFrame(rows, columns=columns)
        # newline='' leaves line endings to pandas
        with open(self.wildfire_weather_file, 'a', newline='') as f:
            df.to_csv(f, header=header)

    def get_weather_data(self):
        """
        Purpose is to get weather data from nearby stations of a fire

        Rows are buffered and appended to ``wildfire_weather_file`` every
        ``write_rows_threshold`` wildfires, and once more after the last
        wildfire so that no buffered rows are lost.

        :return: None
        """
        # TODO: Add last_n_days query functionality
        radius = '10'
        write_rows_threshold = 100
        columns = [
            'FOD_ID', 'STID', 'distance', 'date_time', 'air_temperature',
            'relative_humidity', 'wind_speed', 'precipitation'
        ]

        stations_data = []  # rows waiting to be written to file
        header = True  # only the first write carries the column header

        for cnt, (_, row) in enumerate(self.wildfires_df.iterrows()):
            if cnt % write_rows_threshold == 0:
                # Every write_rows_threshold rows, write to file
                self._append_rows_to_csv(stations_data, columns, header)
                header = False
                stations_data = []

            # query time: noon on the discovery date
            start = row['DISCOVERY_DATE'].replace(
                hour=12, minute=0).strftime("%Y%m%d%H%M")
            lat = row['LATITUDE']
            lon = row['LONGITUDE']
            days = [start]
            station_data = self.get_weather_data_api(
                lat,
                lon,
                radius,
                row['FOD_ID'],
                days,
                stids=self.get_5_nearest(lat=float(lat),
                                         lon=float(lon),
                                         n=5))
            print(cnt)
            if station_data:
                stations_data.extend(station_data)

        # Write whatever was collected since the last periodic write
        if stations_data:
            self._append_rows_to_csv(stations_data, columns, header)

    def get_weather_data_api(self, lat, lon, radius, wildfire_ID, days,
                             stids):
        """
        Queries mesowest API for given lat, lon and radius, restricted to
        the given nearby stations

        :param lat: latitude of the query centre
        :param lon: longitude of the query centre
        :param radius: radius of the query
        :param wildfire_ID: identifier stored in the first column of each row
        :param days: query times. TODO: for n days. Callers currently pass a
            single entry
        :param stids: STIDs
        :return: list of rows ``[wildfire_ID, STID, DISTANCE, date_time,
            air_temp, relative_humidity, wind_speed, precip_accum]``, or
            None when a query fails or returns nothing
        """
        radius_param = ','.join(str(part) for part in (lat, lon, radius))
        variables = [
            'air_temp', 'relative_humidity', 'wind_speed', 'precip_accum'
        ]

        # Get data for a location with the specified radius
        data = []
        try:
            for day in days:
                day_data = self.meso.attime(radius=radius_param,
                                            attime=day,
                                            within=60,
                                            stid=stids,
                                            vars=variables)
                if not day_data:
                    return None
                data.append(day_data)
        except Exception as e:
            # Best effort: a failed query skips this wildfire instead of
            # aborting the whole run
            print(e)
            return None

        if not data:
            return None

        # Build rows of the form: wildfire ID, STID, distance, date_time,
        # Var1, Var2...
        # Stations with missing values are kept; a missing observation is
        # stored as an empty string.
        observation_fields = [
            'air_temp_value_1', 'relative_humidity_value_1',
            'wind_speed_value_1', 'precip_accum_value_1'
        ]

        # The station IDs come from the first day's data; the other days
        # are searched for those same stations.
        station_ids = [station['STID'] for station in data[0]['STATION']]

        station_data = []
        # TODO: Optimize below for loop
        for station_id in station_ids:
            for day, day_data in zip(days, data):
                for station in day_data['STATION']:
                    if station['STID'] != station_id:
                        continue
                    observations = station['OBSERVATIONS']
                    row = [
                        wildfire_ID, station['STID'], station['DISTANCE'],
                        day
                    ]
                    for field in observation_fields:
                        if field in observations:
                            row.append(observations[field]['value'])
                        else:
                            row.append('')
                    station_data.append(row)

        return station_data
diablo_obs, diablo_days = find_diablo_obs(data)

# Scatter plot of wind speed against wind direction: all observations in
# black, the observations returned by find_diablo_obs on top in magenta.
plt.figure(i)
plt.scatter(data['Dir'], data['Spd'], 1.5, 'k')
plt.scatter(diablo_obs['Dir'], diablo_obs['Spd'], 3, 'm', edgecolors='m')
plt.ylabel('Wind Speed (kts)')
plt.xlabel('Wind Direction (deg)')
plt.xticks([0, 45, 90, 135, 180, 225, 270, 315],
           ['0', '45', '90', '135', '180', '225', '270', '315'])
plt.yticks([0, 5, 10, 15, 20, 25, 30, 35, 40],
           ['0', '5', '10', '15', '20', '25', '30', '35', '40'])
plt.xlim([0, 360])
plt.ylim([0, 40])
plt.grid(True)
plt.title(stn_ids[i].upper() + ' Scatter Plot: Diablo Obs = ' +
          str(len(diablo_obs)) + ' / Total Obs: ' + str(len(data)))
plt.savefig('images/20180626/scatter_' + stn_ids[i] + '.pdf')
plt.savefig('images/20180626/scatter_' + stn_ids[i] + '.png')
plt.close()

# Save the results for this station. The files are opened in `with` blocks
# so every handle is closed (and flushed) as soon as its dump finishes.
with open('pickles/' + stn_ids[i] + '_diablo_obs.p', 'wb') as fh:
    pickle.dump(diablo_obs, fh)
with open('pickles/' + stn_ids[i] + '_diablo_day.p', 'wb') as fh:
    pickle.dump(diablo_days, fh)

# Station latitude, longitude and DEM elevation from the MesoWest metadata.
# The list is built before the file is opened, so a failed lookup does not
# leave an empty pickle behind.
stations = m.metadata(stid=stn_ids[i])
station_meta = [
    stations['STATION'][0]['LATITUDE'],
    stations['STATION'][0]['LONGITUDE'],
    stations['STATION'][0]['ELEV_DEM']
]
with open('pickles/' + stn_ids[i] + 'meta.p', 'wb') as fh:
    pickle.dump(station_meta, fh)
#kbdistations = ['KABI'] stationsDict = {'CDDT2': {'STID': 'CDDT2', 'NAME': 'CADDO'}} #stationsDict = {'KABI': {'STID': 'KABI', 'NAME': 'ABILENE'}} print len(kbdistations) # Set default values for dictionary entries. These will later be overwritten if for station in stationsDict: stationsDict[station]['RECENT_OBS'] = -99 stationsDict[station]['MX_TEMP'] = -99 stationsDict[station]['MW_PCP'] = 0 stationsDict[station]['LONGITUDE'] = 0 stationsDict[station]['LATITUDE'] = 0 # meta data request to determine if station is current mwmetadata = m.metadata(stid=kbdistations, start=start, end=end) # precipitation query will return total precipitation over the last 24 hours start, end = '201712111800', '201712121200' mwprecipdata = m.precip(stid=kbdistations, start=start, end=end, units='precip|in') #precip = m.precip(stid='CDDT2', start='201709261800',end='201711271200', units='precip|in') #print(mwprecipdata) # temperature query will return all observations over the last 24 hours # Add vars for air_temp_high_6_hour mwtempdata = m.timeseries(stid=kbdistations, start=start,
# raise ValueError('"{}" not available for station {}!'.format(v, stid)) # Get our times and measurements from the full returned dictionary date_time = response['STATION'][0]['OBSERVATIONS']['date_time'] site_data = { v: response['STATION'][0]['OBSERVATIONS'][v + '_set_1'] for v in varlist } df = pd.DataFrame(index=date_time, data=site_data) df.index = pd.to_datetime(df.index) return df #use mesopy to get the asos meta_data m = Meso(token=TOKEN) stations = m.metadata(bbox=[-123.021397, 37.03180, -120.173988, 38.810713]) N_sta_in = stations['SUMMARY']['NUMBER_OF_OBJECTS'] #obtain the data using the function and the urllib and mesowest api. dump it into pickle files for each station for n, x in enumerate(stations['STATION']): if x['PERIOD_OF_RECORD']['start'] is not None: if x['PERIOD_OF_RECORD']['start'][0:4] == '1970': sta_id = x['STID'] print(sta_id) print(str(n) + "/" + str(N_sta_in)) first = x['PERIOD_OF_RECORD']['start'][0:4] + x['PERIOD_OF_RECORD'][ 'start'][5:7] + x['PERIOD_OF_RECORD']['start'][8:10] + '0000' last = x['PERIOD_OF_RECORD']['end'][0:4] + x['PERIOD_OF_RECORD'][ 'end'][5:7] + x['PERIOD_OF_RECORD']['end'][8:10] + '0000' if sta_id != 'KMCC' and sta_id != 'KMER' and sta_id != 'KMHR': df = load_hourly_asos(sta_id, '199701010000', last)