Ejemplo n.º 1
0
def plotAdvectionOnMap(targetStation, variable, date,
                       width_fac=16, height_fac=12, stretch=2500):
    """Plot all stations on a map with advection arrows toward the target.

    A Lambert Conformal basemap is drawn, every station returned by
    ``Util.getStationList()`` is marked with a blue dot and labelled, and
    for every station other than ``targetStation`` a red arrow is drawn
    starting at that station.  The arrow components are the first element
    of the series returned by ``Adv.dDeriv`` multiplied by ``stretch`` and
    by the second value returned by ``Adv.dDeriv``.

    Parameters
    ----------
    targetStation : station identifier compared against the entries of
        the station list; no arrow is drawn for it.
    variable : passed through to ``Adv.dDeriv``.
    date : passed to ``Adv.dDeriv`` as the start date; ``nextDay(date)``
        is used as the end date.
    width_fac, height_fac : map width and height, multiplied by 100000
        before being handed to ``Basemap``.
    stretch : scale factor applied to the advection value so the arrows
        are visible at map scale (previously hard-coded to 2500).

    Returns
    -------
    None.  The figure is displayed with ``plt.show()``.
    """
    from mpl_toolkits.basemap import Basemap
    import matplotlib.pyplot as plt
    import wUAdvection as Adv
    import wUUtils as Util

    # set up the Lambert Conformal basemap
    m = Basemap(width=width_fac*100000, height=height_fac*100000,
                projection='lcc', resolution='i',
                lat_1=45., lat_0=43.6, lon_0=-82.)
    m.drawcoastlines()
    m.drawcountries()
    m.drawstates()
    # The boundary fill ends up being the ocean colour because the
    # continents are drawn on top of it; lakes use the same colour.
    m.drawmapboundary(fill_color='aqua')
    m.fillcontinents(color='wheat', lake_color='aqua')

    # station positions, converted to map projection coordinates
    stations = Util.getStationList()
    lon, lat = Util.getStationLonLat(stations)
    xpt, ypt = m(lon, lat)
    m.plot(xpt, ypt, 'bo')  # blue dot at every station

    # advection arrow at every station except the target itself
    for station, x, y in zip(stations, xpt, ypt):
        if station == targetStation:
            continue
        # NOTE(review): nextDay is expected to be defined elsewhere in
        # this module; it is not imported here.
        dD, uVec = Adv.dDeriv(targetStation, station, variable,
                              date, nextDay(date))
        # only the first entry of the returned series is plotted
        # presumably uVec is a 2-component direction vector -- confirm
        dx, dy = (stretch * dD[0]) * uVec
        plt.arrow(x, y, dx, dy, color='r', width=12000,
                  head_length=40000, head_width=40000)

    # label every station, offset slightly from its dot
    for station, x, y in zip(stations, xpt, ypt):
        plt.text(x + 30000, y + 20000, station)
    plt.show()
Ejemplo n.º 2
0
def advectionTaylorPredict(model_params, startDate, endDate, actual=True):
    """Predict the target variable with a previously fitted regression.

    The feature matrix is assembled the same way the model was trained:
    the raw features at the target station (``stations[0]``), the series
    returned by ``Adv.dDeriv`` for each feature at every other station,
    and ``np.diff`` derivatives of all of those columns up to ``order``.

    Parameters
    ----------
    model_params : dict with the keys ``'stations'``, ``'targetVar'``,
        ``'features'``, ``'regr'``, ``'lag'`` and ``'order'``.
    startDate, endDate : date range passed to the ``Util`` / ``Adv``
        loaders.
    actual : when true, the observed target values are loaded as well and
        performance figures are printed and returned.

    Returns
    -------
    (date_list, pred, target, model_perf)
        ``date_list`` is ``Util.dateList(startDate, endDate)`` with the
        first ``lag + order`` entries dropped, ``pred`` is the output of
        ``regr.predict``.  ``target`` and ``model_perf`` (a dict with
        ``'R2_mean'``, ``'R2_base'`` and ``'RMSE'``) are ``None`` when
        ``actual`` is false.
    """
    import numpy as np
    import wUUtils as Util
    import wUAdvection as Adv

    def dropLast(series, count):
        # series[:-count] would be empty for count == 0, so slice by an
        # explicit (non-negative) stop index instead
        return series[:max(len(series) - count, 0)]

    def keepLast(series, count):
        # series[-count:] would return everything for count == 0
        return series[max(len(series) - count, 0):]

    # extract city and feature data
    stations = model_params['stations']
    targetVar = model_params['targetVar']
    features = model_params['features']
    regr = model_params['regr']
    lag = model_params['lag']
    order = model_params['order']

    # dates for which a prediction is produced
    date_list = Util.dateList(startDate, endDate)
    date_list = date_list[(lag + order):]

    if actual:
        target = Util.loadDailyVariableRange(stations[0], startDate, endDate,
                                             targetVar, castFloat=True)
        # "baseline" model: predicted target equals the value observed on
        # the day the prediction is made
        baseline = np.array(dropLast(target, lag)[order:])
        # shift vector by lag
        target = np.array(target[lag:])
    else:
        target = None

    featureData = []
    # raw features at the target station, shortened by lag
    for feature in features:
        fd = Util.loadDailyVariableRange(stations[0], startDate, endDate,
                                         feature, castFloat=True)
        featureData.append(dropLast(fd, lag))
    # for other stations, the advection of each feature in the direction
    # of the target station (second return value of dDeriv is unused)
    for station in stations[1:]:
        for feature in features:
            fd, _ = Adv.dDeriv(stations[0], station,
                               feature, startDate, endDate)
            featureData.append(dropLast(fd, lag))

    # "derivative" terms: n-th differences of each zeroth-order column
    ncols = len(stations) * len(features)
    for ideriv in range(1, order + 1):
        for ii in range(ncols):
            featureData.append(np.diff(featureData[ii], n=ideriv))

    # shorten all vectors to the length of the highest order derivative
    nrows = len(featureData[-1])
    featureData = [keepLast(column, nrows) for column in featureData]
    if actual:
        target = keepLast(target, nrows)

    # rows are days, columns are features
    featureData = (np.array(featureData)).T
    pred = regr.predict(featureData)

    if actual:
        r2_mean = regr.score(featureData, target)
        sse = ((pred - target)**2).sum()
        ssm = ((baseline - target)**2).sum()
        r2_base = 1 - sse/ssm
        rmse = np.sqrt(((pred - target)**2).mean())
        print("R^2_mean:" + "\t" + str(r2_mean))
        print("R^2_base:" + "\t" + str(r2_base))
        print("RMSE:\t" + "\t" + str(rmse))
        model_perf = {
            'R2_mean': r2_mean,
            'R2_base': r2_base,
            'RMSE': rmse}
    else:
        model_perf = None
    return date_list, pred, target, model_perf
Ejemplo n.º 3
0
def advectionTaylorModel(stations, startDate, endDate,
                         features, targetVar='TempMax',
                         lag=1, order=0, verbose=False):
    """Fit a linear regression forecasting ``targetVar`` at one station.

    Training data from several stations between ``startDate`` and
    ``endDate`` is combined in a "Taylor expansion": besides the values
    themselves, time differences up to ``order`` are used as predictors.
    For the target station the feature values are used directly; for the
    other stations only the series returned by ``Adv.dDeriv`` (the
    projection of the gradient in the direction of the target station)
    is used.

    Parameters
    ----------
    stations : list of station codes; the first entry is the target
        station (for which the forecast is generated).
    startDate, endDate : date range passed to the ``Util`` / ``Adv``
        loaders.
    features : list of variables to use as predictors.
    targetVar : variable to forecast.
    lag : number of days in the future to forecast.
    order : number of days in the past to include (also the maximum
        order of the time derivative).
    verbose : when true, print the intercept and every coefficient.

    Returns
    -------
    (featureData, target, model_params)
        ``featureData`` is the array the model was fitted on (one column
        per predictor), ``target`` the matching target array, and
        ``model_params`` a dict with the keys ``'stations'``,
        ``'startDate'``, ``'endDate'``, ``'targetVar'``, ``'features'``,
        ``'regr'``, ``'lag'`` and ``'order'``.
    """
    import importlib
    import numpy as np
    import wUUtils as Util
    import wUAdvection as Adv
    from sklearn import linear_model

    # ``reload`` is a builtin only on Python 2; on Python 3 the bare name
    # raises NameError, so resolve it explicitly for both versions.
    reload_module = getattr(importlib, 'reload', None)
    if reload_module is None:
        import imp
        reload_module = imp.reload
    reload_module(Adv)

    def dropLast(series, count):
        # series[:-count] would be empty for count == 0, so slice by an
        # explicit (non-negative) stop index instead
        return series[:max(len(series) - count, 0)]

    def keepLast(series, count):
        # series[-count:] would return everything for count == 0
        return series[max(len(series) - count, 0):]

    # load target variable data and shift it by lag
    target = Util.loadDailyVariableRange(stations[0], startDate, endDate,
                                         targetVar, castFloat=True)
    target = target[lag:]

    featureData = []
    # raw features at the target station, shortened by lag
    for feature in features:
        fd = Util.loadDailyVariableRange(stations[0], startDate, endDate,
                                         feature, castFloat=True)
        featureData.append(dropLast(fd, lag))
    # for other stations, the advection of each feature in the direction
    # of the target station (second return value of dDeriv is unused)
    for station in stations[1:]:
        for feature in features:
            fd, _ = Adv.dDeriv(stations[0], station,
                               feature, startDate, endDate)
            featureData.append(dropLast(fd, lag))

    # "derivative" terms: n-th differences of each zeroth-order column
    ncols = len(stations) * len(features)
    for ideriv in range(1, order + 1):
        for ii in range(ncols):
            featureData.append(np.diff(featureData[ii], n=ideriv))

    # shorten all vectors to the length of the highest order derivative
    nrows = len(featureData[-1])
    featureData = [keepLast(column, nrows) for column in featureData]
    target = keepLast(target, nrows)

    # convert target and features to np arrays (rows are days)
    target = np.array(target)
    featureData = (np.array(featureData)).T

    regr = linear_model.LinearRegression()
    regr.fit(featureData, target)
    model_params = {
        'stations': stations,
        'startDate': startDate,
        'endDate': endDate,
        'targetVar': targetVar,
        'features': features,
        'regr': regr,
        'lag': lag,
        'order': order}

    # report regression results
    print("R^2: " + str(regr.score(featureData, target)))
    if verbose:
        print("Regression coefficients:")
        print("  intercept" + ":\t" + str(regr.intercept_))
        # coefficients follow the column order built above:
        # derivative order -> station -> feature
        column = 0
        for ideriv in range(order + 1):
            print("  " + str(ideriv) + "th derivative:")
            for jj, station in enumerate(stations):
                if jj > 0:
                    print("    Station (Adv): " + station)
                else:
                    print("    Station: " + station)
                for feature in features:
                    print("       " + feature + ":\t" + str(regr.coef_[column]))
                    column += 1
    return featureData, target, model_params