def compute_mse(regressor, param):
    """Compare a trained regressor with the persistence (naive) forecast.

    Loads the 'tehachapi' wind park, maps it to pattern-label pairs with
    ``PowerMapping``, fits the requested regressor on every fifth pattern of
    the first half of the data and predicts every fifth pattern of the second
    half.

    Parameters
    ----------
    regressor : str
        ``'rf'`` for ``RandomForestRegressor`` or ``'knn'`` for
        ``KNeighborsRegressor``.
    param : int
        Passed as ``n_estimators`` (``'rf'``) or ``n_neighbors`` (``'knn'``).

    Returns
    -------
    tuple
        ``(mse_y_hat, mse_naive_hat)``: mean squared error of the regressor
        and of the persistence model on the test patterns.

    Raises
    ------
    ValueError
        If ``regressor`` is neither ``'rf'`` nor ``'knn'``. Raised before any
        data is loaded, so a bad name fails immediately.
    """
    # fail fast: reject unknown regressor names before loading the data.
    if regressor not in ('rf', 'knn'):
        raise ValueError("No regressor set.")

    # get wind park and corresponding target. forecast is for the target
    # wind mill
    # NOTE(review): only 2004 is requested here although the comments below
    # speak of testing on 2005 -- confirm the intended year range.
    park_id = NREL.park_id['tehachapi']
    windpark = NREL().get_windpark(park_id, 3, 2004)
    target = windpark.get_target()

    # use power mapping for pattern-label mapping. Feature window length
    # is 6 time steps and time horizon (forecast) is 3 time steps.
    feature_window = 6
    horizon = 3
    mapping = PowerMapping()
    X = mapping.get_features_park(windpark, feature_window, horizon)
    Y = mapping.get_labels_mill(target, feature_window, horizon)

    # first half of the patterns is used for training.
    train_to = int(math.floor(len(X) * 0.5))
    # second half of the patterns is used for testing.
    test_to = len(X)
    # train and test only every fifth pattern, for performance.
    train_step, test_step = 5, 5

    if regressor == 'rf':
        # random forest regressor. The criterion is left at its default,
        # which is the mean squared error; the explicit 'mse' spelling is
        # not accepted by newer scikit-learn releases.
        reg = RandomForestRegressor(n_estimators=param)
    else:
        # TODO the regressor does not need to be newly trained in
        # the case of KNN
        # keyword arguments: newer scikit-learn releases no longer accept
        # `weights` positionally.
        reg = KNeighborsRegressor(n_neighbors=param, weights='uniform')

    # fitting the pattern-label pairs
    reg = reg.fit(X[0:train_to:train_step], Y[0:train_to:train_step])
    y_hat = reg.predict(X[train_to:test_to:test_step])

    n_test = len(y_hat)
    # indices into Y of the labels belonging to the test predictions
    # (only every fifth label is used, matching the test patterns).
    test_indices = range(train_to, train_to + n_test * test_step, test_step)

    # naive is also known as persistence model.
    naive_hat = zeros(n_test, dtype=float32)

    # computing the mean squared errors of regressor and naive prediction.
    mse_y_hat, mse_naive_hat = 0, 0
    for pos, idx in enumerate(test_indices):
        # naive label is the label as horizon time steps before.
        naive_hat[pos] = Y[idx - horizon]
        y = Y[idx]
        mse_y_hat += (y_hat[pos] - y) ** 2
        mse_naive_hat += (naive_hat[pos] - y) ** 2

    mse_y_hat /= float(n_test)
    mse_naive_hat /= float(n_test)

    return mse_y_hat, mse_naive_hat
# sklearn.grid_search is missing from newer scikit-learn releases, where
# GridSearchCV lives in sklearn.model_selection. Try the original location
# first so behaviour is unchanged where it exists, and fall back otherwise.
try:
    from sklearn.grid_search import GridSearchCV
except ImportError:
    from sklearn.model_selection import GridSearchCV
from sklearn import linear_model

# get windpark and corresponding target. forecast is for the target windmill
park_id = NREL.park_id['tehachapi']
windpark = NREL().get_windpark(park_id, 3, 2004, 2005)
target = windpark.get_target()

# use power mapping for pattern-label mapping. Feature window length is 3 time
# steps and time horizon (forecast) is 3 time steps.
feature_window = 3
horizon = 3
mapping = PowerMapping()
X = mapping.get_features_park(windpark, feature_window, horizon)
Y = mapping.get_labels_mill(target, feature_window, horizon)

# train roughly for the year 2004 (first half of the patterns).
train_to = int(math.floor(len(X) * 0.5))
# test roughly for the year 2005 (second half of the patterns).
test_to = len(X)
# train and test only every fifth pattern, for performance.
train_step, test_step = 5, 5

# fitting the pattern-label pairs with an ordinary least squares model and
# predicting the sub-sampled test patterns.
reg = linear_model.LinearRegression()
reg = reg.fit(X[0:train_to:train_step], Y[0:train_to:train_step])
y_hat = reg.predict(X[train_to:test_to:test_step])