def compute_mse(regressor, horizon):
    """Train a regressor on the Tehachapi park and return its test MSE.

    Parameters
    ----------
    regressor : str
        ``'svr'`` for a support vector regressor or ``'knn'`` for a
        k-nearest-neighbours regressor.
    horizon : int
        Forecast horizon handed to the power mapping.

    Returns
    -------
    float
        Mean squared error of the predictions on the test patterns.

    Raises
    ------
    ValueError
        If ``regressor`` is neither ``'svr'`` nor ``'knn'``.
    """
    # Choose the model first so that an unsupported name fails immediately
    # instead of surfacing as an unbound ``mse`` after the data was loaded.
    if regressor == 'svr':
        reg = SVR(kernel='rbf', epsilon=0.1, C=100.0, gamma=0.0001)
    elif regressor == 'knn':
        # ``weights`` is passed by keyword: newer scikit-learn releases no
        # longer accept it positionally.
        reg = KNeighborsRegressor(10, weights='uniform')
    else:
        raise ValueError(
            "Unknown regressor %r, expected 'svr' or 'knn'." % (regressor,))

    # get wind park and corresponding target.
    windpark = NREL().get_windpark(NREL.park_id['tehachapi'], 3, 2004, 2005)
    target = windpark.get_target()

    # use power mapping for pattern-label mapping.
    feature_window = 3
    mapping = PowerMapping()
    X = mapping.get_features_park(windpark, feature_window, horizon)
    y = mapping.get_labels_turbine(target, feature_window, horizon)

    # train roughly for the year 2004, test for 2005.
    train_to = int(math.floor(len(X) * 0.5))
    test_to = len(X)

    # only every 25th pattern is used for training and for testing.
    train_step, test_step = 25, 25
    X_train = X[:train_to:train_step]
    y_train = y[:train_to:train_step]
    X_test = X[train_to:test_to:test_step]
    y_test = y[train_to:test_to:test_step]

    reg.fit(X_train, y_train)
    # MSE is symmetric in its two arguments, so using the conventional
    # (y_true, y_pred) order does not change the result.
    return mean_squared_error(y_test, reg.predict(X_test))
def compute_mse(regressor, param):
    """Return the test MSE of a regressor and of the persistence baseline.

    Parameters
    ----------
    regressor : str
        ``'rf'`` for a random forest or ``'knn'`` for k-nearest neighbours.
    param : int
        Number of trees (``'rf'``) or number of neighbours (``'knn'``).

    Returns
    -------
    tuple
        ``(mse_y_hat, mse_naive_hat)``: the mean squared error of the
        regressor and of the naive (persistence) forecast on the same test
        patterns.

    Raises
    ------
    Exception
        If ``regressor`` is neither ``'rf'`` nor ``'knn'``.
    """
    # get wind park and corresponding target. forecast is for the target
    # turbine
    park_id = NREL.park_id['tehachapi']
    windpark = NREL().get_windpark(park_id, 3, 2004)
    target = windpark.get_target()

    # use power mapping for pattern-label mapping. Feature window length
    # is 6 time steps and time horizon (forecast) is 3 time steps.
    feature_window = 6
    horizon = 3
    mapping = PowerMapping()
    X = mapping.get_features_park(windpark, feature_window, horizon)
    Y = mapping.get_labels_turbine(target, feature_window, horizon)

    # first half of the patterns is used for training, second half for
    # testing.
    # NOTE(review): earlier comments spoke of training on 2004 and testing
    # on 2005, but only 2004 is passed to get_windpark - confirm.
    train_to = int(math.floor(len(X) * 0.5))
    test_to = len(X)

    # train and test only every fifth pattern, for performance.
    train_step, test_step = 5, 5

    if regressor == 'rf':
        # The criterion is left at its default (squared error): the explicit
        # 'mse' spelling is rejected by newer scikit-learn releases.
        reg = RandomForestRegressor(n_estimators=param)
    elif regressor == 'knn':
        # TODO the regressor does not need to be newly trained in
        # the case of KNN
        # ``weights`` must be a keyword argument in newer scikit-learn.
        reg = KNeighborsRegressor(param, weights='uniform')
    else:
        raise Exception("No regressor set.")

    # fitting the pattern-label pairs
    reg = reg.fit(X[0:train_to:train_step], Y[0:train_to:train_step])
    y_hat = reg.predict(X[train_to:test_to:test_step])

    # naive is also known as persistence model: the naive label is the
    # label ``horizon`` time steps before, taken at the same subsampled
    # positions as the test patterns.
    naive_hat = zeros(len(y_hat), dtype=float32)
    for i in range(len(y_hat)):
        naive_hat[i] = Y[train_to + (i * test_step) - horizon]

    # computing the mean squared errors of regressor and naive prediction.
    mse_y_hat, mse_naive_hat = 0, 0
    for i, prediction in enumerate(y_hat):
        y = Y[train_to + (i * test_step)]
        mse_y_hat += (prediction - y) ** 2
        mse_naive_hat += (naive_hat[i] - y) ** 2

    mse_y_hat /= float(len(y_hat))
    mse_naive_hat /= float(len(y_hat))

    return mse_y_hat, mse_naive_hat
def compute_mse(regressor, param):
    """Return the test MSE of a regressor and of the persistence baseline.

    Parameters
    ----------
    regressor : str
        ``'rf'`` for a random forest or ``'knn'`` for k-nearest neighbours.
    param : int
        Number of trees (``'rf'``) or number of neighbours (``'knn'``).

    Returns
    -------
    tuple
        ``(mse_y_hat, mse_naive_hat)``: the mean squared error of the
        regressor and of the naive (persistence) forecast on the same test
        patterns.

    Raises
    ------
    Exception
        If ``regressor`` is neither ``'rf'`` nor ``'knn'``.
    """
    # get wind park and corresponding target. forecast is for the target
    # turbine
    park_id = NREL.park_id['tehachapi']
    windpark = NREL().get_windpark(park_id, 3, 2004)
    target = windpark.get_target()

    # use power mapping for pattern-label mapping. Feature window length
    # is 6 time steps and time horizon (forecast) is 3 time steps.
    feature_window = 6
    horizon = 3
    mapping = PowerMapping()
    X = mapping.get_features_park(windpark, feature_window, horizon)
    Y = mapping.get_labels_turbine(target, feature_window, horizon)

    # first half of the patterns is used for training, second half for
    # testing.
    # NOTE(review): earlier comments spoke of training on 2004 and testing
    # on 2005, but only 2004 is passed to get_windpark - confirm.
    train_to = int(math.floor(len(X) * 0.5))
    test_to = len(X)

    # train and test only every fifth pattern, for performance.
    train_step, test_step = 5, 5

    if regressor == 'rf':
        # The criterion is left at its default (squared error): the explicit
        # 'mse' spelling is rejected by newer scikit-learn releases.
        reg = RandomForestRegressor(n_estimators=param)
    elif regressor == 'knn':
        # TODO the regressor does not need to be newly trained in
        # the case of KNN
        # ``weights`` must be a keyword argument in newer scikit-learn.
        reg = KNeighborsRegressor(param, weights='uniform')
    else:
        raise Exception("No regressor set.")

    # fitting the pattern-label pairs
    reg = reg.fit(X[0:train_to:train_step], Y[0:train_to:train_step])
    y_hat = reg.predict(X[train_to:test_to:test_step])

    # naive is also known as persistence model: the naive label is the
    # label ``horizon`` time steps before, taken at the same subsampled
    # positions as the test patterns.
    naive_hat = zeros(len(y_hat), dtype=float32)
    for i in range(len(y_hat)):
        naive_hat[i] = Y[train_to + (i * test_step) - horizon]

    # computing the mean squared errors of regressor and naive prediction.
    mse_y_hat, mse_naive_hat = 0, 0
    for i, prediction in enumerate(y_hat):
        y = Y[train_to + (i * test_step)]
        mse_y_hat += (prediction - y)**2
        mse_naive_hat += (naive_hat[i] - y)**2

    mse_y_hat /= float(len(y_hat))
    mse_naive_hat /= float(len(y_hat))

    return mse_y_hat, mse_naive_hat
def experiment(method, windpark, windpark_test, damaged, rate):
    """Fit a linear model on reconstructed data and score it on test data.

    The damaged series is reconstructed with the interpolation arguments
    selected by ``method``, a linear regression is fitted on the training
    park, and the mean squared error of its predictions on
    ``windpark_test`` is returned. ``rate`` is accepted but not used here.
    """
    interp_args, _nseries = argfuncs[method](windpark)
    reconstructed = interpolate(damaged, **interp_args)

    target = windpark.get_target()
    # The result is not used further down; the call is kept as it was.
    repair_nrel(target.get_measurements()[:10000])

    train_turbines = windpark.get_turbines()
    for turbine in train_turbines:
        repaired = repair_nrel(turbine.get_measurements()[:10000])
        turbine.add_measurements(repaired)

    # this is the target turbine, use the reconstructed here.
    train_turbines[-1].add_measurements(reconstructed)

    feature_window = horizon = 3
    mapping = PowerMapping()

    # with damaged
    X = mapping.get_features_park(windpark, feature_window, horizon)
    Y = mapping.get_labels_turbine(target, feature_window, horizon)

    # every training pattern is used (step 1 over the full length).
    n_train = int(math.floor(len(X)))
    model = linear_model.LinearRegression()
    model = model.fit(X[0:n_train:1], Y[0:n_train:1])

    # USE THE 2005 YEAR FOR TESTING, WITHOUT DAMAGE
    # predict on second year without damage
    for turbine in windpark_test.get_turbines():
        turbine.add_measurements(
            repair_nrel(turbine.get_measurements()[:10000]))
    target_test = windpark_test.get_target()

    XT = mapping.get_features_park(windpark_test, feature_window, horizon)
    n_test = int(math.floor(len(XT)))
    YT = mapping.get_labels_turbine(
        target_test, feature_window, horizon)[:n_test]

    y_hat = model.predict(XT[:n_test])

    squared_error = sum(
        (y_hat[i] - YT[i]) ** 2 for i in range(len(y_hat)))
    return squared_error / float(len(y_hat))
def setUpClass(cls):
    """Create the class-level fixtures: Tehachapi turbine, park, mappings."""
    dataset = NREL()
    tehachapi = NREL.park_id['tehachapi']
    cls.turbine = dataset.get_turbine(tehachapi, 2004, 2005)
    cls.windpark = dataset.get_windpark(tehachapi, 3, 2004, 2005)
    cls.pmapping, cls.pdmapping = PowerMapping(), PowerDiffMapping()
def fun(citynum, methodnum, K):
    """Cluster the turbines of one wind park and write the labels to CSV.

    Parameters
    ----------
    citynum : int
        Index into the module-level ``cityname`` sequence selecting the park.
    methodnum : int
        Clustering method: 0 KMeans, 1 SpectralClustering,
        2 AgglomerativeClustering, 3 Birch, 4 DBSCAN (``eps=0.1``; ``K`` is
        not passed to DBSCAN).
    K : int
        Number of clusters for methods 0-3; also part of the file name.

    Raises
    ------
    ValueError
        If ``methodnum`` is not one of 0-4.

    The result is written to
    ``cluster10/<cityname><method><K>.csv`` with one row per clustered
    sample: ``[turbine idx, cluster label]``.
    """
    # Fail before the expensive data load and before an empty output file
    # is created; previously an unknown method left ``ans`` unbound.
    if methodnum not in (0, 1, 2, 3, 4):
        raise ValueError("Unknown methodnum %r, expected 0-4." % (methodnum,))

    park_id = NREL.park_id[cityname[citynum]]
    windpark = NREL().get_windpark(park_id, 10, 2004, 2006)
    pla = [turbine.idx for turbine in windpark.get_turbines()]

    feature_window, horizon = 3, 3
    mapping = PowerMapping()
    data_1 = np.array(
        mapping.get_features_park(windpark, feature_window, horizon))
    data_train = np.array(mapping.get_features_park(windpark, 1, 1))

    # Both series are subsampled to every third pattern. ``data_1`` is only
    # needed for its subsampled length, which fixes the size of the training
    # half taken from ``data_train``.
    half = int(math.floor(len(data_1[::3]) * 0.5))
    traindata_1 = data_train[::3][0:half, :]

    # Transpose so that each row is one feature column of the park, then
    # standardise.
    traindata1 = np.transpose(traindata_1)
    traindata1 = preprocessing.scale(
        np.array(traindata1), with_mean=True, with_std=True)

    if methodnum == 0:
        ans = KMeans(n_clusters=K, random_state=0).fit(
            traindata1).predict(traindata1)
    elif methodnum == 1:
        ans = SpectralClustering(
            n_clusters=K, random_state=0).fit_predict(traindata1)
    elif methodnum == 2:
        ans = AgglomerativeClustering(n_clusters=K).fit_predict(traindata1)
    elif methodnum == 3:
        ans = Birch(n_clusters=K).fit_predict(traindata1)
    else:  # methodnum == 4
        ans = DBSCAN(eps=0.1).fit_predict(traindata1)

    path = ('cluster10/' + cityname[citynum] + method[methodnum]
            + str(K) + '.csv')
    # The context manager closes the file even if writing a row fails.
    with open(path, 'w', newline='') as fo:
        csv_write = csv.writer(fo, dialect='excel')
        for i, label in enumerate(ans):
            csv_write.writerow([pla[i], label])
def experiment(method, windpark, windpark_test, damaged, rate):
    """Fit a linear model on reconstructed data and score it on test data.

    The damaged series is reconstructed with the interpolation arguments
    selected by ``method``, a linear regression is fitted on the training
    park, and the mean squared error of its predictions on
    ``windpark_test`` is returned. ``rate`` is accepted but not used here.
    """
    interp_args, _nseries = argfuncs[method](windpark)
    reconstructed = interpolate(damaged, **interp_args)

    target = windpark.get_target()
    # The result is not used further down; the call is kept as it was.
    repair_nrel(target.get_measurements()[:10000])

    park_turbines = windpark.get_turbines()
    for turbine in park_turbines:
        turbine.add_measurements(
            repair_nrel(turbine.get_measurements()[:10000]))

    # this is the target turbine, use the reconstructed here.
    park_turbines[-1].add_measurements(reconstructed)

    feature_window = horizon = 3
    mapping = PowerMapping()

    # with damaged
    X = mapping.get_features_park(windpark, feature_window, horizon)
    Y = mapping.get_labels_turbine(target, feature_window, horizon)

    # every training pattern is used (step 1 over the full length).
    n_train = int(math.floor(len(X)))
    model = linear_model.LinearRegression()
    model = model.fit(X[0:n_train:1], Y[0:n_train:1])

    # USE THE 2005 YEAR FOR TESTING, WITHOUT DAMAGE
    # predict on second year without damage
    for turbine in windpark_test.get_turbines():
        turbine.add_measurements(
            repair_nrel(turbine.get_measurements()[:10000]))
    target_test = windpark_test.get_target()

    XT = mapping.get_features_park(windpark_test, feature_window, horizon)
    n_test = int(math.floor(len(XT)))
    YT = mapping.get_labels_turbine(
        target_test, feature_window, horizon)[:n_test]

    y_hat = model.predict(XT[:n_test])

    total = sum((y_hat[i] - YT[i])**2 for i in range(len(y_hat)))
    return total / float(len(y_hat))
import math
import matplotlib.pyplot as plt
from numpy import zeros, float32
from windml.datasets.nrel import NREL
from windml.mapping.power_mapping import PowerMapping
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

# get windpark and corresponding target. forecast is for the target turbine
park_id = NREL.park_id['tehachapi']
windpark = NREL().get_windpark(park_id, 3, 2004, 2005)
target = windpark.get_target()

# use power mapping for pattern-label mapping.
feature_window, horizon = 3, 3
mapping = PowerMapping()
X = mapping.get_features_park(windpark, feature_window, horizon)
y = mapping.get_labels_turbine(target, feature_window, horizon)

# train roughly for the year 2004, test roughly for the year 2005.
train_to, test_to = int(math.floor(len(X) * 0.5)), len(X)

# train and test only every fifth pattern, for performance.
train_step, test_step = 5, 5
X_train = X[:train_to:train_step]
y_train = y[:train_to:train_step]
X_test = X[train_to:test_to:test_step]
y_test = y[train_to:test_to:test_step]

# initialize and fit a KNN regressor from sklearn with k=10 neighbors.
# ``weights`` is passed by keyword because newer scikit-learn releases no
# longer accept it as a positional argument.
reg = KNeighborsRegressor(10, weights='uniform').fit(X_train, y_train)

# run the knn regression
import time
import csv

# ``np`` is used below but was not imported in this part of the script.
import numpy as np
from windml.datasets.nrel import NREL
from windml.mapping.power_mapping import PowerMapping
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

# time.clock() was removed in Python 3.8; use it where it still exists and
# fall back to time.perf_counter() otherwise.
# NOTE(review): any later elapsed-time computation must read the same clock.
try:
    start = time.clock()
except AttributeError:
    start = time.perf_counter()

park_id = NREL.park_id['cheyenne']
windpark = NREL().get_windpark(park_id, 5, 2004, 2006)
target = windpark.get_target()

feature_window, horizon = 3, 3
mapping = PowerMapping()
data_1 = np.array(
    mapping.get_features_park(windpark, feature_window, horizon))
data_2 = np.array(
    mapping.get_labels_turbine(target, feature_window, horizon)
).reshape(-1, 1)

# take feature_window additional points further back for the target
#data1 = np.array(mapping.get_features_park(windpark, feature_window*2, horizon))
#data2 = np.array(mapping.get_labels_turbine(target, feature_window*2, horizon)).reshape(-1,1)
#region = int(data1.shape[1]/(feature_window*2))
#for i in range(region-1):
#    for j in range(feature_window):
#        data1=np.delete(data1,[(i+1)*feature_window],axis = 1)
#
#

# keep every third pattern.
lendata = len(data_1)
data1 = data_1[:lendata:3]
# ``math.floor`` is used below but ``math`` was not imported in this part of
# the script.
import math

from windml.datasets.nrel import NREL
from windml.mapping.power_mapping import PowerMapping
from sklearn.grid_search import GridSearchCV
from sklearn import linear_model

# get windpark and corresponding target. forecast is for the target windmill
park_id = NREL.park_id['tehachapi']
windpark = NREL().get_windpark(park_id, 3, 2004, 2005)
target = windpark.get_target()

# use power mapping for pattern-label mapping. Feature window length is 3 time
# steps and time horizon (forecast) is 3 time steps.
feature_window = 3
horizon = 3
mapping = PowerMapping()
X = mapping.get_features_park(windpark, feature_window, horizon)
Y = mapping.get_labels_mill(target, feature_window, horizon)

# train roughly for the year 2004.
train_to = int(math.floor(len(X) * 0.5))

# test roughly for the year 2005.
test_to = len(X)

# train and test only every fifth pattern, for performance.
train_step, test_step = 5, 5

# fitting the pattern-label pairs
reg = linear_model.LinearRegression()
reg = reg.fit(X[0:train_to:train_step], Y[0:train_to:train_step])