def reg_svm(xTrain, yTrain, xTest, yTest):
    print("SVM")
    clf = svm.SVR(kernel='linear')
    clf.fit(xTrain, yTrain)
    pred = clf.predict(xTest)
    # MAPE-based error rate and the squared normalized RMSE
    err2 = statistics.mape(pred, yTest)
    mse = math.pow(statistics.normRmse(pred, yTest), 2)
    accu = (1 - err2) * 100
    print("Error Rate :" + str(err2) + "\n\n")
    return mse, accu, pred, yTest
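# The statistics.mape / statistics.normRmse helpers used above are
# project-local and not shown in this section. Below is a minimal,
# self-contained sketch of what they are assumed to compute; the
# function names are hypothetical, not the project's own.
import numpy as np

def mape_sketch(pred, actual):
    """Mean absolute percentage error, expressed as a fraction."""
    pred = np.asarray(pred, dtype=float)
    actual = np.asarray(actual, dtype=float)
    return float(np.mean(np.abs((actual - pred) / actual)))

def norm_rmse_sketch(pred, actual):
    """RMSE normalized by the range of the actual values (assumption)."""
    pred = np.asarray(pred, dtype=float)
    actual = np.asarray(actual, dtype=float)
    rmse = np.sqrt(np.mean((actual - pred) ** 2))
    return float(rmse / (actual.max() - actual.min()))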
def reg_linear(xTrain, yTrain, xTest, yTest):
    print("Linear Regression")
    clf = linear_model.LinearRegression()
    clf.fit(xTrain, yTrain)
    pred = clf.predict(xTest)
    err2 = statistics.mape(pred, yTest)
    mse = math.pow(statistics.normRmse(pred, yTest), 2)
    accu = (1 - err2) * 100
    print("Error Rate :" + str(err2) + "\n\n")
    return mse, accu, pred, yTest
def clustering(zone_id, time, series):
    print("zone", zone_id, ":")
    x_train, y_train, x_test, y_test, test_indices, slope, intercept = \
        pre_processing_data(time, series)
    # Compute centroids and cluster labels
    centroids_k_7, labels_k_7 = kmeans_clustering(x_train, 7)
    centroids_k_24, labels_k_24 = kmeans_clustering(x_train, 24)
    centroids = [centroids_k_7, centroids_k_24]
    labels = [labels_k_7, labels_k_24]
    alg_names = ["(K-Means(7))", "(K-Means(24))"]
    for i in range(len(centroids)):
        centroid = centroids[i]
        label = labels[i]
        # Group the training samples by their cluster assignment
        cluster_sets = [[] for _ in range(len(centroid))]
        for x in range(len(label)):
            cluster_sets[label[x]].append((x_train[x], y_train[x]))
        predict = predict_clustering(centroid, cluster_sets, x_test)
        # Re-apply the linear trend, then invert the log transform
        trend_predict = np.zeros(len(predict))
        for j in range(len(predict)):
            trend_predict[j] = predict[j] + (slope * test_indices[j] + intercept)
        trend_predict = [np.exp(x) - 1 for x in trend_predict]
        predicts = trend_predict
        # Compute the error metrics
        mse = stats.mean_squared_error(y_test, predicts)
        nrmse = stats.normalized_rmse(y_test, predicts)
        mape = stats.mape(y_test, predicts)
        print(alg_names[i], ":", "mse:", mse, " nrmse:", nrmse, " mape:", mape)
        plot.plot_clustering(y_test, predicts, algorithm=alg_names[i],
                             zone_id=zone_id)
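# predict_clustering above is project-local and not shown here. A minimal
# sketch of one plausible nearest-centroid predictor (an assumption, not
# necessarily the project's exact rule): assign each test vector to its
# closest centroid and predict the mean target of that cluster's points.
import numpy as np

def predict_by_nearest_centroid(centroids, cluster_sets, x_test):
    centroids = np.asarray(centroids)
    preds = []
    for x in np.asarray(x_test):
        # Index of the closest centroid by Euclidean distance
        idx = int(np.argmin(np.linalg.norm(centroids - x, axis=1)))
        ys = [y for (_, y) in cluster_sets[idx]]
        preds.append(float(np.mean(ys)))
    return np.array(preds)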
def main():
    # Load the training data
    mean_load, std_load, inputs_full, inputs, outputs_full, outputs = \
        set_training_data()
    # Hyperparameters
    tsteps = 1
    batch_size = 2
    epochs = 10
    hidden_size = 100
    LSTM_layers_num = 3
    # Build the stacked stateful LSTM model
    print("Creating Model...")
    model = Sequential()
    model.add(LSTM(hidden_size,
                   batch_input_shape=(batch_size, tsteps, inputs.shape[2]),
                   return_sequences=True,
                   stateful=True))
    for i in range(2, LSTM_layers_num):
        model.add(LSTM(hidden_size,
                       batch_input_shape=(batch_size, tsteps, hidden_size),
                       return_sequences=True,
                       stateful=True))
    model.add(LSTM(hidden_size,
                   batch_input_shape=(batch_size, tsteps, hidden_size),
                   return_sequences=False,
                   stateful=True))
    model.add(Dense(outputs.shape[1]))
    model.compile(loss='mean_squared_error', optimizer='rmsprop')
    model.summary()
    # Train the model, resetting the LSTM state after each epoch
    print("Training...")
    for i in range(epochs):
        print('Epoch', i + 1, '/', epochs)
        model.fit(inputs, outputs,
                  batch_size=batch_size,
                  verbose=2,
                  epochs=1,
                  shuffle=False)
        model.reset_states()
    # Predict
    print("Predicting...")
    model.reset_states()
    predicted_output = model.predict(inputs_full, batch_size=batch_size)
    predicted_output = de_normalization(mean_load, std_load,
                                        predicted_output, if_log=True)
    expected_output = de_normalization(mean_load, std_load,
                                       outputs_full, if_log=True)
    # Compute the total load across all 20 zones
    predicted_temp = np.zeros((len(predicted_output), 1))
    expected_temp = np.zeros((len(expected_output), 1))
    for i in range(len(predicted_output)):
        predicted_temp[i][0] = np.sum(predicted_output[i])
        expected_temp[i][0] = np.sum(expected_output[i])
    predicted_output = np.concatenate((predicted_output, predicted_temp), axis=1)
    expected_output = np.concatenate((expected_output, expected_temp), axis=1)
    # Use the load data from 2008/06/17-2008/06/30 as test data
    predict_future = predicted_output[-1 - 7 * 2 * 24 - 18:-18]
    expect_future = expected_output[-1 - 7 * 2 * 24 - 18:-18]
    predict_future = predict_future.reshape(len(predict_future), 21)
    expect_future = expect_future.reshape(len(expect_future), 21)
    # Compute the error metrics per zone (column 21 is the system total)
    for i in range(0, 21):
        mse = stats.mean_squared_error(list(expect_future[:, i]),
                                       list(predict_future[:, i]))
        nrmse = stats.normalized_rmse(list(expect_future[:, i]),
                                      list(predict_future[:, i]))
        mape = stats.mape(list(expect_future[:, i]), list(predict_future[:, i]))
        print("zone", i + 1, ":", "mse:", mse, " nrmse:", nrmse, " mape:", mape)
    # Plot the comparison
    plot.plot_comparison(expect_future, predict_future, plot_num=21,
                         algorithm="(LSTM)")
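# de_normalization above is project-local. A minimal sketch of the inverse
# transform it is assumed to apply, given that the clustering code inverts
# its log transform with np.exp(x) - 1: undo the z-score scaling, then
# optionally undo log(1 + x). An illustration, not the project's code.
import numpy as np

def de_normalization_sketch(mean_load, std_load, data, if_log=True):
    data = np.asarray(data) * std_load + mean_load  # undo z-score scaling
    if if_log:
        data = np.expm1(data)  # undo log(1 + x)
    return data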
def main():
    # Load the training data
    time_load_temperature, expect_future = pre_processing_data()
    features, x_train, y_train, combine_zones = set_training_data(
        time_load_temperature)
    # Build the feature map file for XGBoost
    create_feature_map(features)
    # XGBoost model parameters
    xgb_params = {
        "objective": "reg:linear",
        "eta": 0.01,
        "max_depth": 8,
        "seed": 42,
        "silent": 1
    }
    num_rounds = 500
    train_data = xgb.DMatrix(x_train, label=y_train)
    gbdt = xgb.train(xgb_params, train_data, num_rounds)
    # Rank the features by importance
    importance = gbdt.get_fscore(fmap='xgb.fmap')
    importance = sorted(importance.items(), key=op.itemgetter(1))
    df = pd.DataFrame(importance, columns=['feature', 'fscore'])
    df['fscore'] = df['fscore'] / df['fscore'].sum()
    # Plot the relative feature importance
    plot.plot_feature(df)
    # Merge the feature data
    fit_data = merge_features(combine_zones)
    predict_future = pd.DataFrame()
    for zone_id in range(1, 21):
        fit_station = fit_zone_station(fit_data, expect_future, zone_id)
        predict = matching_zone_station(fit_data, zone_id, fit_station)
        predict = pd.DataFrame(predict)
        predict_future = pd.concat([predict_future, predict], axis=1)
    # Reorder the columns back into zone order (positional indexing;
    # .iloc replaces the removed .ix accessor)
    predict_future = predict_future.iloc[:, [
        0, 11, 13, 14, 15, 16, 17, 18, 19, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12
    ]]
    predict_future = pd.DataFrame(predict_future)
    # Compute the total load across all 20 zones
    predict_total = np.zeros((len(predict_future), 1))
    expect_total = np.zeros((len(predict_future), 1))
    for i in range(len(predict_future)):
        predict_total[i][0] = np.sum(predict_future.iloc[i])
        expect_total[i][0] = np.sum(expect_future.iloc[i])
    predict_future = np.concatenate((predict_future, predict_total), axis=1)
    expect_future = np.concatenate((expect_future, expect_total), axis=1)
    # Compute the error metrics per zone (column 21 is the system total)
    for i in range(0, 21):
        mse = stats.mean_squared_error(list(expect_future[:, i]),
                                       list(predict_future[:, i]))
        nrmse = stats.normalized_rmse(list(expect_future[:, i]),
                                      list(predict_future[:, i]))
        mape = stats.mape(list(expect_future[:, i]), list(predict_future[:, i]))
        print("zone", i + 1, ":", "mse:", mse, " nrmse:", nrmse, " mape:", mape)
    # Plot the comparison
    plot.plot_comparison(expect_future, predict_future, plot_num=21,
                         algorithm="(XGBoost)")
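# A minimal, self-contained sketch of the XGBoost training and
# feature-importance steps used above, on synthetic data. The fmap file
# and project helpers are omitted; "reg:squarederror" is the modern
# spelling of the deprecated "reg:linear" objective used above.
import numpy as np
import xgboost as xgb

rng = np.random.RandomState(42)
X = rng.rand(200, 5)
y = X[:, 0] * 3 + X[:, 1] + rng.randn(200) * 0.1

params = {"objective": "reg:squarederror", "eta": 0.01, "max_depth": 8, "seed": 42}
booster = xgb.train(params, xgb.DMatrix(X, label=y), num_boost_round=100)
# get_fscore() counts how often each feature is chosen for a split
print(sorted(booster.get_fscore().items(), key=lambda kv: kv[1]))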
    xData.append(row)
    yData.append(rowy)

print(xData)
print(yData)
cu = len(xData) - 720
cutoff = len(xData) - 30
xTrain = xData[cu:cutoff]
yTrain = yData[cu:cutoff]
xTest = xData[cutoff:]
yTest = yData[cutoff:]
print(yTest)
print("SVM")
# Compare SVR kernels on the same train/test split
for k in ['sigmoid', 'linear']:
    clf = svm.SVR(kernel=k)
    clf.fit(xTrain, yTrain)
    forecast_set = clf.predict(xTest)
    confidence = clf.score(xTest, yTest)
    print(forecast_set)
    err2 = statistics.mape(forecast_set, yTest)
    print(k, "error rate:" + str(err2), "% Accuracy : " + str((1 - err2) * 100))
print('Lowpass coefficient estimation finish.')
# Predict the level-2 and level-1 highpass (detail) coefficients
d2_pred = d2_model.predict(d2_X_test)[:, 0]
d1_pred = d1_model.predict(d1_X_test)[:, 0]
print('Highpass coefficient estimation finish.')
print(np.shape(a2_pred), np.shape(d2_pred), np.shape(d1_pred))
# Reconstruct the load series from the predicted wavelet coefficients
predicted_values = pywt.waverec([a2_pred, d2_pred, d1_pred], 'db4')
print('IDWT finish.')
print(len(y_test_true), len(predicted_values))
# Undo the value shift and scaling before computing the error metrics
mape = statistics.mape([(y_test_true[i] + shifted_value) * 1000
                        for i in range(0, len(y_test_true))],
                       (predicted_values + shifted_value) * 1000)
print('MAPE is ', mape)
mae = statistics.mae([(y_test_true[i] + shifted_value) * 1000
                      for i in range(0, len(y_test_true))],
                     (predicted_values + shifted_value) * 1000)
print('MAE is ', mae)
mse = statistics.meanSquareError([(y_test_true[i] + shifted_value) * 1000
                                  for i in range(0, len(y_test_true))],
                                 (predicted_values + shifted_value) * 1000)
print('MSE is ', mse)
rmse = math.sqrt(mse)
print('RMSE is ', rmse)
nrmse = statistics.normRmse([(y_test_true[i] + shifted_value) * 1000
                             for i in range(0, len(y_test_true))],
                            (predicted_values + shifted_value) * 1000)
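# A minimal sketch of the DWT round trip the pipeline above relies on:
# pywt.wavedec with 'db4' at level 2 yields [a2, d2, d1], and
# pywt.waverec inverts the decomposition.
import numpy as np
import pywt

signal = np.sin(np.linspace(0, 8 * np.pi, 256))
a2, d2, d1 = pywt.wavedec(signal, 'db4', level=2)
reconstructed = pywt.waverec([a2, d2, d1], 'db4')
print(np.allclose(signal, reconstructed[:len(signal)]))  # True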
yTrain = yData[cu:cutoff]
xTest = xData[cutoff:]
yTest = yData[cutoff:]
# Compare several scikit-learn regressors on the same train/test split
classifiers = [
    svm.SVR(kernel='linear'),
    linear_model.LassoLars(),
    linear_model.ARDRegression(),
    linear_model.TheilSenRegressor(),
    linear_model.LinearRegression()
]
for item in classifiers:
    print("------------------------------------------------------------------")
    print(item)
    clf = item
    clf.fit(xTrain, yTrain)
    pred = clf.predict(xTest)
    print(pred)
    print(yTest)
    err2 = statistics.mape(pred, yTest)
    print("Error Rate :" + str(err2) + "\n")
    print("% Accuracy :" + str((1 - err2) * 100) + "\n")
    print("------------------------------------------------------------------")
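# The loop above relies on scikit-learn's shared estimator interface:
# every regressor exposes fit/predict, so one loop can compare them all.
# A self-contained sketch on synthetic data:
import numpy as np
from sklearn import svm, linear_model

rng = np.random.RandomState(0)
X = rng.rand(100, 3)
y = X @ np.array([1.5, -2.0, 0.5]) + rng.randn(100) * 0.05

for est in [svm.SVR(kernel='linear'), linear_model.LinearRegression()]:
    est.fit(X[:80], y[:80])
    # score() reports R^2 on the held-out 20 samples
    print(type(est).__name__, est.score(X[80:], y[80:]))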
pd2 = model_d2.predict(params=result_d2.params, start=1, end=len(d2) + delta[1])
pd1 = model_d1.predict(params=result_d1.params, start=1, end=len(d1) + delta[2])
# Reconstruct the load series from the predicted wavelet coefficients
predict_values = pywt.waverec([pa2, pd2, pd1], 'db4')
print(np.shape(predict_values))
# Plot the reconstructed series against the ground truth
plt.figure(figsize=(15, 5))
plt.plot(list_hourly_load, label="$true$", c='green')
plt.plot(predict_values, label="$predict$", c='red')
plt.legend()
plt.show()
# Evaluate: undo the value shift and scaling before computing the metrics
print(len(list_hourly_load), len(predict_values))
mape = statistics.mape((list_hourly_load + shifted_value) * 1000,
                       (predict_values + shifted_value) * 1000)
print('MAPE is ', mape)
mae = statistics.mae((list_hourly_load + shifted_value) * 1000,
                     (predict_values + shifted_value) * 1000)
print('MAE is ', mae)
mse = statistics.meanSquareError((list_hourly_load + shifted_value) * 1000,
                                 (predict_values + shifted_value) * 1000)
print('MSE is ', mse)
rmse = math.sqrt(mse)
print('RMSE is ', rmse)
nrmse = statistics.normRmse((list_hourly_load + shifted_value) * 1000,
                            (predict_values + shifted_value) * 1000)
print('NRMSE is ', nrmse)
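# The model_* / result_* objects above come from per-coefficient ARMA
# fits. A minimal sketch with the current statsmodels API; the (2, 0, 1)
# order and the synthetic series are illustrative assumptions.
import numpy as np
from statsmodels.tsa.arima.model import ARIMA

series = np.sin(np.linspace(0, 8 * np.pi, 200)) + np.random.randn(200) * 0.1
result = ARIMA(series, order=(2, 0, 1)).fit()
# In-sample fit plus 24 steps of out-of-sample forecast
pred = result.predict(start=1, end=len(series) + 24)
print(pred.shape)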
def neuralNetwork(file, days):
    xData = []
    yData = []
    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    book = xlrd.open_workbook(
        os.path.join(BASE_DIR, "media", "uploadedfile", file))
    sheet = book.sheet_by_index(0)
    for rx in range(1, sheet.nrows):
        row = sheet.row(rx)[1:12]   # feature columns, including temperatures
        rowy = sheet.row(rx)[12]    # total load of the next day
        row = [row[x].value for x in range(0, len(row))]
        rowy = rowy.value
        xData.append(row)
        yData.append(rowy)
    cu = len(xData) - 720
    cutoff = len(xData) - days
    xTrain = xData[cu:cutoff]
    yTrain = yData[cu:cutoff]
    xTest = xData[cutoff:]
    yTest = yData[cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(xData))
    trainIndices = indices[cu:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    dimensions = [7, 8, 10, 11]
    neurons = [300, 500, 500, 500]
    names = []
    for x in range(len(dimensions)):
        names.append("d=" + str(dimensions[x]) + ",h=" + str(neurons[x]))
    preds = []
    trendedPred = []
    accu = []
    mse = []
    for x in range(len(dimensions)):
        # Perform dimensionality reduction on the feature vectors
        pca = PCA(n_components=dimensions[x])
        pca.fit(xTrain)
        xTrainRed = pca.transform(xTrain)
        xTestRed = pca.transform(xTest)
        pred = fit_predict(xTrainRed, yTrain, xTestRed, 100, neurons[x])
        # Add the trend back into the predictions
        temp1 = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the normalization
        trendedPred.append([math.exp(z) for z in temp1])
        # Compute the NRMSE and MAPE
        err = statistics.normRmse(yTest, trendedPred[x])
        err2 = statistics.mape(yTest, trendedPred[x])
        accu.append((1 - err2) * 100)
        mse.append(math.pow(err, 2))
        # Append computed predictions to the list of classifier predictions
        preds.append(trendedPred[x])
        print("Error Rate :" + str(err2) + "\n\n")
    # Return the configuration with the highest accuracy
    max_val = max(accu)
    index_max = accu.index(max_val)
    return mse[index_max], accu[index_max], trendedPred[index_max], yTest
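# statistics.detrend / statistics.reapplyTrend are project-local. A
# minimal sketch of the assumed behavior, consistent with how the
# clustering code re-applies slope * index + intercept: fit a
# least-squares line over the training indices, subtract it, and add it
# back at prediction time. The function names are hypothetical.
import numpy as np

def detrend_sketch(indices, values):
    slope, intercept = np.polyfit(indices, values, 1)
    detrended = np.asarray(values) - (slope * np.asarray(indices) + intercept)
    return detrended, slope, intercept

def reapply_trend_sketch(indices, values, slope, intercept):
    return np.asarray(values) + slope * np.asarray(indices) + intercept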
def neuralNetwork():
    # Retrieve the time series data and apply preprocessing.
    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value.
    xData = []
    yData = []
    book = xlrd.open_workbook("data/data_with_9_variable.xlsx")
    sheet = book.sheet_by_index(0)
    for rx in range(1, sheet.nrows):
        row = sheet.row(rx)[1:12]   # feature columns, including temperatures
        rowy = sheet.row(rx)[12]    # total load of the next day
        row = [row[x].value for x in range(0, len(row))]
        rowy = rowy.value
        xData.append(row)
        yData.append(rowy)
    cu = len(xData) - 720
    cutoff = len(xData) - 30
    xTrain = xData[cu:cutoff]
    yTrain = yData[cu:cutoff]
    xTest = xData[cutoff:]
    yTest = yData[cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(xData))
    trainIndices = indices[cu:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    dimensions = [7, 8, 10, 11]
    neurons = [300, 500, 500, 500]
    names = []
    for x in range(len(dimensions)):
        names.append("d=" + str(dimensions[x]) + ",h=" + str(neurons[x]))
    preds = []
    for x in range(len(dimensions)):
        # Perform dimensionality reduction on the feature vectors
        pca = PCA(n_components=dimensions[x])
        pca.fit(xTrain)
        xTrainRed = pca.transform(xTrain)
        xTestRed = pca.transform(xTest)
        pred = fit_predict(xTrainRed, yTrain, xTestRed, 100, neurons[x])
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the normalization
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE and MAPE
        err = statistics.normRmse(yTest, trendedPred)
        err2 = statistics.mape(yTest, trendedPred)
        # Append computed predictions to the list of classifier predictions
        preds.append(trendedPred)
        print("The NRMSE for the neural network is " + str(err) + "...")
        print("The %Accuracy for the neural network is " +
              str((1 - err2) * 100) + "...\n")
    preds.append(yTest)
    names.append("actual")
    visualizer.comparisonPlot(
        2014, 1, 1, preds, names,
        plotName="Neural Network Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
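# A minimal, self-contained sketch of the PCA reduction step used in both
# neuralNetwork variants: fit the components on the training features
# only, then project both splits into the reduced space. The synthetic
# shapes mirror the 11 input features used above.
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
x_train, x_test = rng.rand(100, 11), rng.rand(30, 11)
pca = PCA(n_components=7)
pca.fit(x_train)                      # learn components from training data only
x_train_red = pca.transform(x_train)  # shape (100, 7)
x_test_red = pca.transform(x_test)    # shape (30, 7)
print(pca.explained_variance_ratio_.sum())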