def reg_svm(xTrain, yTrain, xTest, yTest):
    print("SVM")
    clf = svm.SVR(kernel='linear')
    clf.fit(xTrain, yTrain)
    pred = clf.predict(xTest)
    err2 = statistics.mape(pred, yTest)
    mse = math.pow(statistics.normRmse(pred, yTest), 2)
    accu = (1 - err2) * 100
    print("Error Rate :" + str(err2) + "\n\n")
    return mse, accu, pred, yTest
def reg_linear(xTrain, yTrain, xTest, yTest):
    print("Linear Regression")
    clf = linear_model.LinearRegression()
    clf.fit(xTrain, yTrain)
    pred = clf.predict(xTest)
    err2 = statistics.mape(pred, yTest)
    mse = math.pow(statistics.normRmse(pred, yTest), 2)
    accu = (1 - err2) * 100
    print("Error Rate :" + str(err2) + "\n\n")
    return mse, accu, pred, yTest
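The `statistics` module used throughout these examples is project-local, so its exact definitions are not shown. A minimal sketch of the two helpers called above, assuming MAPE as a fraction and RMSE normalized by the range of the actuals (argument order varies across the examples, and the real module may differ):

import math

def mape(pred, actual):
    # Mean absolute percentage error, expressed as a fraction.
    return sum(abs((a - p) / a) for p, a in zip(pred, actual)) / len(actual)

def normRmse(pred, actual):
    # Root mean squared error, normalized by the range of the actual values.
    mse = sum((a - p) ** 2 for p, a in zip(pred, actual)) / len(actual)
    return math.sqrt(mse) / (max(actual) - min(actual))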
def clustering(zone_id, time, series):
    print "zone", zone_id, ":"
    x_train, y_train, x_test, y_test, test_indices, slope, intercept = pre_processing_data(
        time, series)
    # Compute the centroids and cluster labels
    centroids_k_7, labels_k_7 = kmeans_clustering(x_train, 7)
    centroids_k_24, labels_k_24 = kmeans_clustering(x_train, 24)
    centroids = [centroids_k_7, centroids_k_24]
    labels = [labels_k_7, labels_k_24]

    alg_names = ["(K-Means(7))", "(K-Means(24))"]

    for i in range(len(centroids)):
        centroid = centroids[i]
        label = labels[i]
        cluster_sets = []
        for x in range(len(centroid)):
            cluster_sets.append([])
        for x in range(len(label)):
            cluster_sets[label[x]].append((x_train[x], y_train[x]))

        predict = predict_clustering(centroid, cluster_sets, x_test)

        trend_predict = np.zeros(len(predict))
        for j in range(len(predict)):
            trend_predict[j] = predict[j] + (slope * test_indices[j] +
                                             intercept)
        trend_predict = [np.exp(x) - 1 for x in trend_predict]

        predicts = trend_predict
        # Compute the errors
        mse = stats.mean_squared_error(y_test, predicts)
        nrmse = stats.normalized_rmse(y_test, predicts)
        mape = stats.mape(y_test, predicts)
        print(alg_names[i], ":", "mse:", mse, "  nrmse:", nrmse, "  mape:",
              mape)

        plot.plot_clustering(y_test,
                             predicts,
                             algorithm=alg_names[i],
                             zone_id=zone_id)
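`kmeans_clustering` and `predict_clustering` are project helpers whose bodies are not shown. A minimal sketch of the former, assuming it wraps scikit-learn's KMeans and returns (centroids, labels) as the call sites above expect:

from sklearn.cluster import KMeans

def kmeans_clustering(x_train, k):
    # Fit k-means with k clusters and return the centroids plus the
    # cluster label assigned to each training sample.
    km = KMeans(n_clusters=k, n_init=10, random_state=42).fit(x_train)
    return km.cluster_centers_, km.labels_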
Example #4
def main():
    # Fetch the training data
    mean_load, std_load, inputs_full, inputs, outputs_full, outputs = set_training_data(
    )
    # Set the hyperparameters
    tsteps = 1
    batch_size = 2
    epochs = 10
    hidden_size = 100
    LSTM_layers_num = 3

    # Build the LSTM network model
    print("Creating Model...")
    model = Sequential()
    model.add(
        LSTM(hidden_size,
             batch_input_shape=(batch_size, tsteps, inputs.shape[2]),
             return_sequences=True,
             stateful=True))
    for i in range(2, LSTM_layers_num):
        model.add(
            LSTM(hidden_size,
                 batch_input_shape=(batch_size, tsteps, hidden_size),
                 return_sequences=True,
                 stateful=True))
    model.add(
        LSTM(hidden_size,
             batch_input_shape=(batch_size, tsteps, hidden_size),
             return_sequences=False,
             stateful=True))
    model.add(Dense(outputs.shape[1]))
    model.compile(loss='mean_squared_error', optimizer='rmsprop')
    model.summary()

    # Train the model
    print("Training...")
    for i in range(epochs):
        print('Epoch', i + 1, '/', epochs)
        model.fit(inputs,
                  outputs,
                  batch_size=batch_size,
                  verbose=2,
                  epochs=1,
                  shuffle=False)
        model.reset_states()

    # Run the predictions
    print("Predicting...")
    model.reset_states()
    predicted_output = model.predict(inputs_full, batch_size=batch_size)

    predicted_output = de_normalization(mean_load,
                                        std_load,
                                        predicted_output,
                                        if_log=True)
    expected_output = de_normalization(mean_load,
                                       std_load,
                                       outputs_full,
                                       if_log=True)

    # Compute the total load across the 20 zones
    predicted_temp = np.zeros((len(predicted_output), 1))
    expected_temp = np.zeros((len(expected_output), 1))
    for i in range(len(predicted_output)):
        predicted_temp[i][0] = np.sum(predicted_output[i])
        expected_temp[i][0] = np.sum(expected_output[i])
    predicted_output = np.concatenate((predicted_output, predicted_temp),
                                      axis=1)
    expected_output = np.concatenate((expected_output, expected_temp), axis=1)

    # Use the 2008/06/17-2008/06/30 load data as the test set
    predict_future = predicted_output[-1 - 7 * 2 * 24 - 18:-18]
    expect_future = expected_output[-1 - 7 * 2 * 24 - 18:-18]
    predict_future = predict_future.reshape(len(predict_future), 21)
    expect_future = expect_future.reshape(len(expect_future), 21)

    # Compute the errors
    for i in range(0, 21):
        mse = stats.mean_squared_error(list(expect_future[:, i]),
                                       list(predict_future[:, i]))
        nrmse = stats.normalized_rmse(list(expect_future[:, i]),
                                      list(predict_future[:, i]))
        mape = stats.mape(list(expect_future[:, i]), list(predict_future[:,
                                                                         i]))
        print "zone", i + 1, ":", "mse:", mse, "  nrmse:", nrmse, "  mape:", mape

    # Visualize the results
    plot.plot_comparison(expect_future,
                         predict_future,
                         plot_num=21,
                         algorithm="(LSTM)")
def main():
    # Fetch the training data
    time_load_temperature, expect_future = pre_processing_data()
    features, x_train, y_train, combine_zones = set_training_data(
        time_load_temperature)

    # Build the feature map
    create_feature_map(features)
    # Set the XGBoost parameters
    xgb_params = {
        "objective": "reg:linear",  # "reg:squarederror" in newer XGBoost
        "eta": 0.01,
        "max_depth": 8,
        "seed": 42,
        "silent": 1
    }
    num_rounds = 500
    train_data = xgb.DMatrix(x_train, label=y_train)
    gbdt = xgb.train(xgb_params, train_data, num_rounds)

    # Rank the features by importance
    importance = gbdt.get_fscore(fmap='xgb.fmap')
    importance = sorted(importance.items(), key=op.itemgetter(1))
    df = pd.DataFrame(importance, columns=['feature', 'fscore'])
    df['fscore'] = df['fscore'] / df['fscore'].sum()
    # Show the relative feature ranking
    plot.plot_feature(df)

    # Merge the feature data
    fit_data = merge_features(combine_zones)
    predict_future = pd.DataFrame()
    for zone_id in range(1, 21):
        fit_station = fit_zone_station(fit_data, expect_future, zone_id)
        predict = matching_zone_station(fit_data, zone_id, fit_station)
        predict = pd.DataFrame(predict)
        predict_future = pd.concat([predict_future, predict], axis=1)

    # Reorder the zone columns (.ix is long deprecated; use .iloc)
    predict_future = predict_future.iloc[:, [
        0, 11, 13, 14, 15, 16, 17, 18, 19, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12
    ]]

    # Compute the total load across the 20 zones
    predict_total = np.zeros((len(predict_future), 1))
    expect_total = np.zeros((len(predict_future), 1))
    for i in range(len(predict_future)):
        predict_total[i][0] = np.sum(predict_future.iloc[i])
        expect_total[i][0] = np.sum(expect_future.iloc[i])
    predict_future = np.concatenate((predict_future, predict_total), axis=1)
    expect_future = np.concatenate((expect_future, expect_total), axis=1)

    # Compute the errors
    for i in range(0, 21):
        mse = stats.mean_squared_error(list(expect_future[:, i]),
                                       list(predict_future[:, i]))
        nrmse = stats.normalized_rmse(list(expect_future[:, i]),
                                      list(predict_future[:, i]))
        mape = stats.mape(list(expect_future[:, i]), list(predict_future[:,
                                                                         i]))
        print "zone", i + 1, ":", "mse:", mse, "  nrmse:", nrmse, "  mape:", mape

    # Visualize the results
    plot.plot_comparison(expect_future,
                         predict_future,
                         plot_num=21,
                         algorithm="(XGBoost)")
# The start of this snippet is truncated; judging by the neuralNetwork()
# examples below, the loop reads one workbook row of features plus the
# next day's total into xData/yData.
for rx in range(1, sheet.nrows):
    row = [c.value for c in sheet.row(rx)[1:12]]   # including temps
    rowy = sheet.row(rx)[12].value                 # total of next day
    xData.append(row)
    yData.append(rowy)

print(xData)
print(yData)
cu = len(xData) - 720
cutoff = len(xData) - 30
xTrain = xData[cu:cutoff]
yTrain = yData[cu:cutoff]
xTest = xData[cutoff:]
yTest = yData[cutoff:]
print(yTest)
print ("SVM")
for k in ['sigmoid','linear']:
    clf = svm.SVR(kernel=k)
    clf.fit(xTrain, yTrain)
    forecast_set = clf.predict(xTest)
    confidence = clf.score(xTest, yTest)
    print (forecast_set)

    err2=statistics.mape(forecast_set,yTest)
    print(k,"error rate:"+str(err2),"% Accuracy : "+str((1-err2)*100))

Example #7
print('Lowpass coefficient estimation finished.')
d2_pred = d2_model.predict(d2_X_test)[:, 0]
d1_pred = d1_model.predict(d1_X_test)[:, 0]
print('Highpass coefficient estimation finished.')
print(np.shape(a2_pred), np.shape(d2_pred), np.shape(d1_pred))
# Reconstruct the load series from the predicted wavelet coefficients
predicted_values = pywt.waverec([a2_pred, d2_pred, d1_pred], 'db4')
print('IDWT finished.')

print(len(y_test_true), len(predicted_values))
mape = statistics.mape([(y_test_true[i] + shifted_value) * 1000
                        for i in range(0, len(y_test_true))],
                       (predicted_values + shifted_value) * 1000)
print('MAPE is ', mape)
mae = statistics.mae([(y_test_true[i] + shifted_value) * 1000
                      for i in range(0, len(y_test_true))],
                     (predicted_values + shifted_value) * 1000)
print('MAE is ', mae)
mse = statistics.meanSquareError([(y_test_true[i] + shifted_value) * 1000
                                  for i in range(0, len(y_test_true))],
                                 (predicted_values + shifted_value) * 1000)
print('MSE is ', mse)
rmse = math.sqrt(mse)
print('RMSE is ', rmse)
nrmse = statistics.normRmse([(y_test_true[i] + shifted_value) * 1000
                             for i in range(0, len(y_test_true))],
                            (predicted_values + shifted_value) * 1000)
print('NRMSE is ', nrmse)
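The a2/d2/d1 coefficient series predicted above come from a two-level discrete wavelet transform; since only the reconstruction side appears in this snippet, here is a sketch of the matching decomposition, assuming the same 'db4' wavelet and a hypothetical input array `signal`:

import pywt

# Two-level DWT: returns [approximation a2, detail d2, detail d1].
a2, d2, d1 = pywt.wavedec(signal, 'db4', level=2)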
Example #8
# Fragment: assumes xData/yData, cu, and cutoff were prepared as in the
# snippet above.
xTrain = xData[cu:cutoff]
yTrain = yData[cu:cutoff]
xTest = xData[cutoff:]
yTest = yData[cutoff:]

# A set of regression models to compare
classifiers = [
    svm.SVR(kernel='linear'),
    linear_model.LassoLars(),
    linear_model.ARDRegression(),
    linear_model.TheilSenRegressor(),
    linear_model.LinearRegression()
]

for item in classifiers:
    print "------------------------------------------------------------------"
    print(item)
    clf = item
    clf.fit(xTrain, yTrain)
    pred = clf.predict(xTest)
    print(pred)
    print(yTest)
    err2 = statistics.mape(pred, yTest)
    print("Error Rate :" + str(err2) + "\n")
    print("% Accuracy :" + str((1 - err2) * 100) + "\n")
    print "------------------------------------------------------------------"
pd2 = model_d2.predict(params=result_d2.params,
                       start=1,
                       end=len(d2) + delta[1])
pd1 = model_d1.predict(params=result_d1.params,
                       start=1,
                       end=len(d1) + delta[2])
# Reconstruct the series
predict_values = pywt.waverec([pa2, pd2, pd1], 'db4')
print(np.shape(predict_values))
# Plot the reconstructed series against the original
plt.figure(figsize=(15, 5))
plt.plot(list_hourly_load, label="$true$", c='green')
plt.plot(predict_values, label="$predict$", c='red')
plt.show()
# Evaluation
print(len(list_hourly_load), len(predict_values))
mape = statistics.mape((list_hourly_load + shifted_value) * 1000,
                       (predict_values + shifted_value) * 1000)
print('MAPE is ', mape)
mae = statistics.mae((list_hourly_load + shifted_value) * 1000,
                     (predict_values + shifted_value) * 1000)
print('MAE is ', mae)
mse = statistics.meanSquareError((list_hourly_load + shifted_value) * 1000,
                                 (predict_values + shifted_value) * 1000)
print('MSE is ', mse)
rmse = math.sqrt(mse)
print('RMSE is ', rmse)
nrmse = statistics.normRmse((list_hourly_load + shifted_value) * 1000,
                            (predict_values + shifted_value) * 1000)
print('NRMSE is ', nrmse)
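`model_d2`/`result_d2` and `model_d1`/`result_d1` are fitted time-series models whose construction is not shown. Given the `predict(params=..., start=..., end=...)` call pattern, they look like the old statsmodels ARMA API; a hypothetical setup, with placeholder orders since the originals are unknown (ARMA was removed in statsmodels 0.13; newer code would use sm.tsa.ARIMA):

import statsmodels.api as sm

# Placeholder (p, q) orders; the original values are not shown.
model_d2 = sm.tsa.ARMA(d2, order=(4, 2))
result_d2 = model_d2.fit(disp=0)
model_d1 = sm.tsa.ARMA(d1, order=(4, 2))
result_d1 = model_d1.fit(disp=0)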
Example #10
def neuralNetwork(file, days):

    xData = []
    yData = []
    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    book = xlrd.open_workbook(
        os.path.join(BASE_DIR, "media", "uploadedfile", file))
    sheet = book.sheet_by_index(0)
    for rx in range(1, sheet.nrows):
        row = [c.value for c in sheet.row(rx)[1:12]]   # features, including temps
        rowy = sheet.row(rx)[12].value                 # next day's total load
        xData.append(row)
        yData.append(rowy)
    cu = len(xData) - 720
    cutoff = len(xData) - days
    xTrain = xData[cu:cutoff]
    yTrain = yData[cu:cutoff]
    xTest = xData[cutoff:]
    yTest = yData[cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)

    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]

    # Detrend the time series
    indices = np.arange(len(xData))
    trainIndices = indices[cu:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended

    dimensions = [7, 8, 10, 11]
    neurons = [300, 500, 500, 500]

    names = []
    for x in range(len(dimensions)):
        s = "d=" + str(dimensions[x]) + ",h=" + str(neurons[x])
        names.append(s)
    preds = []
    trendedPred = []
    accu = []
    mse = []
    for x in range(len(dimensions)):

        # Perform dimensionality reduction on the feature vectors
        pca = PCA(n_components=dimensions[x])
        pca.fit(xTrain)
        xTrainRed = pca.transform(xTrain)
        xTestRed = pca.transform(xTest)

        pred = fit_predict(xTrainRed, yTrain, xTestRed, 100, neurons[x])

        # Add the trend back into the predictions
        temp1 = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the normalization
        trendedPred.append([math.exp(z) for z in temp1])
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred[x])
        err2 = statistics.mape(yTest, trendedPred[x])
        accu.append((1 - err2) * 100)
        mse.append(math.pow(err, 2))
        # Append computed predictions to list for classifier predictions
        preds.append(trendedPred[x])
        print("Error Rate :" + str(err2) + "\n\n")
    #  print "The NRMSE for the neural network is " + str(err) + "..."
    #  print "The %Accuracy for the neural network is " + str((1-err2)*100) + "...\n"
    max_val = max(accu)
    index_max = accu.index(max_val)
    return mse[index_max], accu[index_max], trendedPred[index_max], yTest
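`fit_predict` is a project helper; judging by the call sites in both neural-network examples, its arguments are (train features, train targets, test features, iterations, hidden neurons). A stand-in sketch using scikit-learn's MLPRegressor, not the project's actual network:

from sklearn.neural_network import MLPRegressor

def fit_predict(xTrain, yTrain, xTest, iterations, neurons):
    # Single hidden layer of `neurons` units, trained for up to
    # `iterations` passes; the real implementation may differ.
    net = MLPRegressor(hidden_layer_sizes=(neurons,), max_iter=iterations)
    net.fit(xTrain, yTrain)
    return net.predict(xTest)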
Example #11
def neuralNetwork():

    # Retrieve the time series data and apply preprocessing.
    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value.
    xData = []
    yData = []
    book = xlrd.open_workbook("data/data_with_9_variable.xlsx")
    sheet = book.sheet_by_index(0)
    for rx in range(1, sheet.nrows):
        row = [c.value for c in sheet.row(rx)[1:12]]   # features, including temps
        rowy = sheet.row(rx)[12].value                 # next day's total load
        xData.append(row)
        yData.append(rowy)
    print(xData)
    print(yData)
    cu = len(xData) - 720
    cutoff = len(xData) - 30
    print(cutoff)
    xTrain = xData[cu:cutoff]
    yTrain = yData[cu:cutoff]
    xTest = xData[cutoff:]
    yTest = yData[cutoff:]
    print(yTest)

    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)

    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]

    # Detrend the time series
    indices = np.arange(len(xData))
    trainIndices = indices[cu:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended

    dimensions = [7, 8, 10, 11]
    neurons = [300, 500, 500, 500]

    names = []
    for x in range(len(dimensions)):
        s = "d=" + str(dimensions[x]) + ",h=" + str(neurons[x])
        names.append(s)
    preds = []

    for x in range(len(dimensions)):

        # Perform dimensionality reduction on the feature vectors
        pca = PCA(n_components=dimensions[x])
        pca.fit(xTrain)
        xTrainRed = pca.transform(xTrain)
        xTestRed = pca.transform(xTest)

        pred = fit_predict(xTrainRed, yTrain, xTestRed, 100, neurons[x])

        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope,
                                              intercept)
        # Reverse the normalization
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        err2 = statistics.mape(yTest, trendedPred)
        # Append computed predictions to list for classifier predictions
        preds.append(trendedPred)

        print "The NRMSE for the neural network is " + str(err) + "..."
        print "The %Accuracy for the neural network is " + str(
            (1 - err2) * 100) + "...\n"

    preds.append(yTest)
    names.append("actual")

    visualizer.comparisonPlot(
        2014,
        1,
        1,
        preds,
        names,
        plotName="Neural Network Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")