def RFIndustry(StartYear, EndYear, PreStartYear, PreEndYear, timestep, pretype,
               n_estimators=50, city="云南省", correction=500):
    """Forecast a yearly industry series with a random-forest regressor.

    Parameters
    ----------
    StartYear : str
        First year of the historical data.
    EndYear : str
        Last year of the historical data.
    PreStartYear : str
        First year of the forecast horizon.
    PreEndYear : str
        Last year of the forecast horizon.
    timestep : int
        Length of the input window handed to ``generate_data``.
    pretype : str
        Name of the series to fetch; also used as the DataFrame column name.
    n_estimators : int, optional
        Number of trees in the forest. The default is 50.
    city : str, optional
        Accepted for interface compatibility but not used: the data-source
        key passed to ``getData`` is hard-coded. The default is "云南省".
    correction : float, optional
        Constant added to every forecast value. The default (500) reproduces
        the offset that used to be hard-coded; pass 0 to get the raw model
        output.

    Returns
    -------
    dict
        "trainfromyear", "traintoyear" : first / last index label matched to
            the evaluation targets (see the NOTE on the matching loop).
        "trainresult" : list, model output on the evaluation samples.
        "prefromyear", "pretoyear" : ``PreStartYear`` / ``PreEndYear``.
        "preresult" : list, forecast values (including ``correction``).
        "MAPE", "RMSE" : values returned by ``MAPE`` / ``RMSE`` on the
            evaluation samples.

    Raises
    ------
    ValueError
        If ``timestep`` exceeds the number of historical years, or if the
        historical span is shorter than forecast span plus ``timestep``.
    """
    history_span = int(EndYear) - int(StartYear)
    forecast_span = int(PreEndYear) - int(PreStartYear)
    if timestep > history_span + 1:
        raise ValueError("训练步长过大,请调整后重试")
    if history_span < forecast_span + timestep:
        raise ValueError("历史时间长度小于预测时间长度,请增加历史时间长度或减小预测时间长度")

    outputlen = forecast_span + 1

    # Load the historical series and turn it into a one-column frame
    # (column name = pretype; index = the keys of the decoded JSON object).
    datajson = getData("云南省_year_电力电量类-行业", pretype, StartYear, EndYear)
    final = pd.DataFrame([json.loads(datajson)], index=[pretype]).T

    X, y = generate_data(final, timestep, outputlen, test_size=0,
                         if_norm="no")
    x_train = X["train"]
    y_train = y["train"]

    # The original code called y["train"].ravel() and threw the result away.
    # Flatten a single-column target for real so the regressor receives the
    # 1-D target it expects; multi-column targets are passed through as-is.
    if y_train.ndim == 2 and y_train.shape[1] == 1:
        y_fit = y_train.ravel()
    else:
        y_fit = y_train

    # Build and fit the random forest.
    rf = RandomForestRegressor(n_estimators=n_estimators)
    rf.fit(x_train, y_fit)

    # Evaluate on the second half of the training samples. These samples were
    # also used for fitting, so the metrics are in-sample.
    half = int(np.floor(len(x_train) / 2))
    y_rf_real = np.array(rf.predict(x_train[half:, :])).reshape(-1, 1)
    y_real = np.array(y_train[half:, :]).reshape(-1, 1)
    mape = MAPE(y_rf_real, y_real)
    rmse = RMSE(y_rf_real, y_real)

    # Forecast from the last `timestep` observations. The slice walks
    # backwards from the end and flipud restores the original order.
    series = final[pretype].values
    latest_window = np.array(
        np.flipud(series[-1:-(timestep + 1):-1])).reshape(1, -1)
    pre_y_rf = rf.predict(latest_window) + correction

    # Recover index labels for the evaluation targets by matching each target
    # value against the series within a tolerance of +/-5.
    # NOTE(review): matching by value is fragile -- nearby values can match
    # the wrong year, and if nothing matches, trainyear stays empty and the
    # lookup below raises IndexError.
    trainyear = []
    for target in y_real:
        for label, observed in zip(final.index, final[pretype]):
            if target > observed - 5 and target < observed + 5:
                trainyear.append(label)
                break

    ytrain = y_rf_real.flatten()
    ypre = pre_y_rf.flatten()
    result = {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
def BPNNIndustry(StartYear, EndYear, PreStartYear, PreEndYear, timestep, pretype,
                 city="云南省", hidden=[24, 12], learningrate=0.005, epoch=1000):
    """Forecast a yearly industry series with a fully connected network.

    Parameters
    ----------
    StartYear : str
        First year of the historical data.
    EndYear : str
        Last year of the historical data.
    PreStartYear : str
        First year of the forecast horizon.
    PreEndYear : str
        Last year of the forecast horizon.
    timestep : int
        Length of the input window; must not exceed half the number of
        historical years.
    pretype : str
        Name of the series to fetch; also used as the DataFrame column name.
    city : str, optional
        Accepted for interface compatibility but not used: the data-source
        key passed to ``getData`` is hard-coded. The default is "云南省".
    hidden : list of int, optional
        Hidden layers: one entry per layer, each entry the number of units.
        The default is [24, 12]. The list is only read, never mutated.
    learningrate : float, optional
        Learning rate of the Adam optimizer. The default is 0.005.
    epoch : int, optional
        Number of full-batch training steps. The default is 1000.

    Returns
    -------
    dict
        "trainfromyear", "traintoyear" : first / last index label matched to
            the last row of ``y["train"]`` (see the NOTE in the body).
        "trainresult" : network output for the last training row.
        "prefromyear", "pretoyear" : ``PreStartYear`` / ``PreEndYear``.
        "preresult" : list, forecast values.
        "MAPE", "RMSE" : values returned by ``MAPE`` / ``RMSE`` on the
            evaluation samples.

    Raises
    ------
    ValueError
        If ``timestep`` exceeds half the number of historical years, or if
        the historical span is shorter than forecast span plus ``timestep``.
    """

    def bpnn(timestep, outputlen, x_train, y_train, x_test, y_test, x_pre,
             hiddenneron, lr, epoch):
        """Build, train and run the network; return test predictions, test
        labels, the forecast and the predictions on the training inputs."""
        # Build the network in its own graph so repeated calls do not keep
        # adding layers and variables to the process-wide default graph.
        with tf.Graph().as_default():
            x = tf.placeholder(tf.float32, shape=[None, timestep],
                               name="Input")
            y = tf.placeholder(tf.float32, shape=[None, outputlen],
                               name="Onput")

            # Chain the hidden layers, then the output layer. Every layer,
            # including the output one, uses fully_connected's default
            # activation, exactly as before.
            layer = x
            for units in hiddenneron:
                layer = tf.contrib.layers.fully_connected(layer, units)
            pre = tf.contrib.layers.fully_connected(layer, outputlen)

            loss = tf.losses.mean_squared_error(y, pre)
            train_op = tf.train.AdamOptimizer(lr).minimize(loss)

            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                train_feed = {x: x_train, y: y_train}
                for i in range(epoch):
                    sess.run(train_op, feed_dict=train_feed)
                    # Only evaluate the loss on the steps that report it.
                    if i % 50 == 0:
                        print(sess.run(loss, feed_dict=train_feed))
                y_train_pre = sess.run(pre, feed_dict={x: x_train})
                y_test_pre = sess.run(pre, feed_dict={x: x_test})
                y_pre = sess.run(pre, feed_dict={x: x_pre})

        training = np.array(y_train_pre).squeeze()
        predictions = np.array(y_test_pre).squeeze()
        labels = np.array(y_test).squeeze()
        return predictions, labels, y_pre, training

    history_span = int(EndYear) - int(StartYear)
    forecast_span = int(PreEndYear) - int(PreStartYear)
    if timestep > (history_span + 1) * 0.5:
        raise ValueError("训练步长过大,请调整后重试")
    if history_span < forecast_span + timestep:
        raise ValueError("历史时间长度小于预测时间长度与训练步长之和, 请调整后重试")

    outputlen = forecast_span + 1

    # Load the historical series as a one-column frame (column = pretype).
    datajson = getData("云南省_year_电力电量类-行业", pretype, StartYear, EndYear)
    final = pd.DataFrame([json.loads(datajson)], index=[pretype]).T

    # test_size must be 0 here: the evaluation split is taken by hand below.
    X, y = generate_data(final, timestep, outputlen, test_size=0,
                         if_norm="no")

    # Evaluate on the second half of the training samples.
    half = int(np.floor(len(X["train"]) / 2))
    testinput = X["train"][half:, :]
    testoutput = y["train"][half:, :]

    # Forecast input: the last `timestep` observations. The slice walks
    # backwards from the end and flipud restores the original order.
    series = final[pretype].values
    x_pre = np.array(np.flipud(series[-1:-(timestep + 1):-1])).reshape(1, -1)

    # The network is trained on every sample except the last one.
    test_pre, test_label, pre, training = bpnn(
        timestep, outputlen, X["train"][:-1, :], y["train"][:-1, :],
        testinput, testoutput, x_pre, hidden, learningrate, epoch)

    mape = MAPE(test_pre, test_label)
    rmse = RMSE(test_pre, test_label)

    # Recover index labels by matching target values against the series
    # within a tolerance of +/-5.
    # NOTE(review): the original author flagged the years as possibly wrong.
    # The labels come from the LAST row of y["train"], while `training[-1]`
    # below is the prediction for the last row that was fed to bpnn (the
    # last row is excluded from training), so they may be one sample apart.
    # If nothing matches, trainyear stays empty and the lookup below raises
    # IndexError.
    trainingtrue = y["train"][-1, :]
    trainyear = []
    for target in trainingtrue:
        for label, observed in zip(final.index, final[pretype]):
            if target > observed - 5 and target < observed + 5:
                trainyear.append(label)
                break

    ytrain = training[-1]
    ypre = pre.flatten()
    result = {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
def LSTMpre(StartYear, EndYear, PreStartYear, PreEndYear, timestep,
            pretype="全社会用电量", city="云南省",
            hidden_size=24, hidden_layer=1,
            learningrate=0.005, epoch=1000):
    """Forecast a yearly series with a stacked LSTM.

    Parameters
    ----------
    StartYear : str
        First year of the historical data.
    EndYear : str
        Last year of the historical data.
    PreStartYear : str
        First year of the forecast horizon.
    PreEndYear : str
        Last year of the forecast horizon.
    timestep : int
        Length of the input window; must not exceed half the number of
        historical years.
    pretype : str, optional
        Name of the series to fetch. The default is "全社会用电量".
    city : str, optional
        Accepted for interface compatibility but not used: the data-source
        key passed to ``getData`` is hard-coded. The default is "云南省".
    hidden_size : int, optional
        Number of units per LSTM layer. The default is 24.
    hidden_layer : int, optional
        Number of stacked LSTM layers. The default is 1.
    learningrate : float, optional
        Learning rate handed to the optimizer. The default is 0.005.
    epoch : int, optional
        Number of mini-batch training steps. The default is 1000.

    Returns
    -------
    dict
        "prefromyear", "pretoyear" : ``PreStartYear`` / ``PreEndYear``.
        "preresult" : forecast values.
        "MAPE", "RMSE" : values returned by ``MAPE`` / ``RMSE`` on the
            evaluation samples.

    Raises
    ------
    ValueError
        If ``timestep`` exceeds half the number of historical years, or if
        the historical span is shorter than forecast span plus ``timestep``.

    Notes
    -----
    Calls ``tf.reset_default_graph()``, which clears the process-wide
    default TensorFlow graph.
    """

    # LSTM network builder.
    def LSTM(x, y, outputlen, is_training, hidden_size, num_layers, lr,
             optimizer, keep_pro):
        use_dropout = is_training and keep_pro < 1

        def make_cell():
            # The same activation must be used for training and inference:
            # the evaluation/forecast graphs reuse the trained weights, so a
            # different activation would evaluate a different function from
            # the one that was fitted.
            cell = tf.nn.rnn_cell.BasicLSTMCell(
                hidden_size, activation=tf.nn.softsign)
            if use_dropout:
                cell = tf.nn.rnn_cell.DropoutWrapper(
                    cell, output_keep_prob=keep_pro)
            return cell

        lstmcell = tf.nn.rnn_cell.MultiRNNCell(
            [make_cell() for _ in range(num_layers)])
        # Add a trailing feature axis: (batch, timestep) -> (batch, timestep, 1).
        x = tf.expand_dims(x, axis=2)
        outputs, _ = tf.nn.dynamic_rnn(lstmcell, x, dtype=tf.float32)
        # Only the output at the last time step feeds the dense head.
        output = outputs[:, -1, :]
        predictions = tf.contrib.layers.fully_connected(output, outputlen)
        if not is_training:
            return predictions, None, None
        loss = tf.losses.absolute_difference(labels=y,
                                             predictions=predictions)
        train_op = tf.contrib.layers.optimize_loss(
            loss, tf.train.get_global_step(), optimizer=optimizer,
            learning_rate=lr)
        return predictions, loss, train_op

    # Training: run `training_step` optimizer steps on shuffled mini-batches.
    def trainmodel(sess, outputlen, train_x, train_y, hidden_size, num_layers,
                   lr, optimizer, keep_pro, batch_size, training_step):
        ds = tf.data.Dataset.from_tensor_slices((train_x, train_y))
        ds = ds.repeat().shuffle(100).batch(batch_size)
        x, y = ds.make_one_shot_iterator().get_next()
        _, _, train_op = LSTM(x, y, outputlen, True, hidden_size, num_layers,
                              lr, optimizer, keep_pro)
        sess.run(tf.global_variables_initializer())
        for _ in range(training_step):
            sess.run(train_op)

    # Evaluation: predict each test sample one at a time.
    def runmodel(sess, outputlen, test_x, test_y, hidden_size, num_layers, lr,
                 optimizer, keep_pro):
        ds = tf.data.Dataset.from_tensor_slices((test_x, test_y))
        ds = ds.batch(1)
        x, y = ds.make_one_shot_iterator().get_next()
        prediction, _, _ = LSTM(x, [0.0], outputlen, False, hidden_size,
                                num_layers, lr, optimizer, keep_pro)
        pre = []
        label = []
        for _ in range(len(test_y)):
            p, l = sess.run([prediction, y])
            pre.append(p)
            label.append(l)
        return np.array(pre).squeeze(), np.array(label).squeeze()

    # Forecast: a single forward pass on the latest window.
    def premodel(sess, outputlen, x_latest, hidden_size, num_layers, lr,
                 optimizer, keep_pro):
        prediction, _, _ = LSTM(x_latest, [0.0], outputlen, False,
                                hidden_size, num_layers, lr, optimizer,
                                keep_pro)
        return sess.run(prediction)

    history_span = int(EndYear) - int(StartYear)
    forecast_span = int(PreEndYear) - int(PreStartYear)
    if timestep > (history_span + 1) * 0.5:
        raise ValueError("训练步长过大,请调整后重试")
    if history_span < forecast_span + timestep:
        raise ValueError("历史时间长度小于 预测时间长度与训练步长之和,请调整后重试")

    # Fixed training settings.
    optimizer = "Adam"
    keep_pro = 0.9
    batch_size = 16

    outputlen = forecast_span + 1

    # Load the historical series as a one-column frame (column = pretype).
    datajson = getData("云南省_year_电力电量类", pretype, StartYear, EndYear)
    final = pd.DataFrame([json.loads(datajson)], index=[pretype]).T

    # test_size must be 0 here: the evaluation split is taken by hand below.
    X, y = generate_data(final, timestep, outputlen, test_size=0,
                         if_norm="no")

    # Evaluate on the second half of the training samples.
    half = int(np.floor(len(X["train"]) / 2))
    testinput = X["train"][half:, :]
    testoutput = y["train"][half:, :]

    # Forecast input: the last `timestep` observations. The slice walks
    # backwards from the end, so it has to be flipped back to the original
    # order -- as the other forecasters in this file do -- otherwise the
    # order-sensitive LSTM is fed the window newest-first.
    series = final[pretype].values
    x_pre = np.array(np.flipud(series[-1:-(timestep + 1):-1]),
                     dtype=np.float32).reshape(1, -1)

    # Train, then evaluate and forecast with the same weights (reuse=True).
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("LSTM"):
            # The model is trained on every sample except the last one.
            trainmodel(sess, outputlen, X["train"][:-1, :],
                       y["train"][:-1, :], hidden_size, hidden_layer,
                       learningrate, optimizer, keep_pro, batch_size, epoch)
        with tf.variable_scope("LSTM", reuse=True):
            test_pre, test_label = runmodel(
                sess, outputlen, testinput, testoutput, hidden_size,
                hidden_layer, learningrate, optimizer, keep_pro)
        with tf.variable_scope("LSTM", reuse=True):
            ypre = premodel(sess, outputlen, x_pre, hidden_size,
                            hidden_layer, learningrate, optimizer, keep_pro)

    mape = MAPE(test_pre, test_label)
    rmse = RMSE(test_pre, test_label)

    ypre = np.array(ypre).squeeze()
    result = {"prefromyear": PreStartYear, "pretoyear": PreEndYear,
              "preresult": ypre.tolist(), "MAPE": mape, "RMSE": rmse}
    return result
def GBDT(StartYear, EndYear, PreStartYear, PreEndYear, timestep,
         pretype="全社会用电量", city="云南省", LearningRate=0.5, MaxDepth=20,
         NumberofEstimators=500):
    """Forecast a yearly series with an XGBoost regressor per output year.

    Parameters
    ----------
    StartYear, EndYear : str
        First / last year of the historical data.
    PreStartYear, PreEndYear : str
        First / last year of the forecast horizon.
    timestep : int
        Length of the input window handed to ``generate_data``.
    pretype : str, optional
        Name of the series to fetch. The default is "全社会用电量".
    city : str, optional
        Accepted but not used in this function. The default is "云南省".
    LearningRate : float, optional
        Passed to the regressor as ``learning_rate``. The default is 0.5.
    MaxDepth : int, optional
        Passed to the regressor as ``max_depth``. The default is 20.
    NumberofEstimators : int, optional
        Passed to the regressor as ``n_estimators``. The default is 500.

    Returns
    -------
    dict
        "trainfromyear", "traintoyear", "trainresult", "prefromyear",
        "pretoyear", "preresult", "MAPE", "RMSE".

    Raises
    ------
    ValueError
        If ``timestep`` exceeds the number of historical years, or if the
        historical span is shorter than forecast span plus ``timestep``.
    """
    history_span = int(EndYear) - int(StartYear)
    forecast_span = int(PreEndYear) - int(PreStartYear)
    if timestep > history_span + 1:
        raise ValueError("训练步长过大,请调整后重试.")
    if history_span < forecast_span + timestep:
        raise ValueError("历史时间长度小于预测时间长度,请增加历史时间长度或减小预测时间长度.")

    outputlen = forecast_span + 1

    # Load the historical series as a one-column frame (column = pretype).
    raw = json.loads(
        getData("云南省_year_电力电量类", pretype, StartYear, EndYear))
    final = pd.DataFrame([raw], index=[pretype]).T

    # NOTE(review): the original comment said the test split should be 0.3,
    # but the value actually passed is 0.
    X, y = generate_data(final, timestep, outputlen, test_size=0,
                         if_norm="no")
    features = X["train"]
    targets = y["train"]

    # NOTE(review): booster is 'gblinear' although max_depth is supplied --
    # confirm this combination is intended.
    gbdt = xgb.XGBRegressor(max_depth=MaxDepth,
                            learning_rate=LearningRate,
                            n_estimators=NumberofEstimators,
                            silent=True,
                            objective='reg:linear',
                            booster='gblinear',
                            n_jobs=50,
                            nthread=None,
                            gamma=0,
                            min_child_weight=1,
                            max_delta_step=0,
                            subsample=1,
                            colsample_bytree=1,
                            colsample_bylevel=1,
                            reg_alpha=0,
                            reg_lambda=1,
                            scale_pos_weight=1,
                            base_score=0.5,
                            random_state=0,
                            seed=None,
                            missing=None,
                            importance_type='gain')
    # One regressor is fitted per forecast year.
    multi_model = MultiOutputRegressor(gbdt)
    multi_model.fit(features, targets)

    # Evaluate on the second half of the training samples.
    half = int(np.floor(len(features) / 2))
    testinput = features[half:, :]
    testoutput = targets[half:, :]

    # Forecast input: the last `timestep` rows, restored to original order.
    history = final.values
    x_pre = np.array(np.flipud(history[-1:-(timestep + 1):-1])).reshape(1, -1)

    y1_gbdt = multi_model.predict(testinput)
    y1_gbdt_real = np.array(y1_gbdt).reshape(-1, 1)
    y1_real = np.array(testoutput).reshape(-1, 1)
    mape = MAPE(y1_gbdt_real, y1_real)
    rmse = RMSE(y1_gbdt_real, y1_real)

    ytrain = y1_gbdt[-1]

    # Map the last evaluation target row back to index labels by matching
    # values against the series within a tolerance of +/-1.
    trainyear = []
    for target in testoutput[-1]:
        for label, observed in zip(final.index, final[pretype]):
            if observed - 1 < target < observed + 1:
                trainyear.append(label)
                break

    ypre = np.array(multi_model.predict(x_pre)).flatten().tolist()

    return {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre,
        "MAPE": mape,
        "RMSE": rmse
    }
def SVM(StartYear, EndYear, PreStartYear, PreEndYear, timestep,
        pretype="全社会用电量", city="云南省"):
    """Forecast a yearly series with one support-vector regressor per year.

    Parameters
    ----------
    StartYear, EndYear : str
        First / last year of the historical data.
    PreStartYear, PreEndYear : str
        First / last year of the forecast horizon.
    timestep : int
        Length of the input window handed to ``generate_data``.
    pretype : str, optional
        Name of the series to fetch. The default is "全社会用电量".
    city : str, optional
        Accepted but not used in this function. The default is "云南省".

    Returns
    -------
    dict
        "trainfromyear", "traintoyear" : first / last index label matched to
            the last evaluation target row.
        "trainresult" : model output for the last evaluation sample.
        "prefromyear", "pretoyear" : ``PreStartYear`` / ``PreEndYear``.
        "preresult" : list, forecast values.
        "MAPE", "RMSE" : values returned by ``MAPE`` / ``RMSE`` on the
            evaluation samples.

    Raises
    ------
    ValueError
        If ``timestep`` exceeds the number of historical years, or if the
        historical span is shorter than forecast span plus ``timestep``.
    """
    history_span = int(EndYear) - int(StartYear)
    forecast_span = int(PreEndYear) - int(PreStartYear)
    if timestep > history_span + 1:
        raise ValueError("训练步长过大,请调整后重试")
    if history_span < forecast_span + timestep:
        raise ValueError("历史时间长度小于预测时间长度与训练步长之和,请调整后重试")

    outputlen = forecast_span + 1

    # Load the historical series as a one-column frame (column = pretype).
    datajson = getData("云南省_year_电力电量类", pretype, StartYear, EndYear)
    final = pd.DataFrame([json.loads(datajson)], index=[pretype]).T

    # test_size must be 0 here: the evaluation split is taken by hand below.
    # (The windows used to be generated twice with identical arguments; a
    # single call is sufficient.)
    X, y = generate_data(final, timestep, outputlen, test_size=0,
                         if_norm="no")

    # Other kernels such as "linear" were also considered by the author.
    svr = SVR(kernel="poly", gamma="scale", C=0.1)
    multi_model = MultiOutputRegressor(svr)
    multi_model.fit(X["train"], y["train"])

    # Evaluate on the second half of the training samples.
    half = int(np.floor(len(X["train"]) / 2))
    testinput = X["train"][half:, :]
    testoutput = y["train"][half:, :]

    y_svr = multi_model.predict(testinput)
    y_svr_real = np.array(y_svr).reshape(-1, 1)
    y_real = np.array(testoutput).reshape(-1, 1)
    mape = MAPE(y_svr_real, y_real)
    rmse = RMSE(y_svr_real, y_real)

    # Forecast from the last `timestep` rows. The slice walks backwards from
    # the end and flipud restores the original order.
    testdata = final.values
    pre = multi_model.predict(
        np.array(np.flipud(testdata[-1:-(timestep + 1):-1])).reshape(1, -1))

    ytrain = y_svr[-1]

    # Map the last evaluation target row back to index labels by matching
    # values against the series within a tolerance of +/-1.
    # NOTE(review): if nothing matches, trainyear stays empty and the lookup
    # below raises IndexError.
    trainyear = []
    for target in testoutput[-1]:
        for label, observed in zip(final.index, final[pretype]):
            if target > observed - 1 and target < observed + 1:
                trainyear.append(label)
                break

    ypre = np.array(pre).flatten()
    result = {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result