def LDM(PreStartYear, PreEndYear, buildingarea, loaddensity, pretype="全社会用电量", city="云南省"):
    """Forecast ``pretype`` with the load-density method.

    Both CSV inputs are read with pandas. Their first column is used as the
    year axis and every remaining column is extrapolated to the forecast
    horizon with ``predict.pre``. The product of the last building-area column
    and the last load-density column is the single regressor of a linear model
    fitted against the historical ``pretype`` series returned by ``getData``.

    Parameters
    ----------
    PreStartYear, PreEndYear : int or str
        First and last forecast year; converted with ``int`` before use.
    buildingarea, loaddensity : str or file-like
        CSV sources passed to ``pd.read_csv`` (UTF-8).
    pretype : str
        Name of the historical series requested from ``getData``.
    city : str
        Not used by this function.

    Returns
    -------
    dict
        Keys ``trainfromyear``, ``traintoyear``, ``trainresult``,
        ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE`` and ``RMSE``.

    Raises
    ------
    ValueError
        If the two files differ in column count or in their year column.
    """
    data1 = pd.read_csv(buildingarea, encoding="UTF-8")
    data2 = pd.read_csv(loaddensity, encoding="UTF-8")
    columns = data1.columns
    columns2 = data2.columns
    if len(columns) != len(columns2):
        raise ValueError("负荷密度和建筑密度列表不匹配,请重新上传")
    # array_equal also copes with year columns of different length, where an
    # element-wise ``==`` would not yield a boolean array.
    if not np.array_equal(data1[columns[0]].values, data2[columns2[0]].values):
        raise ValueError("负荷密度和建筑密度列表不匹配,请重新上传")

    pre_start = int(PreStartYear)
    pre_end = int(PreEndYear)

    def extrapolate(frame, cols):
        # Predict every data column separately and join the pieces on the
        # year column (the first column of the file).
        merged = predict.pre(frame.loc[:, [cols[0], cols[1]]], cols[1], pre_start, pre_end)
        for col in cols[2:]:
            part = predict.pre(frame.loc[:, [cols[0], col]], col, pre_start, pre_end)
            merged = pd.merge(merged, part, on=cols[0])
        return merged

    StartYear = str(data1[columns[0]].values[0])
    EndYear = str(data1[columns[0]].values[-1])

    # Predicted building area and predicted load density.
    building = extrapolate(data1, columns)
    density = extrapolate(data2, columns2)

    # Historical load data.
    period = int(EndYear) - int(StartYear) + 1
    history = json.loads(getData("云南省_year_电力电量类", pretype, StartYear, EndYear))
    final = pd.DataFrame([history], index=[pretype]).T

    # Regressor: building area times load density of the last column. The
    # density frame is addressed with its own column name so the two files may
    # label their columns differently.
    area = building[columns[-1]].values
    dens = density[columns2[-1]].values
    trainx = [area[i] * dens[i] for i in range(period)]
    prex = [area[i] * dens[i] for i in range(period, len(area))]

    # Keep only the historical values whose year appears in the uploaded file.
    known_years = set(data1[columns[0]].values.tolist())
    trainy = [
        final[pretype].values[j]
        for j in range(period)
        if int(final.index.values[j]) in known_years
    ]

    trainx = np.array(trainx).reshape(-1, 1)
    trainy = np.array(trainy).reshape(-1, 1)
    prex = np.array(prex).reshape(-1, 1)

    # Fit the model and evaluate it on the training years.
    reg = LinearRegression().fit(trainx, trainy)
    slope = reg.coef_[0][0]
    intercept = reg.intercept_[0]
    pretrainy = trainx * slope + intercept
    prey = prex * slope + intercept

    # Errors are measured against the observed load (trainy), not against the
    # regressor values.
    mape = MAPE(pretrainy, trainy)
    rmse = RMSE(pretrainy, trainy)

    # NOTE(review): "trainresult" keeps its original nested (1, n) shape while
    # "preresult" is flattened; confirm with consumers before flattening it.
    ytrain = pretrainy.reshape(1, -1)
    ypre = prey.reshape(1, -1).squeeze()

    result = {
        "trainfromyear": StartYear,
        "traintoyear": EndYear,
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
def PCAIndustry(StartYear, EndYear, PreStartYear, PreEndYear, pretype, econamelist, city="云南省"):
    """Forecast an industry series with PCA on economic indicators plus linear regression.

    The historical ``pretype`` series and every indicator in ``econamelist``
    are read through ``getData``. Each indicator is extended with
    ``preeco.pre``, standardised, reduced with PCA (enough components to keep
    90% of the variance) and used as regressors of a linear model.

    Parameters
    ----------
    StartYear, EndYear : int or str
        Historical range requested from ``getData``.
    PreStartYear, PreEndYear : int or str
        Forecast range.
    pretype : str
        Name of the target series.
    econamelist : list of str
        Names of the economic indicators; at least one is required.
    city : str
        Only "云南省" is supported.

    Returns
    -------
    dict
        Keys ``trainfromyear``, ``traintoyear``, ``trainresult``,
        ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE`` and ``RMSE``.
        The "train" entries describe the hold-out years.

    Raises
    ------
    ValueError
        If ``city`` is not supported.
    """
    if city != "云南省":
        # Previously this fell through to ``return result`` with no value bound.
        raise ValueError("暂不支持该地区: %s" % city)

    history_years = np.arange(int(StartYear), int(EndYear) + 1)

    # Historical load first, then every economic indicator.
    names = [pretype]
    series = [json.loads(getData("云南省_year_电力电量类-行业", pretype, StartYear, EndYear))]
    for eco_name in econamelist:
        series.append(json.loads(getData("云南省_year_社会经济类", eco_name, StartYear, EndYear)))
        names.append(eco_name)
    final = pd.DataFrame(series, index=names).T
    final["Year"] = history_years

    # Extend every indicator to the forecast horizon and join them on "year".
    eco = preeco.pre(final, econamelist[0], PreStartYear, PreEndYear)
    for eco_name in econamelist[1:]:
        eco = pd.merge(eco, preeco.pre(final, eco_name, PreStartYear, PreEndYear), on="year")
    features = eco.columns[1:].tolist()

    # Standardise the indicators.
    raw = eco[features].values
    scaled = pd.DataFrame(data=StandardScaler().fit(raw).transform(raw), columns=features)
    scaled["Year"] = eco["year"].values

    # The split year belongs to the test set only; the original inclusive
    # ranges put it in both sets.
    test_start_year = int(StartYear) + math.ceil(len(history_years) * 0.7)
    train_years = range(int(StartYear), test_start_year)
    test_years = range(test_start_year, int(EndYear) + 1)
    x_train = scaled.loc[scaled["Year"].isin(train_years)]
    y_train = final.loc[final["Year"].isin(train_years)]
    x_test = scaled.loc[scaled["Year"].isin(test_years)]
    y_test = final.loc[final["Year"].isin(test_years)]

    # Number of components that keeps 90% of the variance on all years, capped
    # by the number of training rows because PCA cannot fit more than that.
    n_components = PCA(0.9).fit(scaled[features].values).n_components_
    n_components = min(n_components, len(x_train))

    pca = PCA(n_components)
    pca.fit(x_train[features].values)
    x_train_pca = pca.transform(x_train[features].values)
    x_test_pca = pca.transform(x_test[features].values)
    y_train_pca = y_train[pretype]
    y_test_pca = y_test[pretype]

    pca_model = LinearRegression()
    pca_model.fit(x_train_pca, y_train_pca)
    pca_predict = pca_model.predict(x_test_pca)

    rmse = RMSE(pca_predict, y_test_pca)
    mape = MAPE(pca_predict, y_test_pca)

    # y_test is a row selection of ``final``, so its index already carries the
    # labels of the evaluated years; no value matching is needed.
    trainyear = y_test_pca.index

    # Forecast.
    predata = scaled.loc[scaled["Year"].isin(range(int(PreStartYear), int(PreEndYear) + 1))]
    forecast = pca_model.predict(pca.transform(predata[features].values))

    ytrain = pca_predict.tolist()
    ypre = np.array(forecast).squeeze().tolist()
    result = {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain,
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre,
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
def Growth(StartYear, EndYear, PreStartYear, PreEndYear, pretype="全社会用电量", econamelist="GDP", city="云南省", planflag=1, plan=1, pro=1):
    """Forecast ``pretype`` from one economic indicator with a growth curve.

    The model is ``y = exp(a / x + b) + c`` fitted with ``leastsq``. The last
    fifth of the history is held out for evaluation; the forecast uses a model
    refitted on the whole history. Future indicator values come from
    ``ic.getpred``: of its 51 scenarios, the one whose mean equals the median
    of the scenario means is scaled by the last observed indicator value.

    Parameters
    ----------
    StartYear, EndYear : int or str
        Historical range requested from ``getData``.
    PreStartYear, PreEndYear : int or str
        Forecast range.
    pretype : str
        Name of the target series.
    econamelist : str or single-element list
        Name of the economic indicator.
    city : str
        Only "云南省" is supported.
    planflag, plan, pro
        Forwarded unchanged to ``ic.getpred``.

    Returns
    -------
    dict
        Keys ``trainfromyear``, ``traintoyear``, ``trainresult``,
        ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE`` and ``RMSE``.
        The "train" entries describe the hold-out years.

    Raises
    ------
    ValueError
        If more than one indicator is given, the city is unsupported, or the
        history is too short to hold out a test year.
    """

    def func3(params, x):
        a, b, c = params
        return np.exp(a / x + b) + c

    def error3(params, x, y):
        return func3(params, x) - y

    def slovePara3(x, y):
        return leastsq(error3, [1, 0.02, 0], args=(x, y))

    def median_scenario(scenarios, base):
        # ic.getpred is read as 51 scenario paths after transposing; pick the
        # one whose mean is the median mean and scale it by ``base``.
        paths = np.array(scenarios).T
        means = [np.mean(paths[i]) for i in range(51)]
        chosen = paths[means.index(np.median(means))]
        return [k * base for k in chosen]

    # Accept a plain name as well as a list. Wrapping unconditionally made the
    # length check below unreachable and nested an already-list argument.
    if isinstance(econamelist, (list, tuple)):
        econamelist = list(econamelist)
    else:
        econamelist = [econamelist]
    if len(econamelist) != 1:
        raise ValueError("仅支持选择一个因素变量")
    if city != "云南省":
        # Previously this fell through to ``return result`` with no value bound.
        raise ValueError("暂不支持该地区: %s" % city)

    eco_name = econamelist[0]
    # Historical load, then the economic indicator.
    load = json.loads(getData("云南省_year_电力电量类", pretype, StartYear, EndYear))
    eco = json.loads(getData("云南省_year_社会经济类", eco_name, StartYear, EndYear))
    final = pd.DataFrame([load, eco], index=[pretype, eco_name]).T

    x = final[eco_name].values.reshape(-1, 1)
    y = final[pretype].values.reshape(-1, 1)

    # Hold out the last fifth of the history.
    num = len(x)
    testyear = math.floor(num / 5)
    if testyear < 1:
        raise ValueError("历史数据年份过少,无法划分测试集,请扩大历史年份范围")
    split = num - testyear
    trainx = x[:split].squeeze()
    trainy = y[:split].squeeze()
    testx = x[split:]
    testy = y[split:]

    # Fit on the training part and evaluate on the hold-out.
    fitted = slovePara3(trainx, trainy)[0]
    testpredx = median_scenario(ic.getpred(testx, testyear, planflag, plan, pro), testx[-1])
    testpredy = [func3(fitted, value) for value in testpredx]

    # The hold-out rows are the tail of ``final``, so their index labels are
    # the evaluated years; the former +/-5 value match could pick a different
    # year or none at all.
    trainyear = final.index[split:]

    mape = MAPE(testpredy, testy)
    rmse = RMSE(testpredy, testy)

    # Refit on the full history for the forecast.
    x = x.squeeze()
    y = y.squeeze()
    refitted = slovePara3(x, y)[0]
    horizon = len(range(int(PreStartYear), int(PreEndYear) + 1))
    predx = median_scenario(ic.getpred(x, horizon, planflag, plan, pro), x[-1])
    predy = np.array([func3(refitted, value) for value in predx]).squeeze()

    ytrain = np.array(testpredy).squeeze()
    result = {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": predy.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
def BPNNIndustry(StartYear, EndYear, PreStartYear, PreEndYear, timestep, pretype, city="云南省", hidden=(24, 12), learningrate=0.005, epoch=1000):
    """Forecast an industry series with a fully connected network.

    Sliding windows are built by ``generate_data``; every window of
    ``timestep`` values is mapped to the following ``outputlen`` values, where
    ``outputlen`` is the number of forecast years.

    Parameters
    ----------
    StartYear, EndYear : int or str
        Historical range requested from ``getData``.
    PreStartYear, PreEndYear : int or str
        Forecast range.
    timestep : int
        Length of the input window.
    pretype : str
        Name of the target series.
    city : str
        Not used by this function.
    hidden : sequence of int
        One entry per hidden layer, giving that layer's number of units.
    learningrate : float
        Learning rate of the Adam optimizer.
    epoch : int
        Number of training steps.

    Returns
    -------
    dict
        Keys ``trainfromyear``, ``traintoyear``, ``trainresult``,
        ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE`` and ``RMSE``.

    Raises
    ------
    ValueError
        If ``timestep`` is too large for the historical range.
    """

    def bpnn(timestep, outputlen, x_train, y_train, x_test, y_test, x_pre, hiddenneron, lr, epoch):
        # Start from an empty graph so repeated calls do not keep adding
        # layers to the process-wide default graph.
        tf.reset_default_graph()
        x = tf.placeholder(tf.float32, shape=[None, timestep], name="Input")
        y = tf.placeholder(tf.float32, shape=[None, outputlen], name="Onput")

        # Chain the hidden layers, then add the output layer.
        layer = x
        for units in hiddenneron:
            layer = tf.contrib.layers.fully_connected(layer, units)
        pre = tf.contrib.layers.fully_connected(layer, outputlen)

        loss = tf.losses.mean_squared_error(y, pre)
        train_op = tf.train.AdamOptimizer(lr).minimize(loss)
        train_feed = {x: x_train, y: y_train}

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for step in range(epoch):
                sess.run(train_op, feed_dict=train_feed)
                # The loss is only needed for the progress print.
                if step % 50 == 0:
                    print(sess.run(loss, feed_dict=train_feed))
            y_train_pre = sess.run(pre, feed_dict={x: x_train})
            y_test_pre = sess.run(pre, feed_dict={x: x_test})
            y_pre = sess.run(pre, feed_dict={x: x_pre})
        training = np.array(y_train_pre).squeeze()
        predictions = np.array(y_test_pre).squeeze()
        labels = np.array(y_test).squeeze()
        return predictions, labels, y_pre, training

    if timestep > (int(EndYear) - int(StartYear) + 1) * 0.5:
        raise ValueError("训练步长过大,请调整后重试")
    if int(EndYear) - int(StartYear) < (int(PreEndYear) - int(PreStartYear) + timestep):
        raise ValueError("历史时间长度小于预测时间长度与训练步长之和, 请调整后重试")

    # Read the data and derive the window sizes.
    outputlen = int(PreEndYear) - int(PreStartYear) + 1
    data = json.loads(getData("云南省_year_电力电量类-行业", pretype, StartYear, EndYear))
    final = pd.DataFrame([data], index=[pretype]).T

    # test_size stays 0: all windows come back in the "train" split.
    X, y = generate_data(final, timestep, outputlen, test_size=0, if_norm="no")
    testdata = final[pretype].values

    # Evaluate on the second half of the windows.
    selet = int(np.floor(len(X["train"]) / 2))
    testinput = X["train"][selet:, :]
    testoutput = y["train"][selet:, :]

    # Most recent ``timestep`` values in chronological order.
    x_pre = np.array(np.flipud(testdata[-1:-(timestep + 1):-1])).reshape(1, -1)

    test_pre, test_label, pre, training = bpnn(
        timestep, outputlen, X["train"][:-1, :], y["train"][:-1, :],
        testinput, testoutput, x_pre, hidden, learningrate, epoch)
    mape = MAPE(test_pre, test_label)
    rmse = RMSE(test_pre, test_label)

    # The network is run on X["train"][:-1], so training[-1] is the output for
    # the second-to-last window. Its targets are y["train"][-2]; using the
    # last window here reported years that did not belong to the prediction.
    trainingtrue = y["train"][-2, :]
    # NOTE(review): years are recovered by matching values within +/-5 because
    # generate_data does not expose them; confirm this tolerance suits the data.
    trainyear = []
    for t in trainingtrue:
        for pos, d in enumerate(final[pretype]):
            if d - 5 < t < d + 5:
                trainyear.append(final.index[pos])
                break

    ytrain = training[-1]
    ypre = pre.flatten()
    result = {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
def LSTMpre(StartYear, EndYear, PreStartYear, PreEndYear, timestep, pretype="全社会用电量", city="云南省",
            hidden_size=24, hidden_layer=1, learningrate=0.005, epoch=1000):
    """Forecast ``pretype`` with a stacked LSTM.

    Sliding windows are built by ``generate_data``; every window of
    ``timestep`` values is mapped to the following ``outputlen`` values, where
    ``outputlen`` is the number of forecast years. The same variables are
    reused for training, evaluation and the final forecast through the "LSTM"
    variable scope.

    Parameters
    ----------
    StartYear, EndYear : int or str
        Historical range requested from ``getData``.
    PreStartYear, PreEndYear : int or str
        Forecast range.
    timestep : int
        Length of the input window.
    pretype : str
        Name of the target series.
    city : str
        Not used by this function.
    hidden_size : int
        Units per LSTM cell.
    hidden_layer : int
        Number of stacked LSTM cells.
    learningrate : float
        Learning rate handed to ``optimize_loss``.
    epoch : int
        Number of training steps.

    Returns
    -------
    dict
        Keys ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE`` and
        ``RMSE``.

    Raises
    ------
    ValueError
        If ``timestep`` is too large for the historical range.
    """

    # LSTM graph builder.
    def LSTM(x, y, outputlen, is_training, hidden_size, num_layers, lr, optimizer, keep_pro):
        def make_cell():
            # The activation must be the same in every graph that shares the
            # weights. The original trained softsign cells but ran inference
            # with default (tanh) cells.
            cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size, activation=tf.nn.softsign)
            if is_training and keep_pro < 1:
                cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_pro)
            return cell

        lstmcell = tf.nn.rnn_cell.MultiRNNCell([make_cell() for _ in range(num_layers)])
        x = tf.expand_dims(x, axis=2)
        outputs, current_state = tf.nn.dynamic_rnn(lstmcell, x, dtype=tf.float32)
        # Only the output of the last time step feeds the dense layer.
        output = outputs[:, -1, :]
        predictions = tf.contrib.layers.fully_connected(output, outputlen)
        if not is_training:
            return predictions, None, None
        loss = tf.losses.absolute_difference(labels=y, predictions=predictions)
        train_op = tf.contrib.layers.optimize_loss(
            loss, tf.train.get_global_step(), optimizer=optimizer, learning_rate=lr)
        return predictions, loss, train_op

    # Training.
    def trainmodel(sess, outputlen, train_x, train_y, hidden_size, num_layers, lr, optimizer, keep_pro, batch_size, training_step):
        ds = tf.data.Dataset.from_tensor_slices((train_x, train_y))
        ds = ds.repeat().shuffle(100).batch(batch_size)
        x, y = ds.make_one_shot_iterator().get_next()
        prediction, loss, train_op = LSTM(x, y, outputlen, True, hidden_size, num_layers, lr, optimizer, keep_pro)
        sess.run(tf.global_variables_initializer())
        history = []
        for _ in range(training_step):
            batch_pred, _, _ = sess.run([prediction, train_op, loss])
            history.append(batch_pred)
        return history

    # Evaluation, one window per step.
    def runmodel(sess, outputlen, test_x, test_y, hidden_size, num_layers, lr, optimizer, keep_pro, batch_size, training_step):
        ds = tf.data.Dataset.from_tensor_slices((test_x, test_y))
        ds = ds.batch(1)
        x, y = ds.make_one_shot_iterator().get_next()
        prediction, _, _ = LSTM(x, [0.0], outputlen, False, hidden_size, num_layers, lr, optimizer, keep_pro)
        pre = []
        label = []
        for _ in range(len(test_y)):
            p, l = sess.run([prediction, y])
            pre.append(p)
            label.append(l)
        return np.array(pre).squeeze(), np.array(label).squeeze()

    # Final forecast.
    def premodel(sess, outputlen, test_x, test_y, hidden_size, num_layers, lr, optimizer, keep_pro, batch_size, training_step):
        prediction, _, _ = LSTM(test_x, [0.0], outputlen, False, hidden_size, num_layers, lr, optimizer, keep_pro)
        return sess.run(prediction)

    if timestep > (int(EndYear) - int(StartYear) + 1) * 0.5:
        raise ValueError("训练步长过大,请调整后重试")
    if int(EndYear) - int(StartYear) < (int(PreEndYear) - int(PreStartYear) + timestep):
        raise ValueError("历史时间长度小于 预测时间长度与训练步长之和,请调整后重试")

    optimizer = "Adam"
    keep_pro = 0.9
    batch_size = 16

    # Read the data and derive the window sizes.
    outputlen = int(PreEndYear) - int(PreStartYear) + 1
    data = json.loads(getData("云南省_year_电力电量类", pretype, StartYear, EndYear))
    final = pd.DataFrame([data], index=[pretype]).T

    # test_size stays 0: all windows come back in the "train" split.
    X, y = generate_data(final, timestep, outputlen, test_size=0, if_norm="no")
    testdata = final[pretype].values

    # Evaluate on the second half of the windows.
    selet = int(np.floor(len(X["train"]) / 2))
    testinput = X["train"][selet:, :]
    testoutput = y["train"][selet:, :]

    # Input of the final forecast: the most recent ``timestep`` values in
    # chronological order. The original fed them newest-first, unlike the
    # training windows.
    x_pre = np.flipud(testdata[-1:-(timestep + 1):-1]).reshape(1, -1)
    x_pre = np.array(x_pre, dtype=np.float32)

    # Train, evaluate and forecast with shared variables.
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("LSTM"):
            trainmodel(sess, outputlen, X["train"][:-1, :], y["train"][:-1, :], hidden_size, hidden_layer, learningrate, optimizer, keep_pro, batch_size, epoch)
        with tf.variable_scope("LSTM", reuse=True):
            test_pre, test_label = runmodel(sess, outputlen, testinput, testoutput, hidden_size, hidden_layer, learningrate, optimizer, keep_pro, batch_size, epoch)
        with tf.variable_scope("LSTM", reuse=True):
            ypre = premodel(sess, outputlen, x_pre, x_pre, hidden_size, hidden_layer, learningrate, optimizer, keep_pro, batch_size, epoch)

    mape = MAPE(test_pre, test_label)
    rmse = RMSE(test_pre, test_label)

    ypre = np.array(ypre).squeeze()
    result = {
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
def EEMDIndustry(StartYear, EndYear, PreStartYear, PreEndYear, pretype, city="云南省"):
    """Forecast an industry series from its last EMD component with XGBoost.

    The yearly series is cut into a training window, a delayed evaluation
    window and a final window. For each window the last component returned by
    ``EMD`` is used as the feature vector of a multi-output regressor that
    predicts ``timestep`` consecutive years.

    Parameters
    ----------
    StartYear, EndYear : int or str
        Historical range requested from ``getData``.
    PreStartYear, PreEndYear : int or str
        Forecast range; its length is the number of predicted years.
    pretype : str
        Name of the target series.
    city : str
        Not used by this function.

    Returns
    -------
    dict
        Keys ``trainfromyear``, ``traintoyear``, ``trainresult``,
        ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE`` and ``RMSE``.
        The "train" entries describe the evaluation window.

    Raises
    ------
    ValueError
        If the history is too short for the requested forecast length.
    """
    # Imported locally so this block does not depend on a module-level import.
    from sklearn.multioutput import MultiOutputRegressor

    # Yearly data.
    yeardata = json.loads(getData("云南省_year_电力电量类-行业", pretype, StartYear, EndYear))
    pdyeardata = pd.DataFrame(yeardata, index=[pretype]).T
    values = pdyeardata[pretype].values

    totalyear = int(EndYear) - int(StartYear) + 1
    timestep = int(PreEndYear) - int(PreStartYear) + 1
    trainyear = math.floor(totalyear - totalyear * 0.4)
    delay = math.floor((totalyear - trainyear - timestep) * 0.7)
    testyear = trainyear + delay
    if testyear + timestep > totalyear or delay < 1:
        raise ValueError("历史数据时间间隔过短或预测年份过长")

    train_x = values[:trainyear].reshape(1, -1)
    train_y = values[trainyear:trainyear + timestep].reshape(1, -1)
    test_x = values[delay:testyear].reshape(1, -1)
    test_y = values[testyear:testyear + timestep].reshape(1, -1)
    # Most recent ``trainyear`` values in chronological order.
    finalpre = np.array(np.flipud(values[-1:-(trainyear + 1):-1])).reshape(1, -1)

    # Last EMD component of every window.
    eemd = EMD()
    IMFs = eemd(train_x.squeeze())[-1].reshape(1, -1)
    testIMFs = eemd(test_x.squeeze())[-1].reshape(1, -1)
    preIMFs = eemd(finalpre.squeeze())[-1].reshape(1, -1)

    gbdt = xgb.XGBRegressor(max_depth=5,
                            learning_rate=0.1,
                            n_estimators=100,
                            silent=True,
                            objective='reg:linear',
                            booster='gblinear',
                            n_jobs=50,
                            nthread=None,
                            gamma=0,
                            min_child_weight=1,
                            max_delta_step=0,
                            subsample=1,
                            colsample_bytree=1,
                            colsample_bylevel=1,
                            reg_alpha=0,
                            reg_lambda=1,
                            scale_pos_weight=1,
                            base_score=0.5,
                            random_state=0,
                            seed=None,
                            missing=None,
                            importance_type='gain')
    # The wrapper was commented out in the original, which left ``multi_model``
    # undefined at the ``fit`` call below.
    multi_model = MultiOutputRegressor(gbdt)
    multi_model.fit(IMFs, train_y)
    testpredict = multi_model.predict(testIMFs)
    ypre = multi_model.predict(preIMFs)
    print(testpredict, test_y)

    mape = MAPE(testpredict, test_y)
    rmse = RMSE(testpredict, test_y)

    # values[k] belongs to year StartYear + k, so the evaluation window
    # values[testyear:testyear + timestep] starts at StartYear + testyear
    # (the original subtracted one year too many).
    teststarty = int(StartYear) + testyear
    testendy = teststarty + timestep - 1

    ytrain = testpredict.flatten()
    ypre = ypre.reshape(-1, 1).squeeze()
    result = {
        "trainfromyear": teststarty,
        "traintoyear": testendy,
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
def Combination(PreStartYear, PreEndYear, pretype, singleresult, city="云南省", comtype="等权组合"):
    """Combine the stored results of several single forecasting models.

    Every tag in ``singleresult`` is resolved with ``getAlgorithmResult``. The
    models are compared on the years that all of their training results share
    and are merged with the method selected by ``comtype``.

    Parameters
    ----------
    PreStartYear, PreEndYear : int or str
        Forecast range; every single model must have used the same range.
    pretype : str
        Forecast target; every single model must have used the same target.
    singleresult : list
        Tags of the stored single-model results.
    city : str
        Not used by this function.
    comtype : str
        "等权组合" (equal weights), "加权组合" (weights derived from RMSE) or
        "递阶组合" (iteratively replace the worst model by the combination).

    Returns
    -------
    dict
        Keys ``name``, ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE``
        and ``RMSE``. The lists hold one entry per single model followed by
        the entry of the combination.

    Raises
    ------
    ValueError
        If a stored result is not a compatible forecast, the training years of
        the models do not overlap, or ``comtype`` is unknown.
    """

    def findtrain(alldata):
        # Intersection of the training ranges: latest start, earliest end.
        start = max([0] + [int(d.get("trainfromyear")) for d in alldata])
        end = min([9999999999] + [int(d.get("traintoyear")) for d in alldata])
        if end - start < 0:
            return None, None
        return str(start), str(end)

    def normalization(index):
        # Turn error measures into weights: a smaller error gives a larger weight.
        if len(index) == 1:
            # A single model would otherwise get weight 0 and divide by zero.
            return [1.0]
        indexsum = sum(index)
        if indexsum == 0:
            # All models are error-free; weigh them equally.
            return [1.0 / len(index)] * len(index)
        weight = [1 - value / indexsum for value in index]
        weightsum = sum(weight)
        return [w / weightsum for w in weight]

    if not singleresult:
        raise ValueError("请至少选择一个单预测模型")

    # Validate every model and collect its result in the same pass.
    alldata = []
    for tag in singleresult:
        stored = json.loads(json.loads(getAlgorithmResult(tag))["results"][0][1])
        print(tag)
        args = stored["arg"]
        # Both keys are required. The original ``("a" or "b") not in`` only
        # tested the first key, and the second was misspelled.
        if "PreStartYear" not in args or "PreEndYear" not in args:
            raise ValueError("%s 并非预测模型,不适用于组合预测模型" % tag)
        if "pretype" in args and args["pretype"] != pretype:
            raise ValueError("%s 的预测目标与组合预测的预测目标不符" % tag)
        if "pretype*" in args and args["pretype*"] != pretype:
            raise ValueError("%s 的预测目标与组合预测的预测目标不符" % tag)
        if int(args["PreStartYear"]) != int(PreStartYear):
            raise ValueError("%s 的预测起始年份与所选预测起始年份不符" % tag)
        if int(args["PreEndYear"]) != int(PreEndYear):
            raise ValueError("%s 的预测终止年份与所选预测终止年份不符" % tag)
        if "trainresult" not in stored["result"]:
            raise ValueError("%s 不适用于组合预测模型" % tag)
        alldata.append(stored["result"])

    train_start, train_end = findtrain(alldata)
    if train_start is None:
        # Previously this fell through to ``return result`` with no value bound.
        raise ValueError("各单预测模型的训练年份没有交集,无法进行组合预测")

    # Cut every model's training result to the shared years.
    traindata = []
    predata = []
    singlermse = []
    singlemape = []
    for d in alldata:
        first = int(d.get("trainfromyear"))
        lo = int(train_start) - first
        hi = int(train_end) - first
        traindata.append(d.get("trainresult")[lo:hi + 1])
        predata.append(d.get("preresult"))
        singlermse.append(d.get("RMSE"))
        singlemape.append(d.get("MAPE"))
    traindata = np.array(traindata)
    predata = np.array(predata)

    # Observed values for the shared years.
    # NOTE(review): the table name is fixed to "云南省_year_电力电量类" even for
    # targets that the single models read from another table; confirm.
    observed = json.loads(getData("云南省_year_电力电量类", pretype, train_start, train_end))
    realtraindata = np.array(list(observed.values()))

    if comtype == "等权组合":
        pre_combined = predata.mean(axis=0)
        train_combined = traindata.mean(axis=0)
    elif comtype == "加权组合":
        weight = normalization(singlermse)
        pre_combined = np.average(predata, weights=weight, axis=0)
        train_combined = np.average(traindata, weights=weight, axis=0)
    elif comtype == "递阶组合":
        # Work on copies: the per-model RMSE list is reported unchanged below.
        work_rmse = list(singlermse)
        againdata = predata.copy()
        againtrain = traindata.copy()
        for _ in range(10):
            weight = normalization(work_rmse)
            pre_combined = np.average(againdata, weights=weight, axis=0)
            train_combined = np.average(againtrain, weights=weight, axis=0)
            combined_rmse = RMSE(train_combined, realtraindata)
            if min(weight) > 1 / (len(predata)):
                break
            # Replace the worst model by the combination, but only when it is
            # really worse. The original overwrote model 0 when none was.
            worst = max(range(len(work_rmse)), key=work_rmse.__getitem__)
            if not work_rmse[worst] > combined_rmse:
                break
            work_rmse[worst] = combined_rmse
            againdata[worst] = pre_combined
            againtrain[worst] = train_combined
    else:
        raise ValueError("不支持的组合方法: %s" % comtype)

    rmse = RMSE(train_combined, realtraindata)
    mape = MAPE(train_combined, realtraindata)
    ypre = pre_combined.tolist()

    # One entry per single model, followed by the combination.
    cname = list(singleresult) + [comtype]
    cmape = list(singlemape) + [mape]
    crmse = list(singlermse) + [rmse]
    cpre = predata.tolist() + [ypre]

    result = {
        "name": cname,
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": cpre,
        "MAPE": cmape,
        "RMSE": crmse
    }
    return result
def Logarithm(StartYear, EndYear, PreStartYear, PreEndYear, pretype="全社会用电量", econamelist="GDP", city="云南省", planflag=1, plan=1, pro=1):
    """Forecast ``pretype`` from one economic indicator with a logarithmic curve.

    The model is ``y = a * log(x) + b`` fitted with ``leastsq``. The last fifth
    of the history is held out for evaluation; the forecast uses a model
    refitted on the whole history. Future indicator values come from
    ``ic.getpred``: of its 51 scenarios, the one whose mean equals the median
    of the scenario means is scaled by the last observed indicator value.

    Parameters
    ----------
    StartYear, EndYear : int or str
        Historical range requested from ``getData``.
    PreStartYear, PreEndYear : int or str
        Forecast range.
    pretype : str
        Name of the target series.
    econamelist : str or single-element list
        Name of the economic indicator.
    city : str
        Only "云南省" is supported.
    planflag, plan, pro
        Forwarded unchanged to ``ic.getpred``.

    Returns
    -------
    dict
        Keys ``trainfromyear``, ``traintoyear``, ``trainresult``,
        ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE`` and ``RMSE``.
        The "train" entries describe the hold-out years.

    Raises
    ------
    ValueError
        If more than one indicator is given, the city is unsupported, or the
        history is too short to hold out a test year.
    """

    def func5(params, x):
        a, b = params
        return a * np.log(x) + b

    def error5(params, x, y):
        return func5(params, x) - y

    def slovePara5(x, y):
        return leastsq(error5, [1, 0.02], args=(x, y))

    def median_scenario(scenarios, base):
        # ic.getpred is read as 51 scenario paths after transposing; pick the
        # one whose mean is the median mean and scale it by ``base``.
        paths = np.array(scenarios).T
        means = [np.mean(paths[i]) for i in range(51)]
        chosen = paths[means.index(np.median(means))]
        return [k * base for k in chosen]

    # Accept a plain name as well as a list. Wrapping unconditionally made the
    # length check below unreachable and nested an already-list argument.
    if isinstance(econamelist, (list, tuple)):
        econamelist = list(econamelist)
    else:
        econamelist = [econamelist]
    if len(econamelist) != 1:
        raise ValueError("仅支持选择一个因素变量")
    if city != "云南省":
        # Previously this fell through to ``return result`` with no value bound.
        raise ValueError("暂不支持该地区: %s" % city)

    eco_name = econamelist[0]
    # Historical load, then the economic indicator.
    load = json.loads(getData("云南省_year_电力电量类", pretype, StartYear, EndYear))
    eco = json.loads(getData("云南省_year_社会经济类", eco_name, StartYear, EndYear))
    final = pd.DataFrame([load, eco], index=[pretype, eco_name]).T

    x = final[eco_name].values.reshape(-1, 1)
    y = final[pretype].values.reshape(-1, 1)

    # Hold out the last fifth of the history.
    num = len(x)
    testyear = math.floor(num / 5)
    if testyear < 1:
        raise ValueError("历史数据年份过少,无法划分测试集,请扩大历史年份范围")
    split = num - testyear
    trainx = x[:split].squeeze()
    trainy = y[:split].squeeze()
    testx = x[split:]
    testy = y[split:]

    # Fit on the training part and evaluate on the hold-out.
    fitted = slovePara5(trainx, trainy)[0]
    testpredx = median_scenario(ic.getpred(testx, testyear, planflag, plan, pro), testx[-1])
    testpredy = [func5(fitted, value) for value in testpredx]

    # The hold-out rows are the tail of ``final``, so their index labels are
    # the evaluated years; the former +/-5 value match could pick a different
    # year or none at all.
    trainyear = final.index[split:]

    mape = MAPE(testpredy, testy)
    rmse = RMSE(testpredy, testy)

    # Refit on the full history for the forecast.
    x = x.squeeze()
    y = y.squeeze()
    refitted = slovePara5(x, y)[0]
    horizon = len(range(int(PreStartYear), int(PreEndYear) + 1))
    predx = median_scenario(ic.getpred(x, horizon, planflag, plan, pro), x[-1])
    predy = np.array([func5(refitted, value) for value in predx]).squeeze()

    ytrain = np.array(testpredy).squeeze()
    result = {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": predy.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
def GPRM(StartYear, EndYear, PreStartYear, PreEndYear, timestep, pretype="全社会用电量", city="云南省"):
    """Forecast ``pretype`` with a grey model fitted on a sliding window.

    The development coefficient ``a`` and the grey input ``b`` are estimated
    on a window of ``timestep`` years that ends one year before the evaluation
    window. They are then applied to the evaluation window (the last forecast
    length of the history) and to the most recent ``timestep`` years for the
    final forecast.

    Parameters
    ----------
    StartYear, EndYear : int or str
        Historical range requested from ``getData``.
    PreStartYear, PreEndYear : int or str
        Forecast range.
    timestep : int
        Length of the fitting window in years.
    pretype : str
        Name of the target series.
    city : str
        Not used by this function.

    Returns
    -------
    dict
        Keys ``trainfromyear``, ``traintoyear``, ``trainresult``,
        ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE`` and ``RMSE``.
        The "train" entries describe the evaluation window.

    Raises
    ------
    ValueError
        If ``timestep`` does not fit the history or the forecast length, or
        the grey model's normal equations cannot be solved.
    """

    def gm_value(x0, a, b, index):
        # Model value for the 1-based position ``index``: difference of two
        # consecutive values of the fitted accumulated series.
        scale = x0 - b / a
        return scale * np.exp(-a * (index - 1)) - scale * np.exp(-a * (index - 2))

    def GMpre(x, n, a, b):
        # Extrapolate n steps beyond the window ``x`` with given parameters.
        first = x.shape[0] + 1
        return np.array([gm_value(x[0], a, b, index) for index in range(first, first + n)])

    def improve_GM(x, n):
        """Fit the grey model on ``x`` (numpy array) and extrapolate ``n`` steps."""
        x1 = x.cumsum()  # accumulated series
        z1 = (x1[:len(x1) - 1] + x1[1:]) / 2.0  # means of adjacent values
        z1 = z1.reshape((len(z1), 1))
        B = np.append(-z1, np.ones_like(z1), axis=1)
        Y = x[1:].reshape((len(x) - 1, 1))
        # a: development coefficient, b: grey input.
        try:
            [[a], [b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T), Y)
        except (np.linalg.LinAlgError, ValueError) as err:
            raise ValueError("中间矩阵不可逆,请重新调整历史数据时间或步长") from err

        # Posterior variance ratio: residual variance over series variance.
        S1_2 = x.var()
        residuals = [
            x[index - 1] - gm_value(x[0], a, b, index)
            for index in range(1, x.shape[0] + 1)
        ]
        S2_2 = np.array(residuals).var()
        C = S2_2 / S1_2
        if C <= 0.35:
            assess = '后验差比<=0.35,模型精度等级为好'
        elif C <= 0.5:
            assess = '后验差比<=0.5,模型精度等级为合格'
        elif C <= 0.65:
            assess = '后验差比<=0.65,模型精度等级为勉强'
        else:
            assess = '后验差比>0.65,模型精度等级为不合格'

        return GMpre(x, n, a, b), a, b, assess

    testyear = int(PreEndYear) - int(PreStartYear) + 1
    if timestep > (int(EndYear) - int(StartYear) + 1):
        raise ValueError("训练步长过大,请调整后重试.")
    if timestep < testyear + 1:
        raise ValueError("训练步长小于预测年份区间长度,请增加训练步长.")

    datayear = np.arange(int(StartYear), int(EndYear) + 1)

    # Historical load data.
    data = json.loads(getData("云南省_year_电力电量类", pretype, StartYear, EndYear))
    final = pd.DataFrame([data], index=[pretype]).T
    y = final.values.reshape(-1, 1)
    num = len(y)

    # The fitting window ends one year before the evaluation window. Reject
    # steps for which its start would be negative, because a negative start
    # wraps around and selects the wrong rows.
    train_begin = num - testyear - 1 - timestep
    if train_begin < 0:
        raise ValueError("训练步长过大,请调整后重试.")
    trainx = y[train_begin:num - testyear - 1].squeeze()

    # Evaluation window and its targets.
    testx = y[num - testyear - timestep:num - testyear].squeeze()
    testy = y[num - testyear:]
    if len(testy) > 1:
        testy = testy.squeeze()

    # Fit, evaluate, forecast.
    _, a, b, _ = improve_GM(trainx, testyear)
    testpre = GMpre(testx, testyear, a, b)
    finalpre = GMpre(y[-timestep:], testyear, a, b)

    mape = MAPE(testpre, testy)
    rmse = RMSE(testpre, testy)

    ypre = finalpre.reshape(1, -1).squeeze()
    evalyears = datayear[num - testyear:]

    # "trainresult" is the forecast for the reported evaluation years, i.e.
    # the same values the error measures were computed on. The original
    # returned the forecast of the fitting window, which covers years shifted
    # by one.
    result = {
        "trainfromyear": evalyears[0],
        "traintoyear": evalyears[-1],
        "trainresult": testpre.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
def FER(StartYear,EndYear,PreStartYear,PreEndYear,timestep,pretype="全社会用电量",city="云南省"):
    """Forecast a yearly series with a fuzzy rule system plus double exponential smoothing.

    Three GDP series are read and extended to the forecast years with
    ``predict.pre``. The first two feed a 5x5 Mamdani-style rule table built
    with scikit-fuzzy, and the raw rule-system output is smoothed with Holt's
    double exponential smoothing (alpha=0.9, beta=1).

    Parameters
    ----------
    StartYear, EndYear : str or int
        First and last year of the history requested from ``getData``.
    PreStartYear, PreEndYear : str or int
        Forecast range, passed on to ``predict.pre`` and echoed in the result.
    timestep : int
        Number of most recent historical rows used.
    pretype : str
        Name of the series to forecast.
    city : str
        Not used by this function; the table names are hard-coded.

    Returns
    -------
    dict
        Keys ``trainfromyear``, ``traintoyear``, ``trainresult``,
        ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE``, ``RMSE``.

    Raises
    ------
    ValueError
        If ``timestep`` exceeds the history length, the forecast range spans
        one year or less, or ``timestep`` is shorter than the horizon plus one.
    """

    def double_exponential_smoothing(series, alpha, beta):
        # Holt's linear smoothing: one output per input point; the first
        # output equals the first input.
        if len(series) == 0:
            return []
        smoothed = [series[0]]
        if len(series) == 1:
            return smoothed
        level, trend = series[0], series[1] - series[0]
        for value in series[1:]:
            last_level = level
            level = alpha * value + (1 - alpha) * (level + trend)
            trend = beta * (level - last_level) + (1 - beta) * trend
            smoothed.append(level + trend)
        return smoothed

    if timestep > (int(EndYear)-int(StartYear)+1):
        raise ValueError("训练步长过大,请调整后重试.")
    if int(PreEndYear)-int(PreStartYear)<1:
        raise ValueError("该算法不支持一年及一年内的预测.")
    if timestep<(int(PreEndYear)-int(PreStartYear)+2):
        raise ValueError("训练步长小于预测年份区间长度,请增加训练步长.")

    period = int(PreEndYear)-int(PreStartYear)+1
    econamelist = ["第一产业GDP","第二产业GDP","第三产业GDP"]

    # Read the target series followed by the economic series.
    name = [pretype]
    finaldata = [json.loads(getData("云南省_year_电力电量类", pretype, StartYear, EndYear))]
    for econame in econamelist:
        ecodatajson = getData("云南省_year_社会经济类", econame, StartYear, EndYear)
        finaldata.append(json.loads(ecodatajson))
        name.append(econame)

    # One column per series after the transpose.
    final = pd.DataFrame(finaldata, index=name).T

    # Keep only the most recent ``timestep`` rows.
    data1 = final.iloc[len(final.values)-timestep:]
    num = len(data1.values)

    # Extend every economic series to the forecast years and join on "year".
    eco = predict.pre(data1, econamelist[0], PreStartYear, PreEndYear)
    for econame in econamelist[1:]:
        eco = pd.merge(eco, predict.pre(data1, econame, PreStartYear, PreEndYear), on="year")

    trainx = eco.loc[:, econamelist]
    trainy = data1.loc[:, pretype]

    # Fuzzy variables. The third GDP series is read and forecast above but is
    # not an input of the rule system.
    GDP1 = ctrl.Antecedent(np.arange(100, 15000, 20), "gdp1")
    GDP2 = ctrl.Antecedent(np.arange(150, 20000, 20), "gdp2")
    fuload = ctrl.Consequent(np.arange(100, 8000, 1), "futureload")

    levels = ("very low", "low", "medium", "high", "very high")

    # Triangular membership functions, listed in ``levels`` order.
    memberships = (
        (GDP1, ([100, 300, 500], [400, 850, 1250], [1000, 2500, 4000],
                [3700, 5500, 7500], [7300, 12000, 15000])),
        (GDP2, ([100, 500, 900], [500, 1450, 2600], [2500, 6500, 10500],
                [9500, 12000, 14000], [13500, 16000, 20000])),
        (fuload, ([100, 200, 300], [250, 550, 1100], [1050, 1900, 3000],
                  [2750, 3500, 5100], [5000, 8000, 8000])),
    )
    for variable, triangles in memberships:
        for label, abc in zip(levels, triangles):
            variable[label] = fuzz.trimf(variable.universe, abc)

    # Rule table: row = gdp1 level, columns = gdp2 levels in ``levels`` order,
    # cell = resulting load level.
    rule_table = (
        ("very low", ("very low", "very low", "low", "medium", "medium")),
        ("low", ("very low", "low", "low", "medium", "medium")),
        ("medium", ("low", "low", "medium", "high", "medium")),
        ("high", ("low", "medium", "high", "high", "very high")),
        ("very high", ("low", "low", "medium", "high", "very high")),
    )
    rules = [
        ctrl.Rule(GDP1[gdp1_level] & GDP2[gdp2_level], fuload[load_level])
        for gdp1_level, load_levels in rule_table
        for gdp2_level, load_level in zip(levels, load_levels)
    ]
    consumptionSystem = ctrl.ControlSystemSimulation(ctrl.ControlSystem(rules))

    # Evaluate the rule system for every historical and forecast row.
    trainn = len(trainx)
    systemoutput = np.zeros(trainn, dtype=np.float64)
    for i in range(trainn):
        consumptionSystem.input["gdp1"] = trainx.loc[i, econamelist[0]]
        consumptionSystem.input["gdp2"] = trainx.loc[i, econamelist[1]]
        consumptionSystem.compute()
        systemoutput[i] = consumptionSystem.output["futureload"]

    alpha = 0.9
    beta = 1
    # Smooth the fitted part, the scoring window and the forecast separately.
    allexsystemoutput = double_exponential_smoothing(systemoutput[:num], alpha, beta)
    exsystemoutput = double_exponential_smoothing(systemoutput[num-period:num], alpha, beta)
    exprey = double_exponential_smoothing(systemoutput[num:], alpha, beta)

    mape = MAPE(exsystemoutput, trainy.values[num-period:num])
    rmse = RMSE(exsystemoutput, trainy.values[num-period:num])

    trainyear = data1.index
    ytrain = np.array(allexsystemoutput).reshape(1,-1).squeeze()
    ypre = np.array(exprey).reshape(1,-1).squeeze()
    result = {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse,
    }
    return result
def GPRM(StartYear,EndYear,PreStartYear,PreEndYear,timestep=15,pretype="consumption",city="云南省"):
    """Forecast a yearly series with a GM(1,1) grey model (older variant).

    NOTE(review): this file contains an earlier ``def GPRM`` with input
    validation and different defaults; this later definition is the one bound
    to the name after import. Confirm which of the two is meant to survive.

    Parameters
    ----------
    StartYear, EndYear : str or int
        First and last year of the history requested from ``getData``.
    PreStartYear, PreEndYear : str or int
        Forecast range. Only its length is used by the model; both values are
        echoed back in the result.
    timestep : int
        Number of observations in each model window.
    pretype : str
        Name of the series to read and forecast.
    city : str
        Not used by this function.

    Returns
    -------
    dict
        Keys ``trainfromyear``, ``traintoyear``, ``trainresult`` (ndarray),
        ``prefromyear``, ``pretoyear``, ``preresult`` (ndarray of shape
        ``(1, horizon)``), ``MAPE``, ``RMSE``.
    """

    def gm_value(x, a, b, index):
        # Restored value at 1-based position ``index``.
        scale = x[0] - b / a
        return scale * np.exp(-a * (index - 1)) - scale * np.exp(-a * (index - 2))

    def GMpre(x, n, a, b):
        # Extrapolate ``n`` steps past the end of ``x`` with known a and b.
        start = x.shape[0] + 1
        return np.array([gm_value(x, a, b, index) for index in range(start, start + n)])

    def improve_GM(x, n):
        """Fit GM(1,1) on ``x``; return ``(forecast, a, b, assess)``."""
        x1 = x.cumsum()  # first-order accumulation
        z1 = (x1[:len(x1) - 1] + x1[1:]) / 2.0  # adjacent means
        z1 = z1.reshape((len(z1), 1))
        B = np.append(-z1, np.ones_like(z1), axis=1)
        Y = x[1:].reshape((len(x) - 1, 1))
        # a: development coefficient, b: grey action quantity.
        [[a], [b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T), Y)

        # Posterior variance ratio: residual variance / variance of the input.
        residuals = [x[index - 1] - gm_value(x, a, b, index)
                     for index in range(1, x.shape[0] + 1)]
        C = np.array(residuals).var() / x.var()
        if C <= 0.35:
            assess = '后验差比<=0.35,模型精度等级为好'
        elif C <= 0.5:
            assess = '后验差比<=0.5,模型精度等级为合格'
        elif C <= 0.65:
            assess = '后验差比<=0.65,模型精度等级为勉强'
        else:
            assess = '后验差比>0.65,模型精度等级为不合格'
        return GMpre(x, n, a, b), a, b, assess

    # Read the historical series; the parsed JSON becomes a single column.
    datajson = getData("yunnan_year_电力电量类", pretype, StartYear, EndYear)
    data = json.loads(datajson)
    final = pd.DataFrame([data], index=[pretype]).T

    testyear = int(PreEndYear) - int(PreStartYear) + 1
    y = final.values.reshape(-1, 1)
    num = len(y)

    # Fitting window ends one step before the evaluation window.
    trainx = y[num - testyear - 1 - timestep:num - testyear - 1].squeeze()
    testx = y[num - testyear - timestep:num - testyear].squeeze()
    testy = y[num - testyear:]
    if len(testy) > 1:
        # A single hold-out value is kept 2-D instead of being squeezed to a
        # 0-d array, which previously broke the per-value loop over it.
        testy = testy.squeeze()

    trainpre, a, b, _assess = improve_GM(trainx, testyear)
    testpre = GMpre(testx, testyear, a, b)

    # Last ``timestep`` observations in chronological order.
    testpredx = np.array(np.flipud(y[-1:-(timestep + 1):-1]))
    finalpre = GMpre(testpredx, testyear, a, b)

    mape = MAPE(testpre, testy)
    rmse = RMSE(testpre, testy)
    ypre = finalpre.squeeze().reshape(1, -1)

    # Labels of the hold-out rows, taken by position. The previous lookup
    # searched for the first historical value within +/-5 of each hold-out
    # value, which can select an earlier year when values are close.
    trainyear = final.index[num - testyear:]

    result = {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": trainpre,
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre,
        "MAPE": mape,
        "RMSE": rmse,
    }
    return result
def Binarylinear(StartYear,EndYear,PreStartYear,PreEndYear,econamelist,pretype="全社会用电量",city="云南省",planflag1=1,plan1=1,pro1=1,planflag2=1,plan2=1,pro2=1):
    """Forecast a yearly series with a two-variable linear regression.

    The target is regressed on two economic series over the whole history.
    Future values of each economic series are obtained by scaling its last
    observed value with the median scenario returned by ``ic.getpred``, and
    the regression is applied to those values.

    Parameters
    ----------
    StartYear, EndYear : str or int
        First and last year of the history requested from ``getData``.
    PreStartYear, PreEndYear : str or int
        Forecast range; its length sets the number of forecast steps.
    econamelist : list of str
        Exactly two economic series names.
    pretype : str
        Name of the series to forecast.
    city : str
        Only "云南省" is handled.
    planflag1, plan1, pro1 : optional
        Passed unchanged to ``ic.getpred`` for the first economic series.
    planflag2, plan2, pro2 : optional
        Passed unchanged to ``ic.getpred`` for the second economic series.

    Returns
    -------
    dict or None
        The result dictionary; ``{"False": message}`` when ``econamelist``
        does not hold exactly two names; ``None`` for any other ``city``.
    """

    def median_scenario(series, steps, planflag, plan, pro):
        # Transpose the ``ic.getpred`` output to one scenario per row and pick
        # the row whose mean is the median of all row means. The count of 51
        # rows is hard-coded, as before.
        # NOTE(review): presumably ic.getpred yields 51 scenarios - confirm.
        paths = np.array(ic.getpred(series, steps, planflag, plan, pro)).T
        means = [np.mean(paths[i]) for i in range(51)]
        return paths[means.index(np.median(means))]

    if len(econamelist) != 2:
        return {"False":"请重新选择两个经济变量."}
    if city != "云南省":
        # Other regions were never handled; the function returns None.
        return None

    # Read the target series followed by the two economic series.
    name = [pretype]
    finaldata = [json.loads(getData("云南省_year_电力电量类", pretype, StartYear, EndYear))]
    for econame in econamelist[:2]:
        ecodatajson = getData("云南省_year_社会经济类", econame, StartYear, EndYear)
        finaldata.append(json.loads(ecodatajson))
        name.append(econame)

    final = pd.DataFrame(finaldata, index=name).T

    x1 = final[econamelist[0]].values.reshape(-1, 1)
    x2 = final[econamelist[1]].values.reshape(-1, 1)
    xx = np.concatenate((x1, x2), axis=1)
    y = final[pretype].values.reshape(-1, 1)

    # The last fifth of the history is used for scoring only; the regression
    # itself is fitted on the full history.
    num = len(y)
    testyear = math.floor(num/5)
    testx = xx[num-testyear:]
    testy = y[num-testyear:]

    reg = LinearRegression().fit(xx, y)
    coef1 = reg.coef_[0][0]
    coef2 = reg.coef_[0][1]
    intercept = reg.intercept_[0]

    # Scoring pass. Earlier code used ``testx[:,0]`` / ``testx[:,1]`` as the
    # comprehension loop target, which overwrote the columns of ``testx`` (a
    # view of ``xx``) on every iteration. Plain scalars are used instead so
    # the history stays intact for the forecast below.
    growth1 = median_scenario(testx[:,0], testyear, planflag1, plan1, pro1)
    last1 = testx[:,0][-1]
    testpredy1 = [k * last1 * coef1 + intercept for k in growth1]

    growth2 = median_scenario(testx[:,1], testyear, planflag2, plan2, pro2)
    last2 = testx[:,1][-1]
    testpredy2 = [k * last2 * coef2 for k in growth2]

    testpredy = [part1 + part2 for part1, part2 in zip(testpredy1, testpredy2)]

    mape = MAPE(testpredy, testy)
    rmse = RMSE(testpredy, testy)

    # Labels of the scoring rows, taken by position instead of by searching
    # for a historical value within +/-5 of each scoring value.
    trainyear = final.index[num-testyear:]

    # Forecast pass.
    year = len(np.arange(int(PreStartYear), int(PreEndYear)+1))

    growth1 = median_scenario(xx[:,0], year, planflag1, plan1, pro1)
    last1 = xx[:,0][-1]
    predy1 = [k * last1 * coef1 + intercept for k in growth1]

    growth2 = median_scenario(xx[:,1], year, planflag2, plan2, pro2)
    last2 = xx[:,1][-1]
    predy2 = [k * last2 * coef2 for k in growth2]

    predy = [part1 + part2 for part1, part2 in zip(predy1, predy2)]

    ytrain = np.array(testpredy).squeeze()
    ypre = np.array(predy).squeeze()
    result = {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse,
    }
    return result
def QuantileRegression(StartYear, EndYear, PreStartYear, PreEndYear, quatile=0.95, pretype="全社会用电量", econamelist=["GDP"], city="云南省"):
    """Forecast a yearly series with a linear quantile regression on economic factors.

    The economic series are extended to the forecast years with
    ``preeco.pre``; the target is then regressed on them at quantile
    ``quatile`` and the fitted line is evaluated on history plus forecast.

    Parameters
    ----------
    StartYear, EndYear : str or int
        First and last year of the history requested from ``getData``.
    PreStartYear, PreEndYear : str or int
        Forecast range, passed on to ``preeco.pre`` and echoed in the result.
    quatile : float
        Quantile at which the regression is fitted.
    pretype : str
        Name of the series to forecast.
    econamelist : list of str
        One to five economic series names. The default list is never mutated.
    city : str
        Only "云南省" is supported.

    Returns
    -------
    dict
        Keys ``trainfromyear``, ``traintoyear``, ``trainresult``,
        ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE``, ``RMSE``.

    Raises
    ------
    ValueError
        If more than five factors are given, the forecast range spans one
        year or less, the history is shorter than five years, or ``city`` is
        not supported.
    """

    def get_coef(data, xnamelist, yname, quatile):
        # Fit "yname ~ x1+x2+..." at the requested quantile and return the
        # quantile, the intercept, the factor coefficients and their
        # confidence bounds.
        formula = '%s ~ %s' % (yname, '+'.join(xnamelist))
        res = smf.quantreg(formula, data).fit(q=quatile)
        return quatile, res.params['Intercept'], res.params[
            xnamelist], res.conf_int().loc[xnamelist]

    def predict_linear(data, intercept, coef, xnamelist):
        # ``data`` holds only the factor columns; evaluate the fitted line.
        pre = [intercept] * len(data.values)
        for xname in xnamelist:
            pre = pre + coef[xname] * data[xname].values
        return pre

    if len(econamelist) > 5:
        delnum = len(econamelist) - 5
        raise ValueError("经济因素选取不应超出 5 个,请删去 %s 个,再重新预测" % delnum)
    if int(PreEndYear) - int(PreStartYear) < 1:
        raise ValueError("该算法不支持一年及一年内的预测")
    if (int(EndYear) - int(StartYear) + 1) < 5:
        raise ValueError("历史年份区间过短,建议历史年份区间在 5 年以上")
    if city != "云南省":
        raise ValueError("暂不支持其他地区预测")

    period = int(PreEndYear) - int(PreStartYear) + 1

    # Read the target series followed by the economic series.
    name = [pretype]
    finaldata = [json.loads(getData("云南省_year_电力电量类", pretype, StartYear, EndYear))]
    for econame in econamelist:
        ecodatajson = getData("云南省_year_社会经济类", econame, StartYear, EndYear)
        finaldata.append(json.loads(ecodatajson))
        name.append(econame)

    final = pd.DataFrame(finaldata, index=name).T

    # Extend every economic series to the forecast years and join on "year".
    eco = preeco.pre(final, econamelist[0], PreStartYear, PreEndYear)
    for econame in econamelist[1:]:
        eco = pd.merge(eco, preeco.pre(final, econame, PreStartYear, PreEndYear), on="year")

    # Use the caller's quantile; it was previously hard-coded to 0.95 here,
    # so the ``quatile`` argument had no effect.
    q, b, k, lbub = get_coef(final, econamelist, pretype, quatile)
    y = predict_linear(eco, b, k, econamelist)

    # The first len(y) - period fitted values are scored against history; the
    # remaining ``period`` values are the forecast.
    ytrain = y[:len(y) - period]
    ytraintrue = final[pretype].values[:len(y) - period]
    mape = MAPE(ytrain, ytraintrue)
    rmse = RMSE(ytrain, ytraintrue)
    ypre = y[len(y) - period:]

    result = {
        "trainfromyear": StartYear,
        "traintoyear": EndYear,
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
def ESQRM(StartYear, EndYear, PreStartYear, PreEndYear, quatile=0.95, pretype="全社会用电量", econamelist=["GDP"], city="云南省"):
    """Forecast a yearly series with a log-log quantile regression on economic factors.

    All series are log-transformed, the economic series are extended to the
    forecast years with ``preeco.pre``, the log target is regressed on the log
    factors at quantile ``quatile``, and the fitted values are mapped back
    with ``exp``.

    Parameters
    ----------
    StartYear : str
        First year of the history.
    EndYear : str
        Last year of the history.
    PreStartYear : str
        First forecast year.
    PreEndYear : str
        Last forecast year.
    quatile : float
        Quantile of the regression, 0.95 by default.
    pretype : str
        Name of the series to forecast.
    econamelist : list
        Names of the economic series used as factors (at most five).
    city : str
        Only "云南省" is supported.

    Returns
    -------
    dict
        ``trainfromyear``/``traintoyear`` echo the history range,
        ``trainresult`` holds the fitted values, ``prefromyear``/``pretoyear``
        echo the forecast range, ``preresult`` holds the forecast, and
        ``MAPE``/``RMSE`` score the fitted values against history.

    Raises
    ------
    ValueError
        For more than five factors, a forecast range of one year or less, a
        history shorter than five years, or an unsupported ``city``.
    """

    def fit_quantile(frame, target, factors, q):
        # Fit "target ~ f1+f2+..." and return the quantile, the intercept,
        # the factor coefficients and their confidence bounds.
        formula = '%s ~ %s' % (target, '+'.join(factors))
        fitted = smf.quantreg(formula, frame).fit(q=q)
        params = fitted.params
        return q, params['Intercept'], params[factors], fitted.conf_int().loc[factors]

    def evaluate(frame, intercept, coef, factors):
        # ``frame`` holds only the (log) factor columns; the linear predictor
        # is mapped back from log space.
        linear = [intercept] * len(frame.values)
        for factor in factors:
            linear = linear + coef[factor] * frame[factor].values
        return np.exp(linear)

    if len(econamelist) > 5:
        delnum = len(econamelist) - 5
        raise ValueError("经济因素选取不应超出 5 个,请删去 %s 个,再重新预测" % delnum)
    if int(PreEndYear) - int(PreStartYear) < 1:
        raise ValueError("该算法不支持一年及一年内的预测")
    if (int(EndYear) - int(StartYear) + 1) < 5:
        raise ValueError("历史年份区间过短,建议历史年份区间在 5 年以上")
    if city != "云南省":
        raise ValueError("暂不支持其他地区预测")

    horizon = int(PreEndYear) - int(PreStartYear) + 1

    # Target series first, then one entry per economic factor.
    labels = [pretype]
    records = [json.loads(getData("云南省_year_电力电量类", pretype, StartYear, EndYear))]
    for factor in econamelist:
        raw = getData("云南省_year_社会经济类", factor, StartYear, EndYear)
        records.append(json.loads(raw))
        labels.append(factor)

    final = pd.DataFrame(records, index=labels).T
    logfinal = final.apply(np.log)

    # Extend each log factor to the forecast years and join on "year".
    eco = preeco.pre(logfinal, econamelist[0], PreStartYear, PreEndYear)
    for factor in econamelist[1:]:
        extended = preeco.pre(logfinal, factor, PreStartYear, PreEndYear)
        eco = pd.merge(eco, extended, on="year")

    _q, intercept, coef, _bounds = fit_quantile(logfinal, pretype, econamelist, quatile)
    y = evaluate(eco, intercept, coef, econamelist)

    # Everything before the last ``horizon`` values is scored against history.
    split = len(y) - horizon
    ytrain = y[:split]
    ypre = y[split:]
    actual = final[pretype].values[:split]
    mape = MAPE(ytrain, actual)
    rmse = RMSE(ytrain, actual)

    return {
        "trainfromyear": StartYear,
        "traintoyear": EndYear,
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
def FLR(StartYear, EndYear, PreStartYear, PreEndYear, timestep=15, pretype="consumption", city="云南省"):
    """Forecast a yearly series with a two-input fuzzy rule system.

    Three GDP series are read and extended to the forecast years with
    ``predict.pre``; the first two drive a 5x5 rule table built with
    scikit-fuzzy, whose defuzzified output is returned unsmoothed.

    Parameters
    ----------
    StartYear, EndYear : str or int
        First and last year of the history requested from ``getData``.
    PreStartYear, PreEndYear : str or int
        Forecast range, passed on to ``predict.pre`` and echoed in the result.
    timestep : int
        Number of most recent historical rows used.
    pretype : str
        Name of the series to forecast.
    city : str
        Not used by this function.

    Returns
    -------
    dict
        Keys ``trainfromyear``, ``traintoyear``, ``trainresult`` (ndarray),
        ``prefromyear``, ``pretoyear``, ``preresult`` (ndarray of shape
        ``(1, n)``), ``MAPE``, ``RMSE``.
    """
    econamelist = ["GDP1", "GDP2", "GDP3"]
    period = int(PreEndYear) - int(PreStartYear) + 1

    # Target series first, then the economic series.
    name = [pretype]
    finaldata = [json.loads(getData("yunnan_year_电力电量类", pretype, StartYear, EndYear))]
    for econame in econamelist:
        raw = getData("yunnan_year_社会经济类", econame, StartYear, EndYear)
        finaldata.append(json.loads(raw))
        name.append(econame)

    final = pd.DataFrame(finaldata, index=name).T

    # Keep only the most recent ``timestep`` rows.
    data1 = final.iloc[len(final.values) - timestep:]
    num = len(data1.values)

    # Extend each economic series to the forecast years and join on "year".
    eco = predict.pre(data1, econamelist[0], PreStartYear, PreEndYear)
    for econame in econamelist[1:]:
        extended = predict.pre(data1, econame, PreStartYear, PreEndYear)
        eco = pd.merge(eco, extended, on="year")

    trainx = eco.loc[:, econamelist]
    trainy = data1.loc[:, pretype]
    prex = eco.loc[num:, econamelist]  # forecast-year factors; not read below

    # Fuzzy variables; the third GDP series is not an input of the system.
    GDP1 = ctrl.Antecedent(np.arange(100, 15000, 20), "gdp1")
    GDP2 = ctrl.Antecedent(np.arange(150, 20000, 20), "gdp2")
    fuload = ctrl.Consequent(np.arange(100, 8000, 1), "futureload")

    levels = ("very low", "low", "medium", "high", "very high")

    # Triangular membership functions, listed in ``levels`` order.
    memberships = (
        (GDP1, ([100, 300, 500], [400, 850, 1250], [1000, 2500, 4000],
                [3700, 5500, 7500], [7300, 12000, 15000])),
        (GDP2, ([100, 500, 900], [500, 1450, 2600], [2500, 6500, 10500],
                [9500, 12000, 14000], [13500, 16000, 20000])),
        (fuload, ([100, 200, 300], [250, 550, 1100], [1050, 1900, 3000],
                  [2750, 3500, 5100], [5000, 8000, 8000])),
    )
    for variable, triangles in memberships:
        for label, abc in zip(levels, triangles):
            variable[label] = fuzz.trimf(variable.universe, abc)

    # Rule table: row = gdp1 level, columns = gdp2 levels in ``levels`` order,
    # cell = resulting load level.
    rule_table = (
        ("very low", ("very low", "very low", "low", "medium", "medium")),
        ("low", ("very low", "low", "low", "medium", "medium")),
        ("medium", ("low", "low", "medium", "high", "medium")),
        ("high", ("low", "medium", "high", "high", "very high")),
        ("very high", ("low", "low", "medium", "high", "very high")),
    )
    rules = [
        ctrl.Rule(GDP1[gdp1_level] & GDP2[gdp2_level], fuload[load_level])
        for gdp1_level, load_levels in rule_table
        for gdp2_level, load_level in zip(levels, load_levels)
    ]
    fuzzy_ctrl = ctrl.ControlSystem(rules)
    consumptionSystem = ctrl.ControlSystemSimulation(fuzzy_ctrl)

    # Evaluate the rule system for every historical and forecast row.
    trainn = len(trainx)
    systemoutput = np.zeros(trainn, dtype=np.float64)
    for row in range(trainn):
        consumptionSystem.input["gdp1"] = trainx.loc[row, econamelist[0]]
        consumptionSystem.input["gdp2"] = trainx.loc[row, econamelist[1]]
        consumptionSystem.compute()
        systemoutput[row] = consumptionSystem.output["futureload"]

    # Score the last ``period`` historical rows.
    scored = systemoutput[num - period:num]
    actual = trainy.values[num - period:num]
    mape = MAPE(scored, actual)
    rmse = RMSE(systemoutput[num - period:num], trainy.values[num - period:num])

    trainyear = data1.index
    return {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": systemoutput[:num],
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": np.array(systemoutput[num:]).reshape(1, -1),
        "MAPE": mape,
        "RMSE": rmse
    }
def GM(StartYear, EndYear, PreStartYear, PreEndYear, timestep, pretype="全社会用电量", city="云南省"):
    """Forecast a yearly series with a grey model using the derivative form of the response.

    The parameters ``a`` and ``b`` are estimated on ``timestep`` observations
    that end one step before the hold-out window, scored on the last
    ``PreEndYear - PreStartYear + 1`` historical observations, and applied to
    the most recent ``timestep`` observations to produce the forecast.

    Parameters
    ----------
    StartYear, EndYear : str or int
        First and last year of the history requested from ``getData``.
    PreStartYear, PreEndYear : str or int
        Forecast range. Only its length is used by the model; both values are
        echoed back in the result.
    timestep : int
        Number of observations in each model window.
    pretype : str
        Name of the series to read and forecast.
    city : str
        Not used by this function; the table name is hard-coded.

    Returns
    -------
    dict
        Keys ``trainfromyear``, ``traintoyear``, ``trainresult`` (list),
        ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE``, ``RMSE``.

    Raises
    ------
    ValueError
        If ``timestep`` is longer than the history, shorter than the forecast
        horizon plus one, or if the model parameters cannot be solved.
    """

    def response(x, a, b, index):
        # Model value at 1-based position ``index``.
        return (x[0] - b / a) * np.exp(-a * (index - 1)) * (-a)

    def RGM(x, n):
        """Fit the model on ``x``; return ``(forecast_list, a, b)`` for ``n`` steps."""
        x1 = x.cumsum()  # first-order accumulation
        z1 = (x1[:len(x1) - 1] + x1[1:]) / 2.0  # adjacent means
        z1 = z1.reshape((len(z1), 1))
        B = np.append(-z1, np.ones_like(z1), axis=1)
        Y = x[1:].reshape((len(x) - 1, 1))
        try:
            # a: development coefficient, b: grey action quantity.
            [[a], [b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T), Y)
        except Exception as err:
            # Every failure is still reported as ValueError for callers, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            raise ValueError("中间矩阵不可逆,请重新调整历史数据时间或步长") from err
        start = x.shape[0] + 1
        predict = [response(x, a, b, index) for index in range(start, start + n)]
        return predict, a, b

    def RGMpre(x, n, a, b):
        # Extrapolate ``n`` steps past the end of ``x`` with known a and b.
        start = x.shape[0] + 1
        return np.array([response(x, a, b, index) for index in range(start, start + n)])

    if timestep > (int(EndYear) - int(StartYear) + 1):
        raise ValueError("训练步长过大,请调整后重试.")
    if timestep < (int(PreEndYear) - int(PreStartYear) + 2):
        raise ValueError("训练步长小于预测年份区间长度,请增加训练步长.")

    datayear = np.arange(int(StartYear), int(EndYear) + 1)

    # Read the historical series; the parsed JSON becomes a single column.
    datajson = getData("云南省_year_电力电量类", pretype, StartYear, EndYear)
    data = json.loads(datajson)
    final = pd.DataFrame([data], index=[pretype]).T

    testyear = int(PreEndYear) - int(PreStartYear) + 1
    y = final.values.reshape(-1, 1)
    num = len(y)

    # Fitting window ends one step before the evaluation window.
    trainx = y[num - testyear - 1 - timestep:num - testyear - 1].squeeze()
    testx = y[num - testyear - timestep:num - testyear].squeeze()
    testy = y[num - testyear:].squeeze()

    trainpre, a, b = RGM(trainx, testyear)
    testpre = RGMpre(testx, testyear, a, b)

    # Last ``timestep`` observations in chronological order.
    testpredx = np.array(np.flipud(y[-1:-(timestep + 1):-1]))
    finalpre = RGMpre(testpredx, testyear, a, b)

    mape = MAPE(testpre, testy)
    rmse = RMSE(testpre, testy)

    ypre = finalpre.reshape(1, -1).squeeze()
    trainyear = datayear[num - testyear:]

    result = {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": trainpre,
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
def QRsolow(hisfromyear, histoyear, fromyear, toyear, quatile=0.95, pretype="consumption", econamelist=["GDP"], city="云南省"):
    """Forecast a yearly series with a log-log quantile regression on economic factors.

    All series are log-transformed, the economic series are extended to the
    forecast years with ``predict.pre``, the log target is regressed on the
    log factors at quantile ``quatile``, and the fitted values are mapped
    back with ``exp``.

    Parameters
    ----------
    hisfromyear : str
        First year of the history.
    histoyear : str
        Last year of the history.
    fromyear : str or int
        First forecast year.
    toyear : str or int
        Last forecast year.
    quatile : float
        Quantile of the regression, 0.95 by default.
    pretype : str
        Name of the series to forecast.
    econamelist : list
        Names of the economic series used as factors (at most five). The
        default list is never mutated.
    city : str
        Only "云南省" is handled.

    Returns
    -------
    dict or None
        The result dictionary; ``{"False": message}`` for any other ``city``;
        ``None`` (after printing a message) when more than five factors are
        given.
    """

    def get_coef(data, pretype, econamelist, quatile):
        # Fit "pretype ~ f1+f2+..." and return the quantile, the intercept,
        # the factor coefficients and their confidence bounds.
        formula = '%s ~ %s' % (pretype, '+'.join(econamelist))
        res = smf.quantreg(formula, data).fit(q=quatile)
        return quatile, res.params['Intercept'], res.params[
            econamelist], res.conf_int().loc[econamelist]

    # Named so that it does not shadow the ``predict`` module used below;
    # the previous nested ``predict`` made ``predict.pre`` unreachable.
    def predict_log_linear(data, intercept, coef, econamelist):
        # ``data`` holds only the (log) factor columns.
        pre = [intercept] * len(data.values)
        for econame in econamelist:
            pre = pre + coef[econame] * data[econame].values
        return np.exp(pre)

    if len(econamelist) > 5:
        delnum = len(econamelist) - 5
        print("经济因素选取不应超出5个,请删去%s个,再重新预测。" % delnum)
        return None
    if city != "云南省":
        return {"False": "暂不支持其他地区预测"}

    # int() accepts both the documented str years and plain ints.
    period = int(toyear) - int(fromyear) + 1

    # Target series first, then the economic series.
    # NOTE(review): the table name "yunnan_year" differs from the names used
    # by the other forecasters in this file - confirm it is still valid.
    name = [pretype]
    finaldata = [json.loads(getData("yunnan_year", pretype, hisfromyear, histoyear))]
    for econame in econamelist:
        ecodatajson = getData("yunnan_year", econame, hisfromyear, histoyear)
        # Parse the fetched JSON (previously an undefined name was parsed).
        finaldata.append(json.loads(ecodatajson))
        name.append(econame)

    final = pd.DataFrame(finaldata, index=name).T

    # Log-transform the assembled frame (previously applied to the raw dict).
    logfinal = final.apply(np.log)

    # Extend each log factor to the forecast years and join on "year".
    # NOTE(review): the whole frame is passed, as the sibling forecasters do;
    # confirm against the signature of predict.pre.
    eco = predict.pre(logfinal, econamelist[0], fromyear, toyear)
    for econame in econamelist[1:]:
        c = predict.pre(logfinal, econame, fromyear, toyear)
        eco = pd.merge(eco, c, on="year")

    # Arguments now follow get_coef's (data, target, factors, quantile) order.
    q, b, k, lbub = get_coef(logfinal, pretype, econamelist, quatile)
    y = predict_log_linear(eco, b, k, econamelist)

    # Everything before the last ``period`` values is scored against history.
    ytrain = y[:len(y) - period]
    ytraintrue = final[pretype].values[:len(y) - period]
    mape = MAPE(ytrain, ytraintrue)
    rmse = RMSE(ytrain, ytraintrue)
    print("MAPE=", mape)
    print("RMSE=", rmse)
    ypre = y[len(y) - period:]

    return {
        "trainfromyear": hisfromyear,
        "traintoyear": histoyear,
        "trainresult": ytrain,
        "prefromyear": fromyear,
        "pretoyear": toyear,
        "preresult": ypre,
        "MAPE": mape,
        "RMSE": rmse
    }
def GBDT(StartYear,
         EndYear,
         PreStartYear,
         PreEndYear,
         timestep,
         pretype="全社会用电量",
         city="云南省",
         LearningRate=0.5,
         MaxDepth=20,
         NumberofEstimators=500):
    """Forecast a yearly series with an XGBoost regressor on sliding windows.

    Parameters
    ----------
    StartYear, EndYear : str
        First and last year of the historical data.
    PreStartYear, PreEndYear : str
        First and last forecast year.
    timestep : int
        Length of the input window given to the model.
    pretype : str
        Name of the series to load and forecast.
    city : str
        Not used by this function.
    LearningRate, MaxDepth, NumberofEstimators
        Forwarded to ``xgb.XGBRegressor`` as ``learning_rate``,
        ``max_depth`` and ``n_estimators``.

    Returns
    -------
    dict
        Keys "trainfromyear", "traintoyear", "trainresult", "prefromyear",
        "pretoyear", "preresult", "MAPE" and "RMSE".

    Raises
    ------
    ValueError
        If ``timestep`` exceeds the number of historical years, or the
        history is shorter than forecast length plus ``timestep``.
    """
    history_span = int(EndYear) - int(StartYear)
    if timestep > history_span + 1:
        raise ValueError("训练步长过大,请调整后重试.")
    forecast_span = int(PreEndYear) - int(PreStartYear)
    if history_span < forecast_span + timestep:
        raise ValueError("历史时间长度小于预测时间长度,请增加历史时间长度或减小预测时间长度.")

    # Load the series; the frame gets one column named ``pretype``.
    horizon = forecast_span + 1
    raw = json.loads(getData("云南省_year_电力电量类", pretype, StartYear, EndYear))
    final = pd.DataFrame([raw], index=[pretype]).T

    # No hold-out split: every window is used for training.
    X, y = generate_data(final,
                         timestep,
                         horizon,
                         test_size=0,
                         if_norm="no")

    booster_params = dict(max_depth=MaxDepth,
                          learning_rate=LearningRate,
                          n_estimators=NumberofEstimators,
                          silent=True,
                          objective='reg:linear',
                          booster='gblinear',
                          n_jobs=50,
                          nthread=None,
                          gamma=0,
                          min_child_weight=1,
                          max_delta_step=0,
                          subsample=1,
                          colsample_bytree=1,
                          colsample_bylevel=1,
                          reg_alpha=0,
                          reg_lambda=1,
                          scale_pos_weight=1,
                          base_score=0.5,
                          random_state=0,
                          seed=None,
                          missing=None,
                          importance_type='gain')
    multi_model = MultiOutputRegressor(xgb.XGBRegressor(**booster_params))
    multi_model.fit(X["train"], y["train"])

    # The second half of the training windows doubles as evaluation set.
    half = len(X["train"]) // 2
    eval_inputs = X["train"][half:, :]
    eval_targets = y["train"][half:, :]

    eval_pred = multi_model.predict(eval_inputs)
    pred_column = np.array(eval_pred).reshape(-1, 1)
    true_column = np.array(eval_targets).reshape(-1, 1)
    mape = MAPE(pred_column, true_column)
    rmse = RMSE(pred_column, true_column)

    # Only the last evaluation window is reported as the training result.
    ytrain = eval_pred[-1]

    # Recover the year labels of that window by matching each target value
    # against the series (first entry within +/-1 wins).
    trainyear = []
    for target in eval_targets[-1]:
        for position, value in enumerate(final[pretype]):
            if value - 1 < target < value + 1:
                trainyear.append(final.index[position])
                break

    # Forecast from the most recent ``timestep`` observations.
    recent_reversed = final.values[-1:-(timestep + 1):-1]
    x_pre = np.array(np.flipud(recent_reversed)).reshape(1, -1)
    ypre = np.array(multi_model.predict(x_pre)).flatten().tolist()

    return {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre,
        "MAPE": mape,
        "RMSE": rmse
    }
def SARIMAIndustry(StartYear, EndYear, PreStartYear, PreEndYear, pretype, city="云南省"):
    """Forecast a yearly industry series from monthly data with SARIMA.

    The monthly history is split roughly 70/30 into training and hold-out
    years. A small grid of SARIMA(p,1,q)(P,1,Q)12 orders is scored by AIC
    on the differenced, mean-removed training months. The best order is
    refitted on the training months to evaluate the hold-out years, then on
    all months to produce the forecast. Monthly forecasts are summed into
    yearly totals.

    Parameters
    ----------
    StartYear, EndYear : str
        First and last year of the historical data.
    PreStartYear, PreEndYear : str
        First and last forecast year.
    pretype : str
        Name of the series to load and forecast.
    city : str
        Not used by this function.

    Returns
    -------
    dict
        Keys "trainfromyear", "traintoyear", "trainresult", "prefromyear",
        "pretoyear", "preresult", "MAPE" and "RMSE".

    Raises
    ------
    ValueError
        If no candidate model could be fitted, or the hold-out part does not
        cover at least one complete year.
    """
    d = D = 1  # non-seasonal and seasonal differencing order

    def month_to_year(monthly_values):
        # Sum every complete run of 12 months into one yearly total; a
        # trailing incomplete year is dropped.
        values = np.asarray(monthly_values)
        full_years = len(values) // 12
        return np.array(
            [sum(values[k * 12:(k + 1) * 12]) for k in range(full_years)])

    def fit_sarimax(series, param):
        # param is (p, q, P, Q); the seasonal period is fixed at 12 months.
        return sm.tsa.statespace.SARIMAX(
            series,
            order=(param[0], d, param[1]),
            seasonal_order=(param[2], D, param[3], 12)).fit(disp=-1)

    StartMonth = "%s/1" % (StartYear)
    EndMonth = "%s/12" % (EndYear)

    # Monthly data.
    monthdata = json.loads(
        getData("云南省_month_电力电量类-行业", pretype, StartMonth, EndMonth))
    pdmonthdata = pd.DataFrame(monthdata, index=[pretype]).T

    # Yearly data (used as ground truth for the hold-out years).
    yeardata = json.loads(
        getData("云南省_year_电力电量类-行业", pretype, StartYear, EndYear))
    pdyeardata = pd.DataFrame(yeardata, index=[pretype]).T

    totalyear = int(EndYear) - int(StartYear) + 1
    train_years = math.floor(totalyear - totalyear * 0.3)
    train_num = train_years * 12
    monthly = pdmonthdata[pretype].values
    train_data = monthly[:train_num]
    test_data = monthly[train_num:]

    # Remove the mean and take a first difference before order selection.
    mean = sum(train_data) / len(train_data)
    data_mean = np.array([value - mean for value in train_data])
    df_mean_1 = np.diff(data_mean, 1)

    # Order selection by AIC over p, q, P, Q in {0, 1}.
    warnings.filterwarnings("ignore")
    best_aic = float("inf")
    best_param = None
    for parameters in itertools.product(range(0, 2), repeat=4):
        try:
            model = fit_sarimax(df_mean_1, parameters)
        except Exception:
            # A candidate that cannot be fitted is simply skipped.
            continue
        if model.aic < best_aic:
            best_aic = model.aic
            best_param = parameters
    if best_param is None:
        raise ValueError("SARIMA模型拟合失败,请检查历史数据")

    # Evaluate on the hold-out months.
    best_model = fit_sarimax(train_data, best_param)
    test_predict = best_model.forecast(steps=len(test_data))
    ytrain = month_to_year(test_predict)
    n_eval = len(ytrain)
    if n_eval == 0:
        raise ValueError("历史数据过少,无法划分出完整的测试年份")

    # The evaluated years are the last n_eval rows of the yearly table, so
    # values and labels are taken by position.
    first_eval = max(len(pdyeardata) - n_eval, 0)
    finaltrue = pdyeardata[pretype].values[first_eval:]
    trainyear = list(pdyeardata.index[first_eval:])
    mape = MAPE(ytrain, finaltrue)
    rmse = RMSE(ytrain, finaltrue)

    # Forecast: refit on the complete monthly history.
    outputlen = int(PreEndYear) - int(PreStartYear) + 1
    final_model = fit_sarimax(monthly, best_param)
    ypre = month_to_year(final_model.forecast(steps=outputlen * 12))

    result = {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
def UnarylinearTime(StartYear,
                    EndYear,
                    PreStartYear,
                    PreEndYear,
                    pretype="全社会用电量",
                    city="云南省",
                    planflag=0,
                    plan=0):
    """Univariate linear extrapolation of a yearly series over time.

    A straight line is fitted to the series against the calendar year
    (scaled by ``1 + plan * 0.01``). One fit on a window ending two years
    before the end of the history is scored on a window shifted by one year;
    a second fit on the whole history produces the forecast.

    Parameters
    ----------
    StartYear, EndYear : str
        First and last year of the historical data.
    PreStartYear, PreEndYear : str
        First and last forecast year.
    pretype : str
        Name of the series to load and forecast.
    city : str
        Region to forecast; only "云南省" is supported.
    planflag
        Not used by this function.
    plan : number
        Percentage applied to the year axis as ``1 + plan * 0.01``.

    Returns
    -------
    dict
        For the supported region, the keys "trainfromyear", "traintoyear",
        "trainresult", "prefromyear", "pretoyear", "preresult", "MAPE" and
        "RMSE". For any other region, ``{"False": "暂不支持其他地区预测"}``.

    Raises
    ------
    ValueError
        If the history is too short for the requested horizon, or the
        horizon is shorter than two years.
    """
    if city != "云南省":
        return {"False": "暂不支持其他地区预测"}

    # Load the historical series; the frame gets one column named pretype.
    datajson = getData("云南省_year_电力电量类", pretype, StartYear, EndYear)
    final = pd.DataFrame([json.loads(datajson)], index=[pretype]).T

    realyear = np.arange(int(StartYear), int(EndYear) + 1)
    final["time"] = realyear
    scale = 1 + plan * 0.01
    x = (final["time"].values * scale).reshape(-1, 1)
    y = final[pretype].values.reshape(-1, 1)

    preyear = np.arange(int(PreStartYear), int(PreEndYear) + 1)
    year = len(preyear)

    num = len(x)
    if num < 2 + year:
        raise ValueError("历史数据过少或预测年份过长,请重新选择")
    if year < 2:
        raise ValueError("该算法不支持两年以下的预测")

    # Fit window and evaluation window have the forecast length and are
    # offset by one year from each other.
    trainx = x[num - 2 - year:num - 2]
    trainy = y[num - 2 - year:num - 2]
    testx = x[num - 1 - year:num - 1]
    testy = y[num - 1 - year:num - 1]

    reg = LinearRegression().fit(trainx, trainy)
    slope = reg.coef_[0][0]
    intercept = reg.intercept_[0]
    testpredy = [row * slope + intercept for row in testx]

    mape = MAPE(testpredy, testy)
    rmse = RMSE(testpredy, testy)

    trainyear = realyear[num - 1 - year:num - 1]

    # Forecast with a line fitted on the complete history.
    reg1 = LinearRegression().fit(x, y)
    predy = [
        value * reg1.coef_[0][0] + reg1.intercept_[0]
        for value in preyear * scale
    ]

    ytrain = np.array(testpredy).squeeze()
    ypre = np.array(predy).squeeze()
    result = {
        # Plain ints: numpy integers are not JSON serialisable.
        "trainfromyear": int(trainyear[0]),
        "traintoyear": int(trainyear[-1]),
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
def LogarithmTime(StartYear, EndYear, PreStartYear, PreEndYear, pretype="全社会用电量", city="云南省", planflag=0, plan=0):
    """Extrapolate a yearly series with a logarithmic curve ``a*log(t) + b``.

    The curve is fitted by least squares against the calendar year (scaled
    by ``1 + plan * 0.01``). One fit on a window ending two years before the
    end of the history is scored on the last ``horizon + 1`` years; a second
    fit on the whole history produces the forecast.

    Parameters
    ----------
    StartYear, EndYear : str
        First and last year of the historical data.
    PreStartYear, PreEndYear : str
        First and last forecast year.
    pretype : str
        Name of the series to load and forecast.
    city : str
        Region to forecast; only "云南省" is supported.
    planflag
        Not used by this function.
    plan : number
        Percentage applied to the year axis as ``1 + plan * 0.01``.

    Returns
    -------
    dict
        For the supported region, the keys "trainfromyear", "traintoyear",
        "trainresult", "prefromyear", "pretoyear", "preresult", "MAPE" and
        "RMSE". For any other region, ``{"False": "暂不支持其他地区预测"}``.

    Raises
    ------
    ValueError
        If the history is too short for the requested horizon, or the
        horizon is shorter than two years.
    """

    def log_curve(params, x):
        a, b = params
        return a * np.log(x) + b

    def residual(params, x, y):
        return log_curve(params, x) - y

    def fit_log_curve(x, y):
        # The first element of leastsq's result is the fitted (a, b) pair.
        return leastsq(residual, [1, 0.02], args=(x, y))[0]

    if city != "云南省":
        return {"False": "暂不支持其他地区预测"}

    # Load the historical series; the frame gets one column named pretype.
    datajson = getData("云南省_year_电力电量类", pretype, StartYear, EndYear)
    final = pd.DataFrame([json.loads(datajson)], index=[pretype]).T

    realyear = np.arange(int(StartYear), int(EndYear) + 1)
    final["time"] = realyear
    scale = 1 + plan * 0.01
    x = (final["time"].values * scale).reshape(-1, 1)
    y = final[pretype].values.reshape(-1, 1)

    preyear = np.arange(int(PreStartYear), int(PreEndYear) + 1) * scale
    year = len(preyear)

    num = len(x)
    if num < 3 + year:
        raise ValueError("历史数据过少或预测年份过长,请重新选择")
    if year < 2:
        raise ValueError("该算法不支持两年以下的预测")

    # Fit window: horizon + 1 years ending two years before the end.
    # Evaluation window: the last horizon + 1 years.
    trainx = x[num - 2 - year - 1:num - 2].squeeze()
    trainy = y[num - 2 - year - 1:num - 2].squeeze()
    testx = x[num - 1 - year:num].squeeze()
    testy = y[num - 1 - year:num].squeeze()

    a, b = fit_log_curve(trainx, trainy)
    testpredy = [a * np.log(value) + b for value in testx]
    trainyear = realyear[num - 1 - year:num]
    mape = MAPE(testpredy, testy)
    rmse = RMSE(testpredy, testy)

    # Forecast with a curve fitted on the complete history.
    ap, bp = fit_log_curve(x.squeeze(), y.squeeze())
    predy = [ap * np.log(value) + bp for value in preyear]

    ytrain = np.array(testpredy).squeeze()
    ypre = np.array(predy).squeeze()
    result = {
        # Plain ints: numpy integers are not JSON serialisable.
        "trainfromyear": int(trainyear[0]),
        "traintoyear": int(trainyear[-1]),
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
def Unarylinear(StartYear,
                EndYear,
                PreStartYear,
                PreEndYear,
                pretype="全社会用电量",
                econamelist="GDP",
                city="云南省",
                planflag=1,
                plan=1,
                pro=1):
    """Univariate linear regression of a load series on one economic factor.

    A line ``load = k * factor + c`` is fitted on the early part of the
    history. Future factor values come from ``ic.getpred``: among its
    scenario paths the one whose mean equals the median of all path means is
    selected and multiplied by the last known factor value.

    Parameters
    ----------
    StartYear, EndYear : str
        First and last year of the historical data.
    PreStartYear, PreEndYear : str
        First and last forecast year.
    pretype : str
        Name of the load series to forecast.
    econamelist : str or list of str
        Name of the single economic factor, either as a string or as a
        one-element list.
    city : str
        Region to forecast; only "云南省" is supported.
    planflag, plan, pro
        Forwarded unchanged to ``ic.getpred``.

    Returns
    -------
    dict
        For the supported region, the keys "trainfromyear", "traintoyear",
        "trainresult", "prefromyear", "pretoyear", "preresult", "MAPE" and
        "RMSE". For any other region, ``{"False": "暂不支持其他地区预测"}``.

    Raises
    ------
    ValueError
        If more or fewer than one factor is given, or the history is too
        short to hold out at least two years.
    """

    def median_scenario(paths, last_value):
        # NOTE(review): presumably ic.getpred yields 51 scenario paths of
        # growth multipliers (one per column before the transpose) - confirm.
        paths = np.array(paths).T
        means = [np.mean(paths[i]) for i in range(51)]
        chosen = paths[means.index(np.median(means))]
        return [k * last_value for k in chosen]

    # Accept both a bare name and a list of names; wrapping an existing
    # list again would hide a wrong number of factors.
    if isinstance(econamelist, str):
        econamelist = [econamelist]
    else:
        econamelist = list(econamelist)
    if len(econamelist) != 1:
        raise ValueError("仅支持选择一个因素变量")
    if city != "云南省":
        return {"False": "暂不支持其他地区预测"}

    eco_name = econamelist[0]

    # Load the historical load series and the economic factor.
    datajson = getData("云南省_year_电力电量类", pretype, StartYear, EndYear)
    ecodatajson = getData("云南省_year_社会经济类", eco_name, StartYear, EndYear)
    final = pd.DataFrame(
        [json.loads(datajson), json.loads(ecodatajson)],
        index=[pretype, eco_name]).T

    x = final[eco_name].values.reshape(-1, 1)
    y = final[pretype].values.reshape(-1, 1)

    # Hold out the last eighth of the history (rounded up) for evaluation.
    num = len(x)
    testyear = math.ceil(num / 8)
    if testyear < 2:
        raise ValueError("历史数据过少或预测年份过长,请重新选择")

    trainx = x[:num - testyear]
    trainy = y[:num - testyear]
    testx = x[num - testyear:]
    testy = y[num - testyear:]

    reg = LinearRegression().fit(trainx, trainy)
    slope = reg.coef_[0][0]
    intercept = reg.intercept_[0]

    # Evaluate on scenario-generated factor values for the hold-out years.
    testpredx = median_scenario(
        ic.getpred(testx, testyear, planflag, plan, pro), testx[-1])
    testpredy = [value * slope + intercept for value in testpredx]
    mape = MAPE(testpredy, testy)
    rmse = RMSE(testpredy, testy)

    historyyear = np.arange(int(StartYear), int(EndYear) + 1)
    trainyear = historyyear[num - testyear:]

    # Forecast: scenario-generated factor values fed to the same line.
    preyear = np.arange(int(PreStartYear), int(PreEndYear) + 1)
    year = len(preyear)
    predx = median_scenario(ic.getpred(x, year, planflag, plan, pro), x[-1])
    predy = [value * slope + intercept for value in predx]

    ytrain = np.array(testpredy).squeeze()
    ypre = np.array(predy).squeeze()
    result = {
        # Plain ints: numpy integers are not JSON serialisable.
        "trainfromyear": int(trainyear[0]),
        "traintoyear": int(trainyear[-1]),
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
def RFIndustry(StartYear,
               EndYear,
               PreStartYear,
               PreEndYear,
               timestep,
               pretype,
               n_estimators=50,
               city="云南省",
               correction=500):
    """Forecast a yearly industry series with a random forest on sliding windows.

    Parameters
    ----------
    StartYear : str
        First year of the historical data.
    EndYear : str
        Last year of the historical data.
    PreStartYear : str
        First forecast year.
    PreEndYear : str
        Last forecast year.
    timestep : int
        Length of the input window given to the model.
    pretype : str
        Name of the series to load and forecast.
    n_estimators : int
        Number of trees in the forest. Default 50.
    city : str
        Not used by this function.
    correction : number
        Constant added to every forecast value. Default 500.

    Returns
    -------
    dict
        Keys "trainfromyear", "traintoyear", "trainresult", "prefromyear",
        "pretoyear", "preresult", "MAPE" and "RMSE".

    Raises
    ------
    ValueError
        If ``timestep`` exceeds the number of historical years, or the
        history is shorter than forecast length plus ``timestep``.
    """
    if timestep > (int(EndYear) - int(StartYear) + 1):
        raise ValueError("训练步长过大,请调整后重试")
    if int(EndYear) - int(StartYear) < (int(PreEndYear) - int(PreStartYear) +
                                        timestep):
        raise ValueError("历史时间长度小于预测时间长度,请增加历史时间长度或减小预测时间长度")

    outputlen = int(PreEndYear) - int(PreStartYear) + 1

    # Load the historical series; the frame gets one column named pretype.
    datajson = getData("云南省_year_电力电量类-行业", pretype, StartYear, EndYear)
    final = pd.DataFrame([json.loads(datajson)], index=[pretype]).T

    # No hold-out split: every window is used for training.
    X, y = generate_data(final,
                         timestep,
                         outputlen,
                         test_size=0,
                         if_norm="no")

    rf = RandomForestRegressor(n_estimators=n_estimators)
    rf.fit(X["train"], y["train"])

    # The second half of the training windows doubles as evaluation set.
    selet = int(np.floor(len(X["train"]) / 2))
    testinput = X["train"][selet:, :]
    testoutput = y["train"][selet:, :]

    y_rf_real = np.array(rf.predict(testinput)).reshape(-1, 1)
    y_real = np.array(testoutput).reshape(-1, 1)
    mape = MAPE(y_rf_real, y_real)
    rmse = RMSE(y_rf_real, y_real)

    # Forecast from the most recent ``timestep`` observations, then apply
    # the constant correction.
    testdata = final[pretype].values
    latest_window = np.array(
        np.flipud(testdata[-1:-(timestep + 1):-1])).reshape(1, -1)
    pre_y_rf = rf.predict(latest_window) + correction

    # Recover year labels by matching each evaluated target value against
    # the series (first entry within +/-5 wins).
    trainyear = []
    for t in y_real:
        for count, d in enumerate(final[pretype]):
            if t > d - 5 and t < d + 5:
                trainyear.append(final.index[count])
                break

    ytrain = y_rf_real.flatten()
    ypre = pre_y_rf.flatten()
    result = {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
def SVM(StartYear,
        EndYear,
        PreStartYear,
        PreEndYear,
        timestep,
        pretype="全社会用电量",
        city="云南省"):
    """Forecast a yearly series with support vector regression on sliding windows.

    Parameters
    ----------
    StartYear, EndYear : str
        First and last year of the historical data.
    PreStartYear, PreEndYear : str
        First and last forecast year.
    timestep : int
        Length of the input window given to the model.
    pretype : str
        Name of the series to load and forecast.
    city : str
        Not used by this function.

    Returns
    -------
    dict
        Keys "trainfromyear", "traintoyear", "trainresult", "prefromyear",
        "pretoyear", "preresult", "MAPE" and "RMSE".

    Raises
    ------
    ValueError
        If ``timestep`` exceeds the number of historical years, or the
        history is shorter than forecast length plus ``timestep``.
    """
    if timestep > (int(EndYear) - int(StartYear) + 1):
        raise ValueError("训练步长过大,请调整后重试")
    if int(EndYear) - int(StartYear) < (int(PreEndYear) - int(PreStartYear) +
                                        timestep):
        raise ValueError("历史时间长度小于预测时间长度与训练步长之和,请调整后重试")

    outputlen = int(PreEndYear) - int(PreStartYear) + 1

    # Load the series; the frame gets one column named pretype.
    datajson = getData("云南省_year_电力电量类", pretype, StartYear, EndYear)
    final = pd.DataFrame([json.loads(datajson)], index=[pretype]).T

    # No hold-out split: every window is used for training.
    X, y = generate_data(final,
                         timestep,
                         outputlen,
                         test_size=0,
                         if_norm="no")

    # One SVR per output year.
    svr = SVR(kernel="poly", gamma="scale", C=0.1)
    multi_model = MultiOutputRegressor(svr)
    multi_model.fit(X["train"], y["train"])

    # The second half of the training windows doubles as evaluation set.
    selet = int(np.floor(len(X["train"]) / 2))
    testinput = X["train"][selet:, :]
    testoutput = y["train"][selet:, :]

    y_svr = multi_model.predict(testinput)
    y_svr_real = np.array(y_svr).reshape(-1, 1)
    y_real = np.array(testoutput).reshape(-1, 1)
    mape = MAPE(y_svr_real, y_real)
    rmse = RMSE(y_svr_real, y_real)

    # Forecast from the most recent ``timestep`` observations.
    testdata = final.values
    pre = multi_model.predict(
        np.array(np.flipud(testdata[-1:-(timestep + 1):-1])).reshape(1, -1))

    # Only the last evaluation window is reported as the training result.
    ytrain = y_svr[-1]

    # Recover the year labels of that window by matching each target value
    # against the series (first entry within +/-1 wins).
    trainyear = []
    for t in testoutput[-1]:
        for count, d in enumerate(final[pretype]):
            if t > d - 1 and t < d + 1:
                trainyear.append(final.index[count])
                break

    ypre = np.array(pre).flatten()
    result = {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result