def FLR(StartYear, EndYear, PreStartYear, PreEndYear, timestep=15,
        pretype="consumption", city="云南省"):
    """Forecast a yearly power series with a two-input fuzzy rule base.

    The target series and three economic indicators (GDP1, GDP2, GDP3) are
    fetched with ``getData``; the indicators are extended into the forecast
    years by ``predict.pre``; a fixed 25-rule fuzzy system maps GDP1 and
    GDP2 to the target value. GDP3 is fetched and forecast but is not an
    input of the rule base.

    Parameters
    ----------
    StartYear, EndYear : str or int
        First and last year of the history to load (converted with ``int``).
    PreStartYear, PreEndYear : str or int
        First and last year of the forecast horizon.
    timestep : int, optional
        Number of most recent history rows used as the training window.
        The default is 15.
    pretype : str, optional
        Name of the target series passed to ``getData``. The default is
        "consumption".
    city : str, optional
        Not used by this function. The default is "云南省".

    Returns
    -------
    dict
        ``trainfromyear`` / ``traintoyear``: first and last index label of
        the training window; ``trainresult``: 1-D ndarray of system outputs
        for the training rows; ``prefromyear`` / ``pretoyear``: the forecast
        bounds as passed in; ``preresult``: ndarray of shape ``(1, k)`` with
        the outputs for the remaining rows; ``MAPE`` / ``RMSE``: errors on
        the last ``period`` training rows.

    Raises
    ------
    ValueError
        If ``timestep`` exceeds the number of years in the history range.
    """
    # Same guard (and message) as FER: without it the window slice below
    # would get a negative start and silently select the wrong rows.
    if timestep > int(EndYear) - int(StartYear) + 1:
        raise ValueError("训练步长过大,请调整后重试.")

    period = int(PreEndYear) - int(PreStartYear) + 1
    econamelist = ["GDP1", "GDP2", "GDP3"]

    # Load the target series first, then one series per economic indicator.
    name = [pretype]
    finaldata = [
        json.loads(getData("yunnan_year_电力电量类", pretype, StartYear, EndYear))
    ]
    for econame in econamelist:
        ecodatajson = getData("yunnan_year_社会经济类", econame, StartYear, EndYear)
        finaldata.append(json.loads(ecodatajson))
        name.append(econame)

    # One row per series -> transpose so that each series is a column.
    final = pd.DataFrame(finaldata, index=name).T

    # Training window: the last `timestep` rows. The start is clamped so a
    # history with fewer rows than expected cannot wrap around.
    data1 = final.iloc[max(len(final.values) - timestep, 0):]
    num = len(data1.values)

    # Extend every indicator into the forecast years and join on "year".
    eco = predict.pre(data1, econamelist[0], PreStartYear, PreEndYear)
    for econame in econamelist[1:]:
        c = predict.pre(data1, econame, PreStartYear, PreEndYear)
        eco = pd.merge(eco, c, on="year")

    trainx = eco.loc[:, econamelist]
    trainy = data1.loc[:, pretype]

    # Fuzzy variables: two antecedents and one consequent.
    GDP1 = ctrl.Antecedent(np.arange(100, 15000, 20), "gdp1")
    GDP2 = ctrl.Antecedent(np.arange(150, 20000, 20), "gdp2")
    fuload = ctrl.Consequent(np.arange(100, 8000, 1), "futureload")

    # Triangular membership functions, listed in the order of `levels`.
    levels = ("very low", "low", "medium", "high", "very high")
    triangles = (
        (GDP1, ([100, 300, 500], [400, 850, 1250], [1000, 2500, 4000],
                [3700, 5500, 7500], [7300, 12000, 15000])),
        (GDP2, ([100, 500, 900], [500, 1450, 2600], [2500, 6500, 10500],
                [9500, 12000, 14000], [13500, 16000, 20000])),
        (fuload, ([100, 200, 300], [250, 550, 1100], [1050, 1900, 3000],
                  [2750, 3500, 5100], [5000, 8000, 8000])),
    )
    for variable, shapes in triangles:
        for level, abc in zip(levels, shapes):
            variable[level] = fuzz.trimf(variable.universe, abc)

    # Rule base: rows follow the GDP1 level, columns the GDP2 level (both in
    # the order of `levels`); each cell is the resulting load level.
    rule_table = (
        ("very low", "very low", "low", "medium", "medium"),
        ("very low", "low", "low", "medium", "medium"),
        ("low", "low", "medium", "high", "medium"),
        ("low", "medium", "high", "high", "very high"),
        ("low", "low", "medium", "high", "very high"),
    )
    rules = []
    for gdp1_level, row in zip(levels, rule_table):
        for gdp2_level, load_level in zip(levels, row):
            rules.append(
                ctrl.Rule(GDP1[gdp1_level] & GDP2[gdp2_level], fuload[load_level])
            )
    consumptionSystem = ctrl.ControlSystemSimulation(ctrl.ControlSystem(rules))

    # Evaluate the system on every row of the extended indicator table.
    # NOTE(review): `.loc[i, ...]` assumes `eco` has a 0..n-1 integer index,
    # which is what pd.merge produces - confirm for predict.pre's output.
    trainn = len(trainx)
    systemoutput = np.zeros(trainn, dtype=np.float64)
    for i in range(trainn):
        consumptionSystem.input["gdp1"] = trainx.loc[i, econamelist[0]]
        consumptionSystem.input["gdp2"] = trainx.loc[i, econamelist[1]]
        consumptionSystem.compute()
        systemoutput[i] = consumptionSystem.output["futureload"]

    # Errors on the last `period` training rows; the start is clamped so a
    # forecast horizon longer than the window cannot produce a negative
    # slice start.
    eval_start = max(num - period, 0)
    mape = MAPE(systemoutput[eval_start:num], trainy.values[eval_start:num])
    rmse = RMSE(systemoutput[eval_start:num], trainy.values[eval_start:num])

    # Rows up to `num` are reported as the training result, the rest as the
    # forecast.
    trainyear = data1.index
    ytrain = systemoutput[:num]
    ypre = np.array(systemoutput[num:]).reshape(1, -1)
    result = {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain,
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre,
        "MAPE": mape,
        "RMSE": rmse,
    }
    return result
def ESQRM(StartYear, EndYear, PreStartYear, PreEndYear, quatile=0.95,
          pretype="consumption", econamelist=["GDP"], city="云南省"):
    """Forecast a yearly series by quantile regression on log-transformed data.

    The target and the chosen economic indicators are loaded, transformed
    with ``np.log``, the indicators are extended into the forecast years by
    ``preeco.pre``, a quantile regression is fitted on the log data and the
    fitted/forecast values are mapped back with ``np.exp``.

    Parameters
    ----------
    StartYear : str
        First year of the historical data.
    EndYear : str
        Last year of the historical data.
    PreStartYear : str
        First forecast year.
    PreEndYear : str
        Last forecast year.
    quatile : float
        Quantile to fit. The default is 0.95.
    pretype : str
        Forecast target: "consumption" or "load".
    econamelist : list
        Names of the economic indicators to use (at most 5). The default
        list is never mutated here.
    city : str
        Region to forecast. Only the default "云南省" is supported.

    Returns
    -------
    dict
        For the supported region: ``trainfromyear`` (StartYear),
        ``traintoyear`` (EndYear), ``trainresult`` (list, fitted values),
        ``prefromyear``, ``pretoyear``, ``preresult`` (list, forecast),
        ``MAPE`` and ``RMSE`` (errors of the fitted values).
        For any other region: ``{"False": "暂不支持其他地区预测"}``.

    Raises
    ------
    ValueError
        If more than 5 economic indicators are given. The original code only
        printed the message and then failed with UnboundLocalError.
    """

    def get_coef(data, pretype, econamelist, quatile):
        # Fit "<target> ~ x1+x2+..." and return the quantile, the intercept,
        # the coefficients and their confidence bounds.
        formula = "%s ~ %s" % (pretype, "+".join(econamelist))
        res = smf.quantreg(formula, data).fit(q=quatile)
        return (quatile, res.params["Intercept"], res.params[econamelist],
                res.conf_int().loc[econamelist])

    def predict(data, intercept, coef, quatile, econamelist):
        # `data` holds only the regressors; the linear prediction is in log
        # space, so it is exponentiated before being returned.
        pre = [intercept] * len(data.values)
        for econame in econamelist:
            pre = pre + coef[econame] * data[econame].values
        return np.exp(pre)

    # Work on a private list so any sequence type is accepted and the shared
    # default argument can never be modified.
    econamelist = list(econamelist)

    # Validate the number of economic indicators.
    if len(econamelist) > 5:
        delnum = len(econamelist) - 5
        raise ValueError("经济因素选取不应超出5个,请删去%s个,再重新预测。" % delnum)
    if city != "云南省":
        return {"False": "暂不支持其他地区预测"}

    period = int(PreEndYear) - int(PreStartYear) + 1

    # Load the historical target series.
    # NOTE(review): the target is read from the socio-economic table here,
    # whereas sibling functions read it from a power table - confirm that
    # this is intended.
    name = [pretype]
    finaldata = [
        json.loads(getData("yunnan_year_社会经济类", pretype, StartYear, EndYear))
    ]
    # Load the economic indicators.
    for econame in econamelist:
        ecodatajson = getData("yunnan_year_社会经济类", econame, StartYear, EndYear)
        finaldata.append(json.loads(ecodatajson))
        name.append(econame)

    # One row per series -> transpose so that each series is a column.
    final = pd.DataFrame(finaldata, index=name).T

    # Log-transform every column.
    logfinal = final.apply(np.log)

    # Extend every indicator into the forecast years and join on "year".
    eco = preeco.pre(logfinal, econamelist[0], PreStartYear, PreEndYear)
    for econame in econamelist[1:]:
        c = preeco.pre(logfinal, econame, PreStartYear, PreEndYear)
        eco = pd.merge(eco, c, on="year")

    # Fit and predict over history + forecast years.
    q, b, k, lbub = get_coef(logfinal, pretype, econamelist, quatile)
    y = predict(eco, b, k, q, econamelist)

    # The last `period` values are the forecast; the rest are fitted values
    # compared against the untransformed history.
    split = len(y) - period
    ytrain = y[:split]
    ytraintrue = final[pretype].values[:split]
    mape = MAPE(ytrain, ytraintrue)
    rmse = RMSE(ytrain, ytraintrue)
    ypre = y[split:]

    result = {
        "trainfromyear": StartYear,
        "traintoyear": EndYear,
        "trainresult": list(ytrain),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": list(ypre),
        "MAPE": mape,
        "RMSE": rmse,
    }
    return result
def FER(StartYear, EndYear, PreStartYear, PreEndYear, timestep,
        pretype="全社会用电量", city="云南省"):
    """Fuzzy rule-based forecast followed by double exponential smoothing.

    The target series and three GDP indicators are loaded with ``getData``,
    the indicators are extended into the forecast years by ``predict.pre``,
    a fixed 25-rule fuzzy system maps the first two indicators to the target
    value, and the system outputs are smoothed before being reported.

    Parameters
    ----------
    StartYear, EndYear : str or int
        First and last year of the history to load (converted with ``int``).
    PreStartYear, PreEndYear : str or int
        First and last year of the forecast horizon.
    timestep : int
        Number of most recent history rows used as the training window
        (required; there is no default).
    pretype : str, optional
        Name of the target series. The default is "全社会用电量".
    city : str, optional
        Not used by this function. The default is "云南省".

    Returns
    -------
    result : dict
        ``trainfromyear`` / ``traintoyear``: first and last index label of
        the training window; ``trainresult`` and ``preresult``: smoothed
        outputs as lists; ``prefromyear`` / ``pretoyear``: the forecast
        bounds as passed in; ``MAPE`` / ``RMSE``: errors of the smoothed
        outputs on the last ``period`` training rows.

    Raises
    ------
    ValueError
        If ``timestep`` exceeds the history length, if the forecast horizon
        spans a single year or less, or if ``timestep`` is smaller than the
        horizon length plus one.
    """

    def double_exponential_smoothing(series, alpha, beta):
        # Double exponential smoothing: track a level and a trend, and emit
        # level + trend for every observation after the first.
        smoothed = [series[0]]
        if len(series) < 2:
            return smoothed
        level = series[0]
        trend = series[1] - series[0]
        for value in series[1:]:
            previous_level = level
            level = alpha * value + (1 - alpha) * (previous_level + trend)
            trend = beta * (level - previous_level) + (1 - beta) * trend
            smoothed.append(level + trend)
        return smoothed

    horizon_span = int(PreEndYear) - int(PreStartYear)
    if timestep > (int(EndYear) - int(StartYear) + 1):
        raise ValueError("训练步长过大,请调整后重试.")
    if horizon_span < 1:
        raise ValueError("该算法不支持一年及一年内的预测.")
    if timestep < horizon_span + 2:
        raise ValueError("训练步长小于预测年份区间长度,请增加训练步长.")

    period = horizon_span + 1
    econamelist = ["第一产业GDP", "第二产业GDP", "第三产业GDP"]

    # Load the target series, then one series per economic indicator.
    series_names = [pretype]
    series_data = [
        json.loads(getData("云南省_year_电力电量类", pretype, StartYear, EndYear))
    ]
    for indicator in econamelist:
        raw = getData("云南省_year_社会经济类", indicator, StartYear, EndYear)
        series_data.append(json.loads(raw))
        series_names.append(indicator)

    # One row per series -> transpose so that each series is a column.
    final = pd.DataFrame(series_data, index=series_names).T

    # Training window: the last `timestep` rows.
    data1 = final.iloc[len(final.values) - timestep:]
    num = len(data1.values)

    # Extend every indicator into the forecast years and join on "year".
    eco = predict.pre(data1, econamelist[0], PreStartYear, PreEndYear)
    for indicator in econamelist[1:]:
        extended = predict.pre(data1, indicator, PreStartYear, PreEndYear)
        eco = pd.merge(eco, extended, on="year")

    trainx = eco.loc[:, econamelist]
    trainy = data1.loc[:, pretype]

    # Fuzzy variables: two antecedents and one consequent. The third
    # indicator is loaded and forecast but is not an input of the rule base.
    GDP1 = ctrl.Antecedent(np.arange(100, 15000, 20), "gdp1")
    GDP2 = ctrl.Antecedent(np.arange(150, 20000, 20), "gdp2")
    fuload = ctrl.Consequent(np.arange(100, 8000, 1), "futureload")

    # Triangular membership functions, listed in the order of `levels`.
    levels = ("very low", "low", "medium", "high", "very high")
    triangles = (
        (GDP1, ([100, 300, 500], [400, 850, 1250], [1000, 2500, 4000],
                [3700, 5500, 7500], [7300, 12000, 15000])),
        (GDP2, ([100, 500, 900], [500, 1450, 2600], [2500, 6500, 10500],
                [9500, 12000, 14000], [13500, 16000, 20000])),
        (fuload, ([100, 200, 300], [250, 550, 1100], [1050, 1900, 3000],
                  [2750, 3500, 5100], [5000, 8000, 8000])),
    )
    for variable, shapes in triangles:
        for level, abc in zip(levels, shapes):
            variable[level] = fuzz.trimf(variable.universe, abc)

    # Rule base: rows follow the GDP1 level, columns the GDP2 level (both in
    # the order of `levels`); each cell is the resulting load level.
    rule_table = (
        ("very low", "very low", "low", "medium", "medium"),
        ("very low", "low", "low", "medium", "medium"),
        ("low", "low", "medium", "high", "medium"),
        ("low", "medium", "high", "high", "very high"),
        ("low", "low", "medium", "high", "very high"),
    )
    rules = []
    for gdp1_level, row in zip(levels, rule_table):
        for gdp2_level, load_level in zip(levels, row):
            rules.append(
                ctrl.Rule(GDP1[gdp1_level] & GDP2[gdp2_level], fuload[load_level])
            )
    fuzzy_ctrl = ctrl.ControlSystem(rules)
    consumptionSystem = ctrl.ControlSystemSimulation(fuzzy_ctrl)

    # Evaluate the system on every row of the extended indicator table.
    trainn = len(trainx)
    systemoutput = np.zeros(trainn, dtype=np.float64)
    for row_label in range(trainn):
        consumptionSystem.input["gdp1"] = trainx.loc[row_label, econamelist[0]]
        consumptionSystem.input["gdp2"] = trainx.loc[row_label, econamelist[1]]
        consumptionSystem.compute()
        systemoutput[row_label] = consumptionSystem.output["futureload"]

    # Smooth the training part, the evaluation tail and the forecast part
    # separately.
    alpha = 0.9
    beta = 1
    allexsystemoutput = double_exponential_smoothing(systemoutput[:num], alpha, beta)
    exsystemoutput = double_exponential_smoothing(
        systemoutput[num - period:num], alpha, beta)
    exprey = double_exponential_smoothing(systemoutput[num:], alpha, beta)

    actual_tail = trainy.values[num - period:num]
    mape = MAPE(exsystemoutput, actual_tail)
    rmse = RMSE(exsystemoutput, actual_tail)

    # Package the result.
    trainyear = data1.index
    ytrain = np.array(allexsystemoutput).reshape(1, -1).squeeze()
    ypre = np.array(exprey).reshape(1, -1).squeeze()
    result = {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse,
    }
    return result
def PCAIndustry(StartYear, EndYear, PreStartYear, PreEndYear, pretype,
                econamelist, city="云南省"):
    """Forecast an industry power series by PCA + linear regression.

    The target series and the chosen economic indicators are loaded, the
    indicators are extended into the forecast years by ``preeco.pre`` and
    standardised, a PCA keeping 90% of the variance fixes the number of
    components, and a linear regression on the principal components is
    fitted on the first ~70% of the history, evaluated on the remaining
    years and then applied to the forecast years.

    Parameters
    ----------
    StartYear, EndYear : str or int
        First and last year of the history to load (converted with ``int``).
    PreStartYear, PreEndYear : str or int
        First and last year of the forecast horizon.
    pretype : str
        Name of the target series passed to ``getData``.
    econamelist : list
        Names of the economic indicators to use.
    city : str, optional
        Region to forecast. Only the default "云南省" is supported.

    Returns
    -------
    dict
        ``trainfromyear`` / ``traintoyear``: first and last index label of
        the held-out evaluation rows; ``trainresult``: list of predictions
        for those rows; ``prefromyear`` / ``pretoyear``: the forecast bounds
        as passed in; ``preresult``: forecast values (a bare number when the
        horizon is a single year, because of ``squeeze``); ``MAPE`` /
        ``RMSE``: errors on the evaluation rows.

    Raises
    ------
    ValueError
        If ``city`` is not "云南省".
    """
    # Fail loudly for unsupported regions instead of producing no result.
    if city != "云南省":
        raise ValueError("暂不支持其他地区预测")

    historyyear = np.arange(int(StartYear), int(EndYear) + 1)

    # Load the historical target series, then the economic indicators.
    name = [pretype]
    finaldata = [
        json.loads(getData("云南省_year_电力电量类-行业", pretype, StartYear, EndYear))
    ]
    for econame in econamelist:
        ecodatajson = getData("云南省_year_社会经济类", econame, StartYear, EndYear)
        finaldata.append(json.loads(ecodatajson))
        name.append(econame)

    # One row per series -> transpose so that each series is a column.
    final = pd.DataFrame(finaldata, index=name).T
    final["Year"] = historyyear

    # Extend every indicator into the forecast years and join on "year".
    eco = preeco.pre(final, econamelist[0], PreStartYear, PreEndYear)
    for econame in econamelist[1:]:
        c = preeco.pre(final, econame, PreStartYear, PreEndYear)
        eco = pd.merge(eco, c, on="year")
    # Feature names: every column after the first.
    # NOTE(review): assumes "year" is the first column of `eco` - confirm
    # against preeco.pre.
    Index = eco.columns[1:].tolist()

    # Standardise the features.
    scaler = StandardScaler()
    scaler.fit(eco[Index].values)
    Data_eco_scaler = pd.DataFrame(
        data=scaler.transform(eco[Index].values), columns=Index)
    Data_eco_scaler["Year"] = eco["year"].values

    # Split the history: the first ceil(70%) years train the model, the rest
    # evaluate it. The training range now stops one year earlier than
    # before, so the first evaluation year is no longer also a training row.
    n_train = math.ceil(len(historyyear) * 0.7)
    train_years = range(int(StartYear), int(StartYear) + n_train)
    test_years = range(int(StartYear) + n_train, int(EndYear) + 1)
    x_train = Data_eco_scaler.loc[Data_eco_scaler["Year"].isin(train_years)]
    y_train = final.loc[final["Year"].isin(train_years)]
    x_test = Data_eco_scaler.loc[Data_eco_scaler["Year"].isin(test_years)]
    y_test = final.loc[final["Year"].isin(test_years)]

    # Number of components needed to keep 90% of the variance (measured on
    # all rows, history and forecast years alike).
    pca = PCA(0.9)
    pca.fit(Data_eco_scaler[Index].values)
    n_components = pca.n_components_

    # Project onto that many components, fitted on the training rows only.
    pca = PCA(n_components)
    pca.fit(x_train[Index].values)
    x_train_pca = pca.transform(x_train[Index].values)
    y_train_pca = y_train[pretype]
    x_test_pca = pca.transform(x_test[Index].values)
    y_test_pca = y_test[pretype]

    # Linear regression on the principal components.
    pca_model = LinearRegression()
    pca_model.fit(x_train_pca, y_train_pca)
    pca_predict = pca_model.predict(x_test_pca)

    # Evaluation metrics on the held-out years.
    rmse = RMSE(pca_predict, y_test_pca)
    mape = MAPE(pca_predict, y_test_pca)

    # Labels of the evaluation rows, taken from their index. The previous
    # lookup matched each value against the whole history within +/-5 and
    # could return an earlier year that happened to have a similar value.
    trainyear = y_test.index.tolist()

    # Forecast the requested years.
    forecast_years = range(int(PreStartYear), int(PreEndYear) + 1)
    predata = Data_eco_scaler.loc[Data_eco_scaler["Year"].isin(forecast_years)]
    forecast = pca_model.predict(pca.transform(predata[Index].values))

    ytrain = pca_predict.tolist()
    ypre = np.array(forecast).squeeze().tolist()
    result = {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain,
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre,
        "MAPE": mape,
        "RMSE": rmse,
    }
    return result
def QuantileRegression(StartYear, EndYear, PreStartYear, PreEndYear,
                       quatile=0.95, pretype="全社会用电量",
                       econamelist=["GDP"], city="云南省"):
    """Forecast a yearly power series by quantile regression on indicators.

    The target series and the chosen economic indicators are loaded, the
    indicators are extended into the forecast years by ``preeco.pre``, a
    quantile regression of the target on the indicators is fitted on the
    history and then evaluated over history and forecast years.

    Parameters
    ----------
    StartYear, EndYear : str or int
        First and last year of the history to load (converted with ``int``).
    PreStartYear, PreEndYear : str or int
        First and last year of the forecast horizon.
    quatile : float, optional
        Quantile to fit. The default is 0.95. The original code ignored
        this argument and always fitted 0.95.
    pretype : str, optional
        Name of the target series. The default is "全社会用电量".
    econamelist : list, optional
        Names of the economic indicators to use (at most 5). The default
        list is never mutated here.
    city : str, optional
        Region to forecast. Only the default "云南省" is supported.

    Returns
    -------
    dict
        ``trainfromyear`` (StartYear), ``traintoyear`` (EndYear),
        ``trainresult`` (list, fitted values), ``prefromyear``,
        ``pretoyear``, ``preresult`` (list, forecast), ``MAPE`` and
        ``RMSE`` (errors of the fitted values).

    Raises
    ------
    ValueError
        If more than 5 indicators are given, if the forecast horizon spans a
        single year or less, if the history covers fewer than 5 years, or if
        ``city`` is not supported.
    """

    def get_coef(data, xnamelist, yname, quatile):
        # Fit "<yname> ~ x1+x2+..." and return the quantile, the intercept,
        # the coefficients and their confidence bounds.
        formula = "%s ~ %s" % (yname, "+".join(xnamelist))
        res = smf.quantreg(formula, data).fit(q=quatile)
        return (quatile, res.params["Intercept"], res.params[xnamelist],
                res.conf_int().loc[xnamelist])

    def predict(data, intercept, coef, quatile, xnamelist):
        # `data` holds only the regressors; returns intercept + sum(k_i * x_i).
        pre = [intercept] * len(data.values)
        for xname in xnamelist:
            pre = pre + coef[xname] * data[xname].values
        return pre

    # Work on a private list so any sequence type is accepted and the shared
    # default argument can never be modified.
    econamelist = list(econamelist)

    # Input validation, in the original order.
    if len(econamelist) > 5:
        delnum = len(econamelist) - 5
        raise ValueError("经济因素选取不应超出 5 个,请删去 %s 个,再重新预测" % delnum)
    if int(PreEndYear) - int(PreStartYear) < 1:
        raise ValueError("该算法不支持一年及一年内的预测")
    if (int(EndYear) - int(StartYear) + 1) < 5:
        raise ValueError("历史年份区间过短,建议历史年份区间在 5 年以上")
    if city != "云南省":
        raise ValueError("暂不支持其他地区预测")

    period = int(PreEndYear) - int(PreStartYear) + 1

    # Load the historical target series, then the economic indicators.
    name = [pretype]
    finaldata = [
        json.loads(getData("云南省_year_电力电量类", pretype, StartYear, EndYear))
    ]
    for econame in econamelist:
        ecodatajson = getData("云南省_year_社会经济类", econame, StartYear, EndYear)
        finaldata.append(json.loads(ecodatajson))
        name.append(econame)

    # One row per series -> transpose so that each series is a column.
    final = pd.DataFrame(finaldata, index=name).T

    # Extend every indicator into the forecast years and join on "year".
    eco = preeco.pre(final, econamelist[0], PreStartYear, PreEndYear)
    for econame in econamelist[1:]:
        c = preeco.pre(final, econame, PreStartYear, PreEndYear)
        eco = pd.merge(eco, c, on="year")

    # Fit at the requested quantile and predict over history + forecast.
    q, b, k, lbub = get_coef(final, econamelist, pretype, quatile)
    y = predict(eco, b, k, q, econamelist)

    # The last `period` values are the forecast; the rest are fitted values
    # compared against the history.
    split = len(y) - period
    ytrain = y[:split]
    ytraintrue = final[pretype].values[:split]
    mape = MAPE(ytrain, ytraintrue)
    rmse = RMSE(ytrain, ytraintrue)
    ypre = y[split:]

    result = {
        "trainfromyear": StartYear,
        "traintoyear": EndYear,
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse,
    }
    return result