コード例 #1
0
ファイル: FLR.py プロジェクト: yuxiqian/ynpowerbackend
def FLR(StartYear,
        EndYear,
        PreStartYear,
        PreEndYear,
        timestep=15,
        pretype="consumption",
        city="云南省"):
    """Estimate a yearly power series with a fixed two-input fuzzy rule base.

    The target series and three GDP indicators are loaded through
    ``getData``, the last ``timestep`` rows are kept, the indicators are
    extended with ``predict.pre`` and every resulting row is pushed through
    a 5x5 fuzzy rule base driven by ``GDP1`` and ``GDP2``.

    Parameters
    ----------
    StartYear, EndYear :
        Bounds of the history window, forwarded to ``getData``.
    PreStartYear, PreEndYear :
        Bounds of the forecast window; must be convertible with ``int``.
    timestep : int, optional
        Number of trailing history rows to use. If it exceeds the number of
        available rows, all rows are used. The default is 15.
    pretype : str, optional
        Name of the target series. The default is "consumption".
    city : str, optional
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    dict
        ``trainfromyear`` / ``traintoyear`` (first and last index label of
        the training window), ``trainresult`` (ndarray of fuzzy outputs for
        the training rows), ``prefromyear`` / ``pretoyear``, ``preresult``
        (ndarray of shape ``(1, k)`` with the remaining outputs), ``MAPE``
        and ``RMSE`` (computed over the last ``period`` training rows).
    """
    # GDP3 is loaded and forecast like the others but is not a fuzzy input.
    econamelist = ["GDP1", "GDP2", "GDP3"]
    period = int(PreEndYear) - int(PreStartYear) + 1

    # One record per series: the target first, then each indicator.
    name = [pretype]
    finaldata = [
        json.loads(getData("yunnan_year_电力电量类", pretype, StartYear, EndYear))
    ]
    for econame in econamelist:
        ecodatajson = getData("yunnan_year_社会经济类", econame, StartYear,
                              EndYear)
        finaldata.append(json.loads(ecodatajson))
        name.append(econame)
    final = pd.DataFrame(finaldata, index=name).T

    # Keep the trailing `timestep` rows. The start is clamped at 0 because a
    # negative start would make iloc count from the end and silently pick a
    # different (shorter) window when timestep exceeds the row count.
    data1 = final.iloc[max(len(final) - timestep, 0):]
    num = len(data1)

    # Extend every indicator over the forecast years and join them on "year".
    # NOTE(review): assumes predict.pre returns a frame with a "year" column
    # and a default integer index covering history + forecast rows - confirm.
    eco = predict.pre(data1, econamelist[0], PreStartYear, PreEndYear)
    for econame in econamelist[1:]:
        c = predict.pre(data1, econame, PreStartYear, PreEndYear)
        eco = pd.merge(eco, c, on="year")

    trainx = eco.loc[:, econamelist]
    trainy = data1.loc[:, pretype]

    # Fuzzy variables.
    GDP1 = ctrl.Antecedent(np.arange(100, 15000, 20), "gdp1")
    GDP2 = ctrl.Antecedent(np.arange(150, 20000, 20), "gdp2")
    fuload = ctrl.Consequent(np.arange(100, 8000, 1), "futureload")

    # Triangular membership functions, one [a, b, c] triple per level.
    levels = ("very low", "low", "medium", "high", "very high")
    memberships = (
        (GDP1, ([100, 300, 500], [400, 850, 1250], [1000, 2500, 4000],
                [3700, 5500, 7500], [7300, 12000, 15000])),
        (GDP2, ([100, 500, 900], [500, 1450, 2600], [2500, 6500, 10500],
                [9500, 12000, 14000], [13500, 16000, 20000])),
        (fuload, ([100, 200, 300], [250, 550, 1100], [1050, 1900, 3000],
                  [2750, 3500, 5100], [5000, 8000, 8000])),
    )
    for variable, triangles in memberships:
        for level, abc in zip(levels, triangles):
            variable[level] = fuzz.trimf(variable.universe, abc)

    # Rule base: row = GDP1 level, column = GDP2 level, cell = load level.
    rule_table = (
        ("very low", "very low", "low", "medium", "medium"),
        ("very low", "low", "low", "medium", "medium"),
        ("low", "low", "medium", "high", "medium"),
        ("low", "medium", "high", "high", "very high"),
        ("low", "low", "medium", "high", "very high"),
    )
    rules = []
    for gdp1_level, row in zip(levels, rule_table):
        for gdp2_level, load_level in zip(levels, row):
            rules.append(
                ctrl.Rule(GDP1[gdp1_level] & GDP2[gdp2_level],
                          fuload[load_level]))

    consumptionSystem = ctrl.ControlSystemSimulation(ctrl.ControlSystem(rules))

    # Evaluate the rule base on every row (history followed by forecast).
    trainn = len(trainx)
    systemoutput = np.zeros(trainn, dtype=np.float64)
    for i in range(trainn):
        consumptionSystem.input["gdp1"] = trainx.loc[i, econamelist[0]]
        consumptionSystem.input["gdp2"] = trainx.loc[i, econamelist[1]]
        consumptionSystem.compute()
        systemoutput[i] = consumptionSystem.output["futureload"]

    # Error metrics over the last `period` rows of the training window.
    mape = MAPE(systemoutput[num - period:num],
                trainy.values[num - period:num])
    rmse = RMSE(systemoutput[num - period:num],
                trainy.values[num - period:num])

    trainyear = data1.index
    ytrain = systemoutput[:num]
    ypre = np.array(systemoutput[num:]).reshape(1, -1)

    return {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain,
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre,
        "MAPE": mape,
        "RMSE": rmse
    }
コード例 #2
0
def ESQRM(StartYear,
          EndYear,
          PreStartYear,
          PreEndYear,
          quatile=0.95,
          pretype="consumption",
          econamelist=["GDP"],
          city="云南省"):
    """Forecast a yearly series by quantile regression on log-transformed data.

    The target series and the selected indicators are loaded through
    ``getData``, log-transformed, the indicators are extended with
    ``preeco.pre``, a quantile regression ``pretype ~ indicators`` is fitted
    on the log data, and the fitted line is mapped back with ``exp``.

    Parameters
    ----------
    StartYear, EndYear : str
        Bounds of the history window.
    PreStartYear, PreEndYear : str
        Bounds of the forecast window.
    quatile : float
        Quantile to fit. The default is 0.95.
    pretype : str
        Name of the target series. The default is "consumption".
    econamelist : list
        Names of the indicators used as regressors (at most five). The
        default list is never mutated.
    city : str
        Only "云南省" is supported.

    Returns
    -------
    dict
        On success: ``trainfromyear``, ``traintoyear``, ``trainresult``
        (list), ``prefromyear``, ``pretoyear``, ``preresult`` (list),
        ``MAPE`` and ``RMSE``. When more than five indicators are given or
        the city is unsupported: ``{"False": <message>}``.
    """
    def get_coef(data, pretype, econamelist, quatile):
        # Fit "pretype ~ x1+x2+..." at the requested quantile and return the
        # quantile, the intercept, the slopes and their confidence bounds.
        formula = '%s ~ %s' % (pretype, "+".join(econamelist))
        res = smf.quantreg(formula, data).fit(q=quatile)
        return quatile, res.params['Intercept'], res.params[
            econamelist], res.conf_int().loc[econamelist]

    def predict(data, intercept, coef, quatile, econamelist):
        # `data` holds regressors only; the linear prediction is in log
        # space, so it is exponentiated before being returned.
        pre = np.full(len(data.values), intercept, dtype=float)
        for econame in econamelist:
            pre = pre + coef[econame] * data[econame].values
        return np.exp(pre)

    # Too many indicators: report it the same way as the unsupported-city
    # case instead of falling through to an unbound `result`.
    if len(econamelist) > 5:
        delnum = len(econamelist) - 5
        message = "经济因素选取不应超出5个,请删去%s个,再重新预测。" % delnum
        print(message)
        return {"False": message}

    if city != "云南省":
        return {"False": "暂不支持其他地区预测"}

    name = [pretype]
    finaldata = []
    period = int(PreEndYear) - int(PreStartYear) + 1

    # Historical target series.
    # NOTE(review): this reads `pretype` from the 社会经济类 table; confirm that
    # this is the intended table for the target series.
    datajson = getData("yunnan_year_社会经济类", pretype, StartYear, EndYear)
    finaldata.append(json.loads(datajson))

    # Historical indicator series.
    for econame in econamelist:
        ecodatajson = getData("yunnan_year_社会经济类", econame, StartYear,
                              EndYear)
        finaldata.append(json.loads(ecodatajson))
        name.append(econame)

    # One column per series.
    final = pd.DataFrame(finaldata, index=name).T

    # The regression is fitted in log space.
    logfinal = final.apply(np.log)

    # Extend every indicator over the forecast years and join on "year".
    eco = preeco.pre(logfinal, econamelist[0], PreStartYear, PreEndYear)
    for econame in econamelist[1:]:
        c = preeco.pre(logfinal, econame, PreStartYear, PreEndYear)
        eco = pd.merge(eco, c, on="year")

    q, b, k, lbub = get_coef(logfinal, pretype, econamelist, quatile)
    y = predict(eco, b, k, q, econamelist)

    # The last `period` values are the forecast; the rest is the fit.
    split = len(y) - period
    ytrain = y[:split]
    ytraintrue = final[pretype].values[:split]
    mape = MAPE(ytrain, ytraintrue)
    rmse = RMSE(ytrain, ytraintrue)
    ypre = y[split:]

    return {
        "trainfromyear": StartYear,
        "traintoyear": EndYear,
        "trainresult": list(ytrain),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": list(ypre),
        "MAPE": mape,
        "RMSE": rmse
    }
コード例 #3
0
ファイル: FER.py プロジェクト: Lyanf/ynpowerbackend
def FER(StartYear,EndYear,PreStartYear,PreEndYear,timestep,pretype="全社会用电量",city="云南省"):
    """Fuzzy rule-base estimate followed by double exponential smoothing.

    The target series and three GDP indicators are loaded through
    ``getData``, the last ``timestep`` rows are kept, the indicators are
    extended with ``predict.pre``, every row is evaluated by a 5x5 fuzzy
    rule base driven by the first two indicators, and the outputs are
    smoothed with double exponential smoothing.

    Parameters
    ----------
    StartYear, EndYear :
        Bounds of the history window; must be convertible with ``int``.
    PreStartYear, PreEndYear :
        Bounds of the forecast window; must be convertible with ``int``.
    timestep : int
        Number of trailing history rows to use (required, no default).
    pretype : str, optional
        Name of the target series. The default is "全社会用电量".
    city : str, optional
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    result : dict
        ``trainfromyear`` / ``traintoyear`` (first and last index label of
        the training window), ``trainresult`` (list), ``prefromyear`` /
        ``pretoyear``, ``preresult`` (list), ``MAPE`` and ``RMSE`` (computed
        over the last ``period`` training rows).

    Raises
    ------
    ValueError
        If ``timestep`` exceeds the number of history years, if the forecast
        window spans a single year or less, or if ``timestep`` is smaller
        than the forecast window length plus one.
    """
    def double_exponential_smoothing(series, alpha, beta):
        # Double exponential smoothing; each output is level + trend after
        # the update for that observation. The first value is passed through.
        result = [series[0]]
        if len(series) < 2:
            return result
        level, trend = series[0], series[1] - series[0]
        for value in series[1:]:
            last_level, level = level, alpha*value + (1-alpha)*(level+trend)
            trend = beta*(level-last_level) + (1-beta)*trend
            result.append(level+trend)
        return result

    # Input validation (same checks, order and messages as before).
    if timestep > (int(EndYear)-int(StartYear)+1):
        raise ValueError("训练步长过大,请调整后重试.")
    if int(PreEndYear)-int(PreStartYear)<1:
        raise ValueError("该算法不支持一年及一年内的预测.")
    if timestep<(int(PreEndYear)-int(PreStartYear)+2):
        raise ValueError("训练步长小于预测年份区间长度,请增加训练步长.")

    period=int(PreEndYear)-int(PreStartYear)+1
    # The third indicator is loaded and forecast but is not a fuzzy input.
    econamelist=["第一产业GDP","第二产业GDP","第三产业GDP"]

    # One record per series: the target first, then each indicator.
    name=[pretype]
    finaldata=[json.loads(getData("云南省_year_电力电量类", pretype, StartYear, EndYear))]
    for econame in econamelist:
        ecodatajson=getData("云南省_year_社会经济类", econame, StartYear, EndYear)
        finaldata.append(json.loads(ecodatajson))
        name.append(econame)
    final=pd.DataFrame(finaldata,index=name).T

    # Training window: the trailing `timestep` rows.
    data1=final.iloc[len(final.values)-timestep:]
    num=len(data1.values)

    # Extend every indicator over the forecast years and join on "year".
    # NOTE(review): assumes predict.pre returns a frame with a "year" column
    # and a default integer index covering history + forecast rows - confirm.
    eco=predict.pre(data1,econamelist[0],PreStartYear,PreEndYear)
    for econame in econamelist[1:]:
        c=predict.pre(data1,econame,PreStartYear,PreEndYear)
        eco=pd.merge(eco,c,on="year")

    trainx=eco.loc[:,econamelist]
    trainy=data1.loc[:,pretype]

    # Fuzzy variables.
    GDP1=ctrl.Antecedent(np.arange( 100, 15000, 20 ), "gdp1" )
    GDP2=ctrl.Antecedent(np.arange( 150, 20000, 20 ), "gdp2" )
    fuload=ctrl.Consequent(np.arange( 100, 8000, 1 ), "futureload" )

    # Triangular membership functions, one [a, b, c] triple per level.
    levels=("very low","low","medium","high","very high")
    memberships=(
        (GDP1, ([100, 300, 500], [400, 850, 1250], [1000, 2500, 4000],
                [3700, 5500, 7500], [7300, 12000, 15000])),
        (GDP2, ([100, 500, 900], [500, 1450, 2600], [2500, 6500, 10500],
                [9500, 12000, 14000], [13500, 16000, 20000])),
        (fuload, ([100, 200, 300], [250, 550, 1100], [1050, 1900, 3000],
                  [2750, 3500, 5100], [5000, 8000, 8000])),
    )
    for variable, triangles in memberships:
        for level, abc in zip(levels, triangles):
            variable[level] = fuzz.trimf(variable.universe, abc)

    # Rule base: row = GDP1 level, column = GDP2 level, cell = load level.
    rule_table=(
        ("very low", "very low", "low", "medium", "medium"),
        ("very low", "low", "low", "medium", "medium"),
        ("low", "low", "medium", "high", "medium"),
        ("low", "medium", "high", "high", "very high"),
        ("low", "low", "medium", "high", "very high"),
    )
    rules=[]
    for gdp1_level, row in zip(levels, rule_table):
        for gdp2_level, load_level in zip(levels, row):
            rules.append(ctrl.Rule(GDP1[gdp1_level]&GDP2[gdp2_level], fuload[load_level]))

    consumptionSystem = ctrl.ControlSystemSimulation(ctrl.ControlSystem(rules))

    # Evaluate the rule base on every row (history followed by forecast).
    trainn=len(trainx)
    systemoutput=np.zeros(trainn, dtype=np.float64 )
    for i in range(trainn):
        consumptionSystem.input["gdp1"] = trainx.loc[i,econamelist[0]]
        consumptionSystem.input["gdp2"] = trainx.loc[i,econamelist[1]]
        consumptionSystem.compute()
        systemoutput[i] = consumptionSystem.output["futureload"]

    alpha=0.9
    beta=1
    # Smooth the training part, the evaluation tail and the forecast part
    # separately; each call restarts level/trend from its own first values.
    allexsystemoutput=double_exponential_smoothing(systemoutput[:num], alpha, beta)
    exsystemoutput=double_exponential_smoothing(systemoutput[num-period:num], alpha, beta)
    exprey=double_exponential_smoothing(systemoutput[num:], alpha, beta)

    mape=MAPE(exsystemoutput,trainy.values[num-period:num])
    rmse=RMSE(exsystemoutput,trainy.values[num-period:num])

    trainyear=data1.index
    ytrain=np.array(allexsystemoutput).reshape(1,-1).squeeze()
    ypre=np.array(exprey).reshape(1,-1).squeeze()

    result={"trainfromyear":trainyear[0],"traintoyear":trainyear[-1],"trainresult":ytrain.tolist(),"prefromyear":PreStartYear,"pretoyear":PreEndYear,"preresult":ypre.tolist(),"MAPE":mape,"RMSE":rmse}

    return result
コード例 #4
0
def PCAIndustry(StartYear,EndYear,PreStartYear,PreEndYear,pretype,econamelist,city="云南省"):
    """Forecast an industry series with PCA followed by linear regression.

    The target series and the selected indicators are loaded through
    ``getData``, the indicators are extended with ``preeco.pre``,
    standardised and projected with PCA; a linear regression is fitted on
    the first ``ceil(70%)`` history years, evaluated on the remaining
    history years and then applied to the forecast years.

    Parameters
    ----------
    StartYear, EndYear :
        Bounds of the history window; must be convertible with ``int``.
    PreStartYear, PreEndYear :
        Bounds of the forecast window; must be convertible with ``int``.
    pretype : str
        Name of the target series.
    econamelist : list
        Names of the indicators used as features.
    city : str, optional
        Only "云南省" is supported.

    Returns
    -------
    dict
        ``trainfromyear`` / ``traintoyear`` (first and last index label of
        the evaluation rows), ``trainresult`` (model output on the
        evaluation rows, list), ``prefromyear`` / ``pretoyear``,
        ``preresult`` (forecast), ``MAPE`` and ``RMSE`` (on the evaluation
        rows).

    Raises
    ------
    ValueError
        If ``city`` is not "云南省".
    """
    if city!="云南省":
        raise ValueError("暂不支持其他地区预测")

    name=[pretype]
    finaldata=[]
    historyyear=np.arange(int(StartYear),int(EndYear)+1)

    # Historical target series.
    datajson=getData("云南省_year_电力电量类-行业", pretype, StartYear, EndYear)
    finaldata.append(json.loads(datajson))

    # Historical indicator series.
    for econame in econamelist:
        ecodatajson=getData("云南省_year_社会经济类", econame, StartYear, EndYear)
        finaldata.append(json.loads(ecodatajson))
        name.append(econame)

    # One column per series plus an integer "Year" column for filtering.
    final=pd.DataFrame(finaldata,index=name).T
    final["Year"]=historyyear

    # Extend every indicator over the forecast years and join on "year".
    # NOTE(review): assumes preeco.pre returns a frame whose first column is
    # "year" (integers) followed by the indicator column - confirm.
    eco=preeco.pre(final,econamelist[0],PreStartYear,PreEndYear)
    for econame in econamelist[1:]:
        c=preeco.pre(final,econame,PreStartYear,PreEndYear)
        eco=pd.merge(eco,c,on="year")

    Index=eco.columns[1:].tolist()  # feature column names

    # Standardise the features over history + forecast rows.
    scaler = StandardScaler()
    scaler.fit(eco[Index].values)
    Data_eco_scaler=pd.DataFrame(data=scaler.transform(eco[Index].values),columns=Index)
    Data_eco_scaler["Year"]=eco["year"].values

    # Chronological split: the first ceil(70%) history years train the
    # model, the rest evaluate it. The training range stops *before* the
    # first evaluation year so that no year is in both sets.
    split_offset=math.ceil(len(historyyear)*0.7)
    train_years=range(int(StartYear),int(StartYear)+split_offset)
    test_years=range(int(StartYear)+split_offset,int(EndYear)+1)

    x_train=Data_eco_scaler.loc[Data_eco_scaler["Year"].isin(train_years)]
    y_train=final.loc[final["Year"].isin(train_years)]
    x_test=Data_eco_scaler.loc[Data_eco_scaler["Year"].isin(test_years)]
    y_test=final.loc[final["Year"].isin(test_years)]

    # Number of components needed to explain 90% of the variance.
    n_components = PCA(0.9).fit(Data_eco_scaler[Index].values).n_components_

    # Project with a PCA fitted on the training rows only.
    pca = PCA(n_components)
    pca.fit(x_train[Index].values)

    x_train_pca=pca.transform(x_train[Index].values)
    y_train_pca=y_train[pretype]
    x_test_pca=pca.transform(x_test[Index].values)
    y_test_pca=y_test[pretype]

    # Linear regression on the principal components.
    pca_model = LinearRegression()
    pca_model.fit(x_train_pca, y_train_pca)
    pca_predict = pca_model.predict(x_test_pca)

    rmse = RMSE(pca_predict,y_test_pca)
    mape = MAPE(pca_predict,y_test_pca)

    # The evaluation rows keep their labels from `final`, so the reported
    # years are read from their index directly.
    trainyear=y_test.index

    # Forecast.
    predata=Data_eco_scaler.loc[Data_eco_scaler["Year"].isin(range(int(PreStartYear),int(PreEndYear)+1))]
    forecast=pca_model.predict(pca.transform(predata[Index].values))

    ytrain=pca_predict.tolist()
    ypre=np.array(forecast).squeeze().tolist()

    result={"trainfromyear":trainyear[0],"traintoyear":trainyear[-1],"trainresult":ytrain,"prefromyear":PreStartYear,"pretoyear":PreEndYear,"preresult":ypre,"MAPE":mape,"RMSE":rmse}
    return result
コード例 #5
0
def QuantileRegression(StartYear,
                       EndYear,
                       PreStartYear,
                       PreEndYear,
                       quatile=0.95,
                       pretype="全社会用电量",
                       econamelist=["GDP"],
                       city="云南省"):
    """Forecast a yearly series by quantile regression on economic indicators.

    The target series and the selected indicators are loaded through
    ``getData``, the indicators are extended with ``preeco.pre``, a quantile
    regression ``pretype ~ indicators`` is fitted at ``quatile`` and the
    fitted line is evaluated on history and forecast rows.

    Parameters
    ----------
    StartYear, EndYear :
        Bounds of the history window (at least five years).
    PreStartYear, PreEndYear :
        Bounds of the forecast window (more than one year).
    quatile : float
        Quantile to fit. The default is 0.95.
    pretype : str
        Name of the target series. The default is "全社会用电量".
    econamelist : list
        Names of the indicators used as regressors (at most five). The
        default list is never mutated.
    city : str
        Only "云南省" is supported.

    Returns
    -------
    dict
        ``trainfromyear``, ``traintoyear``, ``trainresult`` (list),
        ``prefromyear``, ``pretoyear``, ``preresult`` (list), ``MAPE`` and
        ``RMSE``.

    Raises
    ------
    ValueError
        If more than five indicators are given, the forecast window spans a
        single year or less, the history window is shorter than five years,
        or the city is unsupported.
    """
    def get_coef(data, xnamelist, yname, quatile):
        # Fit "yname ~ x1+x2+..." at the requested quantile and return the
        # quantile, the intercept, the slopes and their confidence bounds.
        formula = '%s ~ %s' % (yname, "+".join(xnamelist))
        res = smf.quantreg(formula, data).fit(q=quatile)
        return quatile, res.params['Intercept'], res.params[
            xnamelist], res.conf_int().loc[xnamelist]

    def predict(data, intercept, coef, quatile, xnamelist):
        # `data` holds regressors only; returns intercept + sum(coef * x).
        pre = np.full(len(data.values), intercept, dtype=float)
        for xname in xnamelist:
            pre = pre + coef[xname] * data[xname].values
        return pre

    # Input validation (same checks, order and messages as before).
    if len(econamelist) > 5:
        delnum = len(econamelist) - 5
        raise ValueError("经济因素选取不应超出 5 个,请删去 %s 个,再重新预测" % delnum)
    if int(PreEndYear) - int(PreStartYear) < 1:
        raise ValueError("该算法不支持一年及一年内的预测")
    if (int(EndYear) - int(StartYear) + 1) < 5:
        raise ValueError("历史年份区间过短,建议历史年份区间在 5 年以上")
    if city != "云南省":
        raise ValueError("暂不支持其他地区预测")

    name = [pretype]
    finaldata = []
    period = int(PreEndYear) - int(PreStartYear) + 1

    # Historical target series.
    datajson = getData("云南省_year_电力电量类", pretype, StartYear, EndYear)
    finaldata.append(json.loads(datajson))

    # Historical indicator series.
    for econame in econamelist:
        ecodatajson = getData("云南省_year_社会经济类", econame, StartYear,
                              EndYear)
        finaldata.append(json.loads(ecodatajson))
        name.append(econame)

    # One column per series.
    final = pd.DataFrame(finaldata, index=name).T

    # Extend every indicator over the forecast years and join on "year".
    eco = preeco.pre(final, econamelist[0], PreStartYear, PreEndYear)
    for econame in econamelist[1:]:
        c = preeco.pre(final, econame, PreStartYear, PreEndYear)
        eco = pd.merge(eco, c, on="year")

    # Fit at the quantile requested by the caller.
    q, b, k, lbub = get_coef(final, econamelist, pretype, quatile)
    y = predict(eco, b, k, q, econamelist)

    # The last `period` values are the forecast; the rest is the fit.
    split = len(y) - period
    ytrain = y[:split]
    ytraintrue = final[pretype].values[:split]
    mape = MAPE(ytrain, ytraintrue)
    rmse = RMSE(ytrain, ytraintrue)
    ypre = y[split:]

    return {
        "trainfromyear": StartYear,
        "traintoyear": EndYear,
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }