コード例 #1
0
ファイル: Increase.py プロジェクト: Lyanf/ynpowerbackend
def Increase(StartYear,EndYear,PreStartYear,PreEndYear,rate,pretype="全社会用电量",city="云南省"):
    """Project a yearly series forward at a fixed annual growth rate.

    The last historical value of ``pretype`` is compounded by ``rate``
    percent once per forecast year.

    Parameters
    ----------
    StartYear, EndYear :
        First and last historical year; passed unchanged to ``getData`` and
        converted with ``int`` to build the historical year axis.
    PreStartYear, PreEndYear :
        First and last forecast year; converted with ``int`` to size the
        forecast horizon and echoed back in the result.
    rate :
        Annual growth rate in percent (it is divided by 100 before use).
    pretype : str, optional
        Series requested from the "云南省_year_电力电量类" dataset.
    city : str, optional
        Only "云南省" is handled; any other value makes the function
        return None.

    Returns
    -------
    dict or None
        ``prefromyear`` / ``pretoyear`` (the inputs), ``preresult`` (list of
        forecast values) and ``MAPE`` / ``RMSE``, both fixed at 0 because no
        hold-out evaluation is performed.
    """
    if city != "云南省":
        return None

    growth = rate / 100

    # Load the historical series and turn it into a one-column frame
    # indexed by the keys of the decoded JSON object.
    history = json.loads(getData("云南省_year_电力电量类", pretype, StartYear, EndYear))
    table = pd.DataFrame([history], index=[pretype]).T

    history_years = np.arange(int(StartYear), int(EndYear) + 1)
    horizon = len(np.arange(int(PreStartYear), int(PreEndYear) + 1))

    # Assigning the year axis fails if the fetched data does not cover
    # every year of the requested range.
    table["time"] = history_years

    last_value = table[pretype].values[-1]

    forecast = np.array(
        [last_value * (1 + growth) ** step for step in range(1, horizon + 1)])

    return {
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": forecast.tolist(),
        "MAPE": 0,
        "RMSE": 0
    }
コード例 #2
0
ファイル: Growth.py プロジェクト: Lyanf/ynpowerbackend
def Growth(StartYear,
           EndYear,
           PreStartYear,
           PreEndYear,
           pretype="全社会用电量",
           econamelist="GDP",
           city="云南省",
           planflag=1,
           plan=1,
           pro=1):
    """
    Forecast a yearly load series from one economic series with a growth curve.

    The curve ``y = exp(a / x + b) + c`` is fitted with ``leastsq``, where
    ``x`` is the economic series and ``y`` the load series. It is fitted once
    on the leading part of the history to score a hold-out (the last
    ``floor(n / 5)`` points), and once on the full history to produce the
    forecast. Future values of the economic series come from ``ic.getpred``.

    Parameters
    ----------
    StartYear, EndYear :
        First and last historical year; passed unchanged to ``getData``.
    PreStartYear, PreEndYear :
        First and last forecast year; converted with ``int`` to size the
        forecast horizon and echoed back in the result.
    pretype : str, optional
        Load series requested from the "云南省_year_电力电量类" dataset.
        The default is "全社会用电量".
    econamelist : str, optional
        The single economic series requested from the
        "云南省_year_社会经济类" dataset. The default is "GDP".
    city : str, optional
        Only "云南省" is handled; for any other value the function falls
        through and returns None. The default is "云南省".
    planflag, plan, pro : optional
        Forwarded unchanged to ``ic.getpred``. Each defaults to 1.

    Returns
    -------
    dict or None
        ``trainfromyear`` / ``traintoyear``: first and last index label found
        for the hold-out values; ``trainresult``: hold-out predictions;
        ``prefromyear`` / ``pretoyear``: the inputs; ``preresult``: forecast
        values; ``MAPE`` / ``RMSE``: hold-out errors from the project helpers.

    Raises
    ------
    ValueError
        From the single-factor check (unreachable as written, see below).
    """
    def func3(params, x):
        # Growth curve: y = exp(a / x + b) + c.
        a, b, c = params
        return np.exp(a / x + b) + c

    def error3(params, x, y):
        # Residual vector minimised by leastsq.
        return func3(params, x) - y

    def slovePara3(x, y):
        # Fit from a fixed initial guess; callers read the fitted (a, b, c)
        # from element 0 of the returned leastsq result.
        p0 = [1, 0.02, 0]
        Para = leastsq(error3, p0, args=(x, y))
        return Para

    # The argument is wrapped into a one-element list here, so the length
    # check right below can never fail as written.
    econamelist = [econamelist]
    if len(econamelist) != 1:
        raise ValueError("仅支持选择一个因素变量")

    elif city == "云南省":
        name = [pretype]
        finaldata = []

        # Read the historical load data
        datajson = getData("云南省_year_电力电量类", pretype, StartYear, EndYear)
        # print(datajson)
        data = json.loads(datajson)
        finaldata.append(data)

        # Read the economic data
        ecodatajson = getData("云南省_year_社会经济类", econamelist[0], StartYear,
                              EndYear)
        ecodata = json.loads(ecodatajson)
        finaldata.append(ecodata)
        name.append(econamelist[0])

        # Build the final DataFrame: one column per series after transposing
        final = pd.DataFrame(finaldata, index=name)
        final = final.T

        x = final[econamelist[0]].values
        y = final[pretype].values  #load

        x = x.reshape(-1, 1)
        y = y.reshape(-1, 1)

        # Split into fitting data and hold-out data (last floor(n / 5) points)
        num = len(x)
        testyear = math.floor(num / 5)
        trainx = x[:num - testyear].squeeze()
        trainy = y[:num - testyear].squeeze()

        testx = x[num - testyear:]
        testy = y[num - testyear:]

        # Fit the model on the leading part of the history
        Para = slovePara3(trainx, trainy)
        a, b, c = Para[0]

        # NOTE(review): ic.getpred is opaque here. After transposing, rows
        # 0..50 are averaged and the row whose mean equals the median of
        # those 51 means is selected -- presumably 51 scenario paths of
        # growth multipliers; confirm against ic.getpred.
        testp = ic.getpred(testx, testyear, planflag, plan, pro)
        testp = np.array(testp).T
        testpm = []
        for i in range(51):
            testpm.append(np.mean(testp[i]))
        testpmm = testpm.index(np.median(testpm))
        testpredx = testp[testpmm]
        # Scale the selected path by the last hold-out value of x.
        testpredx = [k * testx[-1] for k in testpredx]
        testpredy = [np.exp(a / x + b) + c for x in testpredx]

        # Recover index labels for the hold-out values by matching each one
        # against the load column with a tolerance of +/-5 (first match wins).
        trainyear = []
        for t in testy:
            count = -1
            for d in final[pretype]:
                count += 1

                if t > d - 5 and t < d + 5:
                    # print("yes")
                    trainyear.append(final.index[count])
                    break

        # Hold-out errors
        mape = MAPE(testpredy, testy)
        rmse = RMSE(testpredy, testy)

        # Forecast: refit on the full history
        x = x.squeeze()
        y = y.squeeze()
        Parapre = slovePara3(x, y)
        ap, bp, cp = Parapre[0]

        preyear = np.arange(int(PreStartYear), int(PreEndYear) + 1)
        year = len(preyear)
        # Same median-path selection as above, now over the forecast horizon.
        p = ic.getpred(x, year, planflag, plan, pro)
        p = np.array(p).T
        pm = []
        for i in range(51):
            pm.append(np.mean(p[i]))
        pmm = pm.index(np.median(pm))
        predx = p[pmm]
        predx = [k * x[-1] for k in predx]

        predy = [np.exp(ap / x0 + bp) + cp for x0 in predx]
        predy = np.array(predy).squeeze()

        # Assemble the result
        ytrain = np.array(testpredy).squeeze()
        ypre = np.array(predy).squeeze()
        result = {
            "trainfromyear": trainyear[0],
            "traintoyear": trainyear[-1],
            "trainresult": ytrain.tolist(),
            "prefromyear": PreStartYear,
            "pretoyear": PreEndYear,
            "preresult": ypre.tolist(),
            "MAPE": mape,
            "RMSE": rmse
        }
        return result
コード例 #3
0
ファイル: SVM.py プロジェクト: Lyanf/ynpowerbackend
def SVM(StartYear,
        EndYear,
        PreStartYear,
        PreEndYear,
        timestep,
        pretype="全社会用电量",
        city="云南省"):
    """Forecast a yearly load series with a multi-output polynomial SVR.

    Sliding windows are built by the project helper ``generate_data``: each
    sample has ``timestep`` inputs and one output per forecast year. The
    model is fitted on all windows; the second half of the same windows is
    then re-predicted to compute MAPE/RMSE, so the reported errors are
    in-sample.

    Parameters
    ----------
    StartYear, EndYear :
        First and last historical year; passed unchanged to ``getData``.
    PreStartYear, PreEndYear :
        First and last forecast year; they set the number of model outputs
        and are echoed back in the result.
    timestep : int
        Number of past values fed to the model per sample.
    pretype : str, optional
        Series requested from the "云南省_year_电力电量类" dataset.
    city : str, optional
        Not used by this function.

    Returns
    -------
    dict
        ``trainfromyear`` / ``traintoyear``, ``trainresult`` (model output for
        the last evaluated window), ``prefromyear`` / ``pretoyear``,
        ``preresult`` (forecast), ``MAPE`` and ``RMSE``.

    Raises
    ------
    ValueError
        If ``timestep`` exceeds the number of historical years, or the
        history is shorter than the forecast span plus ``timestep``.
    """
    # Validate the window sizes before any data is fetched.
    if timestep > (int(EndYear) - int(StartYear) + 1):
        raise ValueError("训练步长过大,请调整后重试")
    if int(EndYear) - int(StartYear) < (int(PreEndYear) - int(PreStartYear) +
                                        timestep):
        raise ValueError("历史时间长度小于预测时间长度与训练步长之和,请调整后重试")

    outputlen = int(PreEndYear) - int(PreStartYear) + 1

    datajson = getData("云南省_year_电力电量类", pretype, StartYear, EndYear)
    data = json.loads(datajson)
    final = pd.DataFrame([data], index=[pretype]).T

    # The original comment says the test split must be 0 here. The windows
    # are built once; the original repeated this call with identical
    # arguments and discarded the first result.
    X, y = generate_data(final,
                         timestep,
                         outputlen,
                         test_size=0,
                         if_norm="no")

    svr = SVR(kernel="poly", gamma="scale", C=0.1)  # kernel="linear","poly"
    multi_model = MultiOutputRegressor(svr)
    multi_model.fit(X["train"], y["train"])

    testdata = final.values

    # Evaluate on the second half of the training windows.
    num = len(X["train"])
    selet = int(np.floor(num / 2))
    testinput = X["train"][selet:, :]
    testoutput = y["train"][selet:, :]

    y_svr = multi_model.predict(testinput)
    y_svr_real = np.array(y_svr).reshape(-1, 1)
    y_real = np.array(testoutput).reshape(-1, 1)

    mape = MAPE(y_svr_real, y_real)
    rmse = RMSE(y_svr_real, y_real)

    # Forecast from the last `timestep` observations, in chronological order
    # (reverse slice followed by flipud).
    pre = multi_model.predict(
        np.array(np.flipud(testdata[-1:-(timestep + 1):-1])).reshape(
            1, -1))

    ytrain = y_svr[-1]

    # Recover index labels for the last evaluated window by matching each
    # target value against the series with a tolerance of +/-1.
    trainyear = []
    for t in testoutput[-1]:
        count = -1
        for d in final[pretype]:
            count += 1
            if t > d - 1 and t < d + 1:
                trainyear.append(final.index[count])
                break

    ypre = np.array(pre).flatten()
    result = {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
コード例 #4
0
ファイル: BPNNIndustry.py プロジェクト: Lyanf/ynpowerbackend
def BPNNIndustry(StartYear,
                 EndYear,
                 PreStartYear,
                 PreEndYear,
                 timestep,
                 pretype,
                 city="云南省",
                 hidden=[24, 12],
                 learningrate=0.005,
                 epoch=1000):
    """
    Forecast a yearly industry load series with a fully connected network.

    Sliding windows are built by the project helper ``generate_data``
    (``timestep`` inputs, one output per forecast year). A TensorFlow 1.x
    network is trained on all windows except the last one, evaluated on the
    second half of the windows, and finally applied to the last ``timestep``
    observations to produce the forecast.

    Parameters
    ----------
    StartYear, EndYear :
        First and last historical year; passed unchanged to ``getData``.
    PreStartYear, PreEndYear :
        First and last forecast year; they set the number of network outputs
        and are echoed back in the result.
    timestep : int
        Number of past values fed to the network per sample.
    pretype : str
        Series requested from the "云南省_year_电力电量类-行业" dataset
        (required, no default).
    city : str, optional
        Not used by this function. The default is "云南省".
    hidden : list, optional
        Hidden layers of the network: one element per layer, each element
        being that layer's neuron count. The default is [24,12].
    learningrate : float, optional
        Learning rate passed to the Adam optimizer. The default is 0.005.
    epoch : int, optional
        Number of training iterations. The default is 1000.

    Returns
    -------
    dict
        ``trainfromyear`` / ``traintoyear``, ``trainresult``,
        ``prefromyear`` / ``pretoyear``, ``preresult``, ``MAPE``, ``RMSE``.

    Raises
    ------
    ValueError
        If ``timestep`` exceeds half the number of historical years, or the
        history is shorter than the forecast span plus ``timestep``.
    """
    def bpnn(timestep, outputlen, x_train, y_train, x_test, y_test, x_pre,
             hiddenneron, lr, epoch):
        # Build the graph: input/target placeholders, then a chain of fully
        # connected layers ending in an `outputlen`-wide layer.
        x = tf.placeholder(tf.float32, shape=[None, timestep], name="Input")
        y = tf.placeholder(tf.float32, shape=[None, outputlen], name="Onput")
        hlen = len(hiddenneron)
        # NOTE(review): the dict returned by locals() is used as a scratch
        # mapping "f1", "f2", ... -> layer tensor; values are only read back
        # through the same dict, never as real local variables.
        f = locals()
        for i in range(hlen + 1):
            if i == 0:
                f["f%s" % (i + 1)] = tf.contrib.layers.fully_connected(
                    x, hiddenneron[i])
            else:
                if i == hlen:
                    # Output layer, fed by the last hidden layer.
                    pre = tf.contrib.layers.fully_connected(
                        f["f%s" % (i)], outputlen)
                else:
                    f["f%s" % (i + 1)] = tf.contrib.layers.fully_connected(
                        f["f%s" % (i)], hiddenneron[i])

        loss = tf.losses.mean_squared_error(y, pre)

        train_op = tf.train.AdamOptimizer(lr).minimize(loss)

        # NOTE(review): the saver is never used (the save call below is
        # commented out).
        saver = tf.train.Saver()

        with tf.Session() as sess:
            init = tf.initialize_all_variables()
            sess.run(init)
            # Full-batch training; the loss is re-evaluated after every step
            # and printed every 50 iterations.
            for i in range(epoch):
                sess.run(train_op, feed_dict={x: x_train, y: y_train})
                lossz = sess.run(loss, feed_dict={x: x_train, y: y_train})
                if i % 50 == 0:
                    print(lossz)

            y_train_pre = sess.run(pre, feed_dict={x: x_train})

            y_test_pre = sess.run(pre, feed_dict={x: x_test})

            y_pre = sess.run(pre, feed_dict={x: x_pre})

            training = np.array(y_train_pre).squeeze()

            predictions = np.array(y_test_pre).squeeze()
            labels = np.array(y_test).squeeze()
            # saver.save(sess, "D:/lab/Yunnan_Pre/result/yunnan_shortterm_钢铁_BPNN/")
        return predictions, labels, y_pre, training

    if timestep > (int(EndYear) - int(StartYear) + 1) * 0.5:
        raise ValueError("训练步长过大,请调整后重试")
    elif int(EndYear) - int(StartYear) < (int(PreEndYear) - int(PreStartYear) +
                                          timestep):
        raise ValueError("历史时间长度小于预测时间长度与训练步长之和, 请调整后重试")
    else:
        # Read the data and derive the parameters
        name = [pretype]
        finaldata = []
        outputlen = int(PreEndYear) - int(PreStartYear) + 1

        datajson = getData("云南省_year_电力电量类-行业", pretype, StartYear, EndYear)
        data = json.loads(datajson)
        finaldata.append(data)
        final = pd.DataFrame(finaldata, index=name)
        final = final.T

        test_size = 0  # the test split has to be 0 here
        X, y = generate_data(final,
                             timestep,
                             outputlen,
                             test_size=test_size,
                             if_norm="no")
        testdata = final[pretype].values
        testinput = []
        testoutput = []

        # Evaluate on the second half of the training windows.
        num = len(X["train"])
        selet = int(np.floor(num / 2))
        testinput = X["train"][selet:, :]
        testoutput = y["train"][selet:, :]

        # Last `timestep` observations in chronological order (reverse slice
        # followed by flipud), shaped as a single sample.
        x_pre = np.array(np.flipud(testdata[-1:-(timestep + 1):-1])).reshape(
            1, -1)

        # The network is trained on every window except the last one.
        test_pre, test_label, pre, training = bpnn(
            timestep, outputlen, X["train"][:-1, :], y["train"][:-1, :],
            testinput, testoutput, x_pre, hidden, learningrate, epoch)

        mape = MAPE(test_pre, test_label)
        rmse = RMSE(test_pre, test_label)

        # Save the training result; the years may be off
        #trainingtrue=y["train"][:-1,:].flatten()
        trainingtrue = y["train"][-1, :]

        # Recover index labels for the last window's targets by matching
        # each value against the series with a tolerance of +/-5.
        trainyear = []
        for t in trainingtrue:
            count = -1
            for d in final[pretype]:
                count += 1

                if t > d - 5 and t < d + 5:
                    # print("yes")
                    trainyear.append(final.index[count])
                    break

        # NOTE(review): `training` holds predictions for X["train"][:-1], so
        # training[-1] belongs to the second-to-last window while the years
        # above come from the last window -- confirm this is intended.
        ytrain = training[-1]
        ypre = pre.flatten()

        #trainsave.to_csv("D:/lab/Yunnan_Pre/result/yunnan_shortterm_consumption_BPNN_training.csv")
        result = {
            "trainfromyear": trainyear[0],
            "traintoyear": trainyear[-1],
            "trainresult": ytrain.tolist(),
            "prefromyear": PreStartYear,
            "pretoyear": PreEndYear,
            "preresult": ypre.tolist(),
            "MAPE": mape,
            "RMSE": rmse
        }
        # Return the assembled result
        return result
コード例 #5
0
ファイル: Outlier.py プロジェクト: Lyanf/ynpowerbackend
def Outlier(StartYear,
            EndYear,
            datatype="电力电量类",
            pretype="全社会用电量",
            city="云南省"):
    """Flag yearly values lying more than 1.5 standard deviations from the mean.

    Each flagged value gets a suggested correction: the mean of its two
    neighbours, or, at either end of the series, the mean of the value
    itself and its single neighbour.

    Parameters
    ----------
    StartYear, EndYear :
        First and last year; passed unchanged to ``getData`` and converted
        with ``int`` to build the year axis used to label outliers.
    datatype : str, optional
        Dataset suffix; the dataset queried is "云南省_year_<datatype>".
    pretype : str, optional
        Series requested from that dataset.
    city : str, optional
        Not used by this function.

    Returns
    -------
    dict
        ``outlier`` (flagged values), ``year`` (their years) and
        ``correction`` (suggested replacements), aligned by position. All
        three lists are empty when nothing is flagged.

    Raises
    ------
    ValueError
        If the dataset returns no data for the requested range.
    """
    year = np.arange(int(StartYear), int(EndYear) + 1, 1)

    datajson = getData("云南省_year_%s" % datatype, pretype, StartYear, EndYear)

    data = json.loads(datajson)
    if len(data) == 0:
        raise ValueError("%s 中不存在 %s-%s 年的%s 数据" %
                         (datatype, StartYear, EndYear, pretype))

    final = pd.DataFrame([data], index=[pretype]).T

    t = final[pretype]
    tnp = np.array(t.values)

    # Locate outliers by position. Looking positions up by value (as the
    # original did) maps repeated outlier values onto the first occurrence
    # only, reporting the same year twice.
    is_outlier = np.asarray(abs(t - t.mean()) > 1.5 * t.std())
    index_out = np.flatnonzero(is_outlier)
    if len(index_out) == 0:
        return {"outlier": [], "year": [], "correction": []}

    out = tnp[index_out].tolist()

    outyear = []
    outcorrect = []
    last = len(tnp) - 1
    for i in index_out:
        outyear.append(year[i])
        if i == 0:
            # No left neighbour: average with the right neighbour.
            correct = (tnp[i + 1] + tnp[i]) / 2
        elif i == last:
            # No right neighbour: average with the left neighbour.
            correct = (tnp[i] + tnp[i - 1]) / 2
        else:
            correct = (tnp[i + 1] + tnp[i - 1]) / 2
        outcorrect.append(correct)

    return {
        "outlier": out,
        "year": outyear,
        "correction": outcorrect
    }
コード例 #6
0
def SARIMAIndustry(StartYear,EndYear,PreStartYear,PreEndYear,pretype,city="云南省"):
    """Forecast yearly industry load by fitting a SARIMA model to monthly data.

    Monthly history is split roughly 70/30 by whole years. A small grid of
    (p, q, P, Q) orders (each 0 or 1, with d = D = 1 and period 12) is
    ranked by AIC, the best order is refitted on the training months to
    score the hold-out, and refitted on all months to forecast. Monthly
    forecasts are summed into yearly totals.

    Parameters
    ----------
    StartYear, EndYear :
        First and last historical year. Monthly data is requested from
        "<StartYear>/1" to "<EndYear>/12".
    PreStartYear, PreEndYear :
        First and last forecast year; they set the forecast horizon and are
        echoed back in the result.
    pretype : str
        Series requested from the "云南省_month_电力电量类-行业" and
        "云南省_year_电力电量类-行业" datasets.
    city : str, optional
        Not used by this function.

    Returns
    -------
    dict
        ``trainfromyear`` / ``traintoyear``, ``trainresult`` (yearly hold-out
        predictions), ``prefromyear`` / ``pretoyear``, ``preresult`` (yearly
        forecast), ``MAPE`` and ``RMSE``.

    Raises
    ------
    ValueError
        If none of the candidate orders can be fitted.
    """
    StartMonth="%s/1"%(StartYear)
    EndMonth="%s/12"%(EndYear)
    # Read the monthly data
    monthdatajson=getData("云南省_month_电力电量类-行业", pretype, StartMonth, EndMonth)
    monthdata=json.loads(monthdatajson)
    pdmonthdata=pd.DataFrame(monthdata,index=[pretype])
    pdmonthdata=pdmonthdata.T
    # Read the yearly data
    yeardatajson=getData("云南省_year_电力电量类-行业", pretype, StartYear, EndYear)
    yeardata=json.loads(yeardatajson)
    pdyeardata=pd.DataFrame(yeardata,index=[pretype])
    pdyeardata=pdyeardata.T

    # Keep about 70% of the years (rounded down) for fitting.
    totalyear=int(EndYear)-int(StartYear)+1
    train_years=math.floor(totalyear-totalyear*0.3)
    train_num=train_years*12
    monthly_values=pdmonthdata[pretype].values
    train_data=monthly_values[:train_num]
    test_data=monthly_values[train_num:]

    # De-mean the training months, then take a first difference.
    mean = sum(train_data)/len(train_data)
    data_mean=np.array([value - mean for value in train_data])
    df_mean_1 = np.diff(data_mean,1)

    # ADF test on the differenced series; its result is not used below.
    ts.adfuller(np.array(df_mean_1).reshape(-1))

    # SARIMA(p,d,q)(P,D,Q)s: (p,d,q) are the non-seasonal orders, (P,D,Q)
    # the seasonal ones, s the period (12 for monthly data). The best
    # combination is selected by AIC.
    p=q=P=Q=range(0,2)
    d=D=1
    parameters_list = list(itertools.product(p,q,P,Q))

    warnings.filterwarnings("ignore")
    best_aic = float("inf")
    best_param = None
    for parameters in parameters_list:
        try:
            model = sm.tsa.statespace.SARIMAX(df_mean_1,
                                            order=(parameters[0],d,parameters[1]),
                                            seasonal_order=(parameters[2], D, parameters[3], 12)).fit(disp=-1)
        except Exception:
            # A candidate that cannot be fitted is simply skipped.
            continue
        aic = model.aic
        if aic < best_aic:
            best_aic = aic
            best_param = parameters

    if best_param is None:
        # Previously this surfaced as a NameError on `best_param`.
        raise ValueError("SARIMA模型参数搜索失败,请调整历史数据时间后重试")

    # Hold-out prediction with the selected order
    best_model=sm.tsa.statespace.SARIMAX(train_data,
                                            order=(best_param[0],d,best_param[1]),
                                            seasonal_order=(best_param[2], D, best_param[3], 12)).fit(disp=-1)

    test_predict=best_model.forecast(steps=len(test_data))

    def month_to_year(monthly):
        # Sum consecutive groups of 12 months into yearly totals; a trailing
        # incomplete year is dropped. Every 12th month closes a year, even
        # when the running sum of that year is 0.
        yearly=[]
        running=0
        for i in range(len(monthly)):
            running=running+monthly[i]
            if (i+1)%12==0:
                yearly.append(running)
                running=0
        return np.array(yearly)

    finalpredict=month_to_year(test_predict)
    # Last len(finalpredict) yearly values, in chronological order.
    finaltrue=np.flipud(pdyeardata[pretype].values[-1:-(len(finalpredict)+1):-1])
    mape=MAPE(finalpredict,finaltrue)
    rmse=RMSE(finalpredict,finaltrue)

    # Recover the years of the hold-out values by matching each one against
    # the yearly series with a tolerance of +/-5 (first match wins).
    trainyear=[]
    for t in finaltrue:
        for year, data in pdyeardata.iterrows():
            if t>data[pretype]-5 and t<data[pretype]+5:
                trainyear.append(year)
                break
    ytrain=np.array(finalpredict)

    # Forecast: refit the selected order on the full monthly history
    outputlen=int(PreEndYear)-int(PreStartYear)+1
    traindata=pdmonthdata[pretype].values
    best_model=sm.tsa.statespace.SARIMAX(traindata,
                                            order=(best_param[0],d,best_param[1]),
                                            seasonal_order=(best_param[2], D, best_param[3], 12)).fit(disp=-1)
    predict=best_model.forecast(steps=outputlen*12)
    finalpredict=month_to_year(predict)
    ypre=np.array(finalpredict)

    result={"trainfromyear":trainyear[0],"traintoyear":trainyear[-1],"trainresult":ytrain.tolist(),"prefromyear":PreStartYear,"pretoyear":PreEndYear,"preresult":ypre.tolist(),"MAPE":mape,"RMSE":rmse}
    return result
コード例 #7
0
ファイル: algorithm.py プロジェクト: Lyanf/ynpowerbackend
def algorithm1(data,file,start,end):
    """Return whatever ``getData(file, data, start, end)`` returns.

    Note the argument order: ``file`` is passed to ``getData`` first and
    ``data`` second, the reverse of this function's own signature.
    """
    return getData(file, data, start, end)
コード例 #8
0
ファイル: LDM.py プロジェクト: Lyanf/ynpowerbackend
def LDM(PreStartYear,
        PreEndYear,
        buildingarea,
        loaddensity,
        pretype="全社会用电量",
        city="云南省"):
    """Forecast yearly load with the load-density method.

    Building area and load density are read from two CSV files, each column
    is extended to the forecast years with the project helper
    ``predict.pre``, and the product area x density is regressed linearly
    against the historical load. The fitted line is then applied to the
    extended years.

    Parameters
    ----------
    PreStartYear, PreEndYear :
        First and last forecast year; converted with ``int`` for
        ``predict.pre`` and echoed back in the result.
    buildingarea, loaddensity :
        CSV inputs accepted by ``pandas.read_csv`` (UTF-8). The first column
        of each holds the years and both must list the same values; the
        building-area file must name that column "year".
    pretype : str, optional
        Load series requested from the "云南省_year_电力电量类" dataset.
    city : str, optional
        Not used by this function.

    Returns
    -------
    dict
        ``trainfromyear`` / ``traintoyear`` (first and last year of the CSV),
        ``trainresult`` (fitted values, nested one level as ``[[...]]``),
        ``prefromyear`` / ``pretoyear``, ``preresult``, ``MAPE``, ``RMSE``.

    Raises
    ------
    ValueError
        If the two CSV files differ in column count or in their year column.
    """
    def Density(n, Dlist, Plist):
        # Sum of Dlist[i] * Plist[i] over the first n zones.
        load = 0
        for i in range(n):
            load = Dlist[i] * Plist[i] + load

        return load

    data1 = pd.read_csv(buildingarea, encoding="UTF-8")
    data2 = pd.read_csv(loaddensity, encoding="UTF-8")
    columns = data1.columns
    columns2 = data2.columns

    if len(columns) != len(columns2):
        raise ValueError("负荷密度和建筑密度列表不匹配,请重新上传")
    if not (data1[columns[0]].values == data2[columns2[0]].values).all():
        raise ValueError("负荷密度和建筑密度列表不匹配,请重新上传")

    StartYear = str(data1[columns[0]].values[0])
    EndYear = str(data1[columns[0]].values[-1])

    # Extend every building-area column to the forecast years.
    building = predict.pre(data1.loc[:, [columns[0], columns[1]]],
                           columns[1], int(PreStartYear), int(PreEndYear))
    for i in range(2, len(columns)):
        c = predict.pre(data1.loc[:, [columns[0], columns[i]]], columns[i],
                        int(PreStartYear), int(PreEndYear))
        building = pd.merge(building, c, on=columns[0])

    # Extend every load-density column to the forecast years.
    density = predict.pre(data2.loc[:, [columns2[0], columns2[1]]],
                          columns2[1], int(PreStartYear), int(PreEndYear))
    for i in range(2, len(columns2)):
        c = predict.pre(data2.loc[:, [columns2[0], columns2[i]]],
                        columns2[i], int(PreStartYear), int(PreEndYear))
        density = pd.merge(density, c, on=columns2[0])

    # Read the historical load for the years covered by the CSV files.
    period = int(EndYear) - int(StartYear) + 1
    datajson = getData("云南省_year_电力电量类", pretype, StartYear, EndYear)
    data = json.loads(datajson)
    final = pd.DataFrame([data], index=[pretype]).T

    # NOTE(review): only the last column is used below, and the density
    # frame is indexed with the building file's column name -- this works
    # only if both files name their last column identically; confirm.
    area_values = building[columns[-1]].values
    density_values = density[columns[-1]].values

    trainx = [
        Density(1, [area_values[i]], [density_values[i]])
        for i in range(period)
    ]

    # Historical load for the years that appear in the building file.
    known_years = data1["year"].values
    trainy = [
        final[pretype].values[j] for j in range(period)
        if int(final.index.values[j]) in known_years
    ]

    # Rows past the historical period are the forecast years.
    prex = [
        Density(1, [area_values[a]], [density_values[a]])
        for a in range(period, len(building.values))
    ]

    trainx = np.array(trainx).reshape(-1, 1)
    trainy = np.array(trainy).reshape(-1, 1)
    prex = np.array(prex).reshape(-1, 1)

    # Fit the linear model and apply it to both periods.
    reg = LinearRegression().fit(trainx, trainy)
    prey = [x * reg.coef_[0][0] + reg.intercept_[0] for x in prex]

    pretrainy = [tx * reg.coef_[0][0] + reg.intercept_[0] for tx in trainx]
    ypre = np.array(prey).reshape(1, -1).squeeze()
    ytrain = np.array(pretrainy).reshape(1, -1)

    # Score the fitted values against the observed load. The original
    # compared them with the regressor input (trainx), which is not the
    # quantity being predicted.
    mape = MAPE(pretrainy, trainy)
    rmse = RMSE(pretrainy, trainy)

    result = {
        "trainfromyear": StartYear,
        "traintoyear": EndYear,
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
コード例 #9
0
def GPRM(StartYear,
         EndYear,
         PreStartYear,
         PreEndYear,
         timestep,
         pretype="全社会用电量",
         city="云南省"):
    """Forecast a yearly load series with a grey GM(1,1)-style model.

    The development coefficient ``a`` and grey input ``b`` are estimated on
    a window of ``timestep`` values that ends one step before the hold-out
    window. The same ``a`` and ``b`` are then reused, with a different start
    value, to score the hold-out (the last ``PreEndYear - PreStartYear + 1``
    observations) and to produce the forecast from the most recent
    ``timestep`` observations.

    Parameters
    ----------
    StartYear, EndYear :
        First and last historical year; passed unchanged to ``getData``.
    PreStartYear, PreEndYear :
        First and last forecast year; they set the horizon and are echoed
        back in the result.
    timestep : int
        Length of the window the model is fitted on.
    pretype : str, optional
        Series requested from the "云南省_year_电力电量类" dataset.
    city : str, optional
        Not used by this function.

    Returns
    -------
    dict
        ``trainfromyear`` / ``traintoyear`` (years of the hold-out window),
        ``trainresult``, ``prefromyear`` / ``pretoyear``, ``preresult``,
        ``MAPE`` and ``RMSE`` (hold-out errors).

    Raises
    ------
    ValueError
        If ``timestep`` exceeds the number of historical years, is smaller
        than the forecast span plus one, or the parameter estimation fails.
    """
    def improve_GM(x, n):
        # Fit a and b on the sequence x (numpy array) and predict the next
        # n values. Returns (predictions, a, b, assessment text).
        x1 = x.cumsum()  # first-order accumulation
        z1 = (x1[:len(x1) - 1] + x1[1:]) / 2.0  # means of adjacent values
        z1 = z1.reshape((len(z1), 1))
        B = np.append(-z1, np.ones_like(z1), axis=1)
        Y = x[1:].reshape((len(x) - 1, 1))
        # a is the development coefficient, b the grey input
        try:
            [[a], [b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T),
                                Y)
        except Exception as err:
            # Same message as before, but the cause is kept and
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            raise ValueError("中间矩阵不可逆,请重新调整历史数据时间或步长") from err
        S1_2 = x.var()  # variance of the original sequence
        e = list()  # residual sequence
        for index in range(1, x.shape[0] + 1):
            fitted = (x[0] - b / a) * np.exp(
                -a * (index - 1)) - (x[0] - b / a) * np.exp(-a * (index - 2))
            e.append(x[index - 1] - fitted)
        S2_2 = np.array(e).var()  # variance of the residuals
        C = S2_2 / S1_2  # posterior variance ratio
        if C <= 0.35:
            assess = '后验差比<=0.35,模型精度等级为好'
        elif C <= 0.5:
            assess = '后验差比<=0.5,模型精度等级为合格'
        elif C <= 0.65:
            assess = '后验差比<=0.65,模型精度等级为勉强'
        else:
            assess = '后验差比>0.65,模型精度等级为不合格'
        # Predict the n values following the sequence
        predict = list()
        for index in range(x.shape[0] + 1, x.shape[0] + n + 1):
            predict.append((x[0] - b / a) * np.exp(-a * (index - 1)) -
                           (x[0] - b / a) * np.exp(-a * (index - 2)))
        predict = np.array(predict)
        return predict, a, b, assess

    def GMpre(x, n, a, b):
        # Predict the n values following x with already fitted a and b.
        predict = list()
        for index in range(x.shape[0] + 1, x.shape[0] + n + 1):
            predict.append((x[0] - b / a) * np.exp(-a * (index - 1)) -
                           (x[0] - b / a) * np.exp(-a * (index - 2)))
        predict = np.array(predict)
        return predict

    if timestep > (int(EndYear) - int(StartYear) + 1):
        raise ValueError("训练步长过大,请调整后重试.")
    elif timestep < (int(PreEndYear) - int(PreStartYear) + 2):
        raise ValueError("训练步长小于预测年份区间长度,请增加训练步长.")
    else:
        datayear = np.arange(int(StartYear), int(EndYear) + 1)

        # Read the historical load data
        datajson = getData("云南省_year_电力电量类", pretype, StartYear, EndYear)
        data = json.loads(datajson)
        final = pd.DataFrame([data], index=[pretype])
        final = final.T

        window = timestep
        testyear = int(PreEndYear) - int(PreStartYear) + 1

        y = final.values
        y = y.reshape(-1, 1)

        num = len(y)
        # Fitting window: `window` values ending one step before the
        # hold-out window.
        trainx = y[num - testyear - 1 - window:num - testyear - 1].squeeze()
        # Hold-out: the `window` values right before the last `testyear`
        # values, and those last values as targets.
        testx = y[num - testyear - window:num - testyear].squeeze()
        testy = y[num - testyear:]
        if len(testy) > 1:
            testy = testy.squeeze()

        # Fit a and b
        trainpre, a, b, assess = improve_GM(trainx, testyear)
        # Hold-out predictions
        testpre = GMpre(testx, testyear, a, b)

        # Final forecast from the last `window` observations, in
        # chronological order (reverse slice followed by flipud).
        testpredx = np.array(np.flipud(y[-1:-(window + 1):-1]))
        finalpre = GMpre(testpredx, testyear, a, b)

        mape = MAPE(testpre, testy)
        rmse = RMSE(testpre, testy)

        ypre = finalpre.reshape(1, -1).squeeze()

        trainyear = datayear[num - testyear:]

        # NOTE(review): the reported years and MAPE/RMSE refer to the
        # hold-out window (testpre vs testy), while "trainresult" carries
        # trainpre, which covers the window one step earlier -- confirm
        # which series is intended.
        result = {
            "trainfromyear": trainyear[0],
            "traintoyear": trainyear[-1],
            "trainresult": trainpre.tolist(),
            "prefromyear": PreStartYear,
            "pretoyear": PreEndYear,
            "preresult": ypre.tolist(),
            "MAPE": mape,
            "RMSE": rmse
        }
        return result
コード例 #10
0
ファイル: Logarithm.py プロジェクト: Lyanf/ynpowerbackend
def Logarithm(StartYear,EndYear,PreStartYear,PreEndYear,pretype="全社会用电量",econamelist="GDP",city="云南省",planflag=1,plan=1,pro=1):

    
    """Forecast a yearly load series from one economic series with a log curve.

    The curve ``y = a * log(x) + b`` is fitted with ``leastsq``, where ``x``
    is the economic series and ``y`` the load series. It is fitted once on
    the leading part of the history to score a hold-out (the last
    ``floor(n / 5)`` points), and once on the full history to produce the
    forecast. Future values of the economic series come from ``ic.getpred``.

    Parameters
    ----------
    StartYear, EndYear :
        First and last historical year; passed unchanged to ``getData``.
    PreStartYear, PreEndYear :
        First and last forecast year; converted with ``int`` to size the
        forecast horizon and echoed back in the result.
    pretype : str, optional
        Load series requested from the "云南省_year_电力电量类" dataset.
    econamelist : str, optional
        The single economic series requested from the
        "云南省_year_社会经济类" dataset.
    city : str, optional
        Only "云南省" is handled; for any other value the function falls
        through and returns None.
    planflag, plan, pro : optional
        Forwarded unchanged to ``ic.getpred``. Each defaults to 1.

    Returns
    -------
    dict or None
        ``trainfromyear`` / ``traintoyear``: first and last index label found
        for the hold-out values; ``trainresult``: hold-out predictions;
        ``prefromyear`` / ``pretoyear``: the inputs; ``preresult``: forecast
        values; ``MAPE`` / ``RMSE``: hold-out errors from the project helpers.
    """
    
    def func5(params, x):
        # Log curve: y = a * log(x) + b.
        a, b = params
        return a * np.log(x) + b 
    
    def error5(params, x, y):
        # Residual vector minimised by leastsq.
        return func5(params, x) - y
    
    def slovePara5(x,y):
        # Fit from a fixed initial guess; callers read the fitted (a, b)
        # from element 0 of the returned leastsq result.
        p0 = [1, 0.02]
        Para = leastsq(error5, p0, args=(x, y))
        return Para
    
    
    # The argument is wrapped into a one-element list here, so the length
    # check right below can never fail as written.
    econamelist=[econamelist]
    if len(econamelist) !=1:
        raise ValueError("仅支持选择一个因素变量") 
    
    elif city=="云南省":
        name=[pretype]
        finaldata=[]
        
        # Read the historical load data
        datajson=getData("云南省_year_电力电量类", pretype, StartYear, EndYear)
        # print(datajson)
        data=json.loads(datajson)
        finaldata.append(data)
        
        # Read the economic data
        ecodatajson=getData("云南省_year_社会经济类", econamelist[0], StartYear, EndYear)
        ecodata=json.loads(ecodatajson)
        finaldata.append(ecodata)
        name.append(econamelist[0])
        
        # Build the final DataFrame: one column per series after transposing
        final=pd.DataFrame(finaldata,index=name)
        final=final.T
        
        x = final[econamelist[0]].values
        y = final[pretype].values        #load


        x = x.reshape(-1,1)
        y = y.reshape(-1,1)


        # Split into fitting data and hold-out data (last floor(n / 5) points)
        num=len(x)
        testyear=math.floor(num/5)
        trainx=x[:num-testyear].squeeze()
        trainy=y[:num-testyear].squeeze()
        
        testx=x[num-testyear:]
        testy=y[num-testyear:]
        

        # Fit the model on the leading part of the history
        Para = slovePara5(trainx,trainy)
        a, b = Para[0]
        
        # NOTE(review): ic.getpred is opaque here. After transposing, rows
        # 0..50 are averaged and the row whose mean equals the median of
        # those 51 means is selected -- presumably 51 scenario paths of
        # growth multipliers; confirm against ic.getpred.
        testp = ic.getpred(testx,testyear,planflag,plan,pro)
        testp = np.array(testp).T
        testpm = []
        for i in range(51):
            testpm.append(np.mean(testp[i]))
        testpmm = testpm.index(np.median(testpm))
        testpredx = testp[testpmm]
        # Scale the selected path by the last hold-out value of x.
        testpredx = [k * testx[-1] for k in testpredx]
        testpredy = [a*np.log (x) + b for x in testpredx]


        # Recover index labels for the hold-out values by matching each one
        # against the load column with a tolerance of +/-5 (first match wins).
        trainyear=[]
        for t in testy:
            count=-1
            for d in final[pretype]:
                count+=1
                
                if t>d-5 and t<d+5:
                    # print("yes")
                    trainyear.append(final.index[count])
                    break       
        
        # Hold-out errors
        mape=MAPE(testpredy,testy)
        rmse=RMSE(testpredy,testy)
        
        # Forecast: refit on the full history
        x=x.squeeze()
        y=y.squeeze()
        Parapre = slovePara5(x,y)
        ap, bp = Parapre[0]
        
        
        preyear = np.arange(int(PreStartYear),int(PreEndYear)+1)
        year=len(preyear)
        
        # Same median-path selection as above, now over the forecast horizon.
        p = ic.getpred(x,year,planflag,plan,pro)
        p = np.array(p).T
        pm = []
        for i in range(51):
            pm.append(np.mean(p[i]))
        pmm = pm.index(np.median(pm))
        predx = p[pmm]
        predx = [k * x[-1] for k in predx]
            
        predy = [ap*np.log (x0) + bp for x0 in predx]
        predy=np.array(predy).squeeze()
        

        
        # Assemble the result
        ytrain=np.array(testpredy).squeeze()
        ypre=np.array(predy).squeeze()
        result={"trainfromyear":trainyear[0],"traintoyear":trainyear[-1],"trainresult":ytrain.tolist(),"prefromyear":PreStartYear,"pretoyear":PreEndYear,"preresult":ypre.tolist(),"MAPE":mape,"RMSE":rmse}
        return result
コード例 #11
0
ファイル: FER.py プロジェクト: Lyanf/ynpowerbackend
def FER(StartYear,EndYear,PreStartYear,PreEndYear,timestep,pretype="全社会用电量",city="云南省"):
    """
    Forecast a yearly series with a fuzzy rule base driven by sector GDP.

    The ``pretype`` series and three sector-GDP series are fetched with
    ``getData`` and only the last ``timestep`` rows are kept.  Every GDP
    series is extended over the forecast window with ``predict.pre``; the
    first two of them are pushed row by row through a 25-rule fuzzy control
    system, and the raw outputs are post-processed with double exponential
    smoothing.  The third GDP series is fetched and extended but is not an
    input of the fuzzy system.

    Parameters
    ----------
    StartYear, EndYear :
        First and last historical year (anything ``int()`` accepts).
    PreStartYear, PreEndYear :
        First and last forecast year (anything ``int()`` accepts).
    timestep : int
        Number of trailing historical rows used.
    pretype : str, optional
        Name of the series requested from ``getData``.
    city : str, optional
        Accepted for interface compatibility; not read by this function.

    Returns
    -------
    dict
        Keys ``trainfromyear``, ``traintoyear``, ``trainresult``,
        ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE``, ``RMSE``.

    Raises
    ------
    ValueError
        If ``timestep`` exceeds the historical span, if the forecast window
        covers fewer than two years, or if ``timestep`` is smaller than the
        forecast window length plus one.
    """
    def double_exponential_smoothing(series, alpha, beta):
        # Holt-style level/trend recursion; each emitted value is level + trend.
        smoothed = [series[0]]
        if len(series) < 2:
            return smoothed
        level, trend = series[0], series[1] - series[0]
        for value in series[1:]:
            previous_level = level
            level = alpha * value + (1 - alpha) * (level + trend)
            trend = beta * (level - previous_level) + (1 - beta) * trend
            smoothed.append(level + trend)
        return smoothed

    if timestep > int(EndYear) - int(StartYear) + 1:
        raise ValueError("训练步长过大,请调整后重试.")
    period = int(PreEndYear) - int(PreStartYear) + 1
    if period < 2:
        raise ValueError("该算法不支持一年及一年内的预测.")
    if timestep < period + 1:
        raise ValueError("训练步长小于预测年份区间长度,请增加训练步长.")

    # Load the target series first, then the three economic series.
    econamelist = ["第一产业GDP", "第二产业GDP", "第三产业GDP"]
    column_names = [pretype] + econamelist
    records = [json.loads(getData("云南省_year_电力电量类", pretype, StartYear, EndYear))]
    for eco_name in econamelist:
        records.append(json.loads(getData("云南省_year_社会经济类", eco_name, StartYear, EndYear)))
    final = pd.DataFrame(records, index=column_names).T

    # Keep only the trailing `timestep` rows for fitting/evaluation.
    data1 = final.iloc[len(final) - timestep:]
    num = len(data1)

    # Extend every economic series over the forecast window and join on "year".
    # NOTE(review): assumes predict.pre returns history followed by forecast,
    # with a default 0..n-1 index — confirm against predict.pre.
    eco = predict.pre(data1, econamelist[0], PreStartYear, PreEndYear)
    for eco_name in econamelist[1:]:
        eco = pd.merge(eco, predict.pre(data1, eco_name, PreStartYear, PreEndYear), on="year")

    trainx = eco.loc[:, econamelist]
    trainy = data1.loc[:, pretype]

    # Fuzzy variables and their triangular membership functions.
    GDP1 = ctrl.Antecedent(np.arange(100, 15000, 20), "gdp1")
    GDP2 = ctrl.Antecedent(np.arange(150, 20000, 20), "gdp2")
    fuload = ctrl.Consequent(np.arange(100, 8000, 1), "futureload")

    level_names = ("very low", "low", "medium", "high", "very high")
    triangles = (
        (GDP1, ([100, 300, 500], [400, 850, 1250], [1000, 2500, 4000],
                [3700, 5500, 7500], [7300, 12000, 15000])),
        (GDP2, ([100, 500, 900], [500, 1450, 2600], [2500, 6500, 10500],
                [9500, 12000, 14000], [13500, 16000, 20000])),
        (fuload, ([100, 200, 300], [250, 550, 1100], [1050, 1900, 3000],
                  [2750, 3500, 5100], [5000, 8000, 8000])),
    )
    for variable, corner_sets in triangles:
        for level, corners in zip(level_names, corner_sets):
            variable[level] = fuzz.trimf(variable.universe, corners)

    # Rule base: one row per gdp1 level, one column per gdp2 level
    # (columns follow level_names); each cell is the resulting load level.
    rule_table = (
        ("very low", ("very low", "very low", "low", "medium", "medium")),
        ("low", ("very low", "low", "low", "medium", "medium")),
        ("medium", ("low", "low", "medium", "high", "medium")),
        ("high", ("low", "medium", "high", "high", "very high")),
        ("very high", ("low", "low", "medium", "high", "very high")),
    )
    rules = [
        ctrl.Rule(GDP1[gdp1_level] & GDP2[gdp2_level], fuload[load_level])
        for gdp1_level, load_levels in rule_table
        for gdp2_level, load_level in zip(level_names, load_levels)
    ]
    consumptionSystem = ctrl.ControlSystemSimulation(ctrl.ControlSystem(rules))

    # Evaluate the fuzzy system on every row (history and forecast window).
    row_count = len(trainx)
    systemoutput = np.zeros(row_count, dtype=np.float64)
    for i in range(row_count):
        consumptionSystem.input["gdp1"] = trainx.loc[i, econamelist[0]]
        consumptionSystem.input["gdp2"] = trainx.loc[i, econamelist[1]]
        consumptionSystem.compute()
        systemoutput[i] = consumptionSystem.output["futureload"]

    # Smooth three slices separately: full history, the last `period`
    # historical rows (used for the error metrics) and the forecast rows.
    alpha = 0.9
    beta = 1
    allexsystemoutput = double_exponential_smoothing(systemoutput[:num], alpha, beta)
    exsystemoutput = double_exponential_smoothing(systemoutput[num - period:num], alpha, beta)
    exprey = double_exponential_smoothing(systemoutput[num:], alpha, beta)

    reference = trainy.values[num - period:num]
    mape = MAPE(exsystemoutput, reference)
    rmse = RMSE(exsystemoutput, reference)

    trainyear = data1.index
    ytrain = np.array(allexsystemoutput).reshape(1, -1).squeeze()
    ypre = np.array(exprey).reshape(1, -1).squeeze()

    return {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse,
    }
コード例 #12
0
ファイル: Binarylinear.py プロジェクト: Lyanf/ynpowerbackend
def Binarylinear(StartYear,EndYear,PreStartYear,PreEndYear,econamelist,pretype="全社会用电量",city="云南省",planflag1=1,plan1=1,pro1=1,planflag2=1,plan2=1,pro2=1):
    """
    Forecast a yearly series with a two-variable linear regression.

    The ``pretype`` series and two economic series are fetched with
    ``getData``.  A ``LinearRegression`` is fitted on the *whole* history
    (so the reported MAPE/RMSE on the trailing fifth of the history are
    in-sample).  Future values of each economic driver come from the median
    scenario returned by ``ic.getpred`` and are plugged into the fitted line.

    Parameters
    ----------
    StartYear, EndYear :
        First and last historical year passed to ``getData``.
    PreStartYear, PreEndYear :
        First and last forecast year (anything ``int()`` accepts).
    econamelist : sequence of str
        Exactly two economic series names.
    pretype : str, optional
        Name of the target series.
    city : str, optional
        Only "云南省" is processed.
    planflag1, plan1, pro1 :
        Passed through to ``ic.getpred`` for the first economic series.
    planflag2, plan2, pro2 :
        Passed through to ``ic.getpred`` for the second economic series.

    Returns
    -------
    dict or None
        ``{"False": message}`` when ``econamelist`` does not hold exactly two
        names; ``None`` for any city other than "云南省"; otherwise a dict with
        keys ``trainfromyear``, ``traintoyear``, ``trainresult``,
        ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE``, ``RMSE``.
    """
    def median_scenario(history, steps, planflag, plan, pro):
        # Ask ic.getpred for scenarios, pick the one whose mean equals the
        # median of the scenario means, and scale it by the last observation.
        # NOTE(review): the original inspects exactly 51 scenario rows —
        # presumably ic.getpred always yields 51; confirm against ic.
        scenarios = np.array(ic.getpred(history, steps, planflag, plan, pro)).T
        scenario_means = [np.mean(scenarios[i]) for i in range(51)]
        chosen = scenarios[scenario_means.index(np.median(scenario_means))]
        base = history[-1]
        return [k * base for k in chosen]

    if len(econamelist) != 2:
        return {"False":"请重新选择两个经济变量."}
    if city != "云南省":
        # Unsupported regions have always produced None here; kept as is.
        return None

    # Load the target series, then the two economic series.
    name = [pretype]
    finaldata = [json.loads(getData("云南省_year_电力电量类", pretype, StartYear, EndYear))]
    for eco_name in econamelist[:2]:
        finaldata.append(json.loads(getData("云南省_year_社会经济类", eco_name, StartYear, EndYear)))
        name.append(eco_name)
    final = pd.DataFrame(finaldata, index=name).T

    x1 = final[econamelist[0]].values.reshape(-1, 1)
    x2 = final[econamelist[1]].values.reshape(-1, 1)
    xx = np.concatenate((x1, x2), axis=1)
    y = final[pretype].values.reshape(-1, 1)

    # The trailing fifth of the history is used for the error metrics.
    num = len(y)
    testyear = math.floor(num / 5)
    testx = xx[num - testyear:]
    testy = y[num - testyear:]

    reg = LinearRegression().fit(xx, y)
    coef1 = reg.coef_[0][0]
    coef2 = reg.coef_[0][1]
    intercept = reg.intercept_[0]

    # Evaluation on the test tail.  Plain loop variables are used on purpose:
    # the previous comprehensions iterated with ``testx[:,0]`` / ``xx[:,0]`` as
    # the loop target, which overwrote whole columns of the history (``testx``
    # is a view of ``xx``) before the final forecast read them.
    testpredx1 = median_scenario(testx[:, 0], testyear, planflag1, plan1, pro1)
    testpredx2 = median_scenario(testx[:, 1], testyear, planflag2, plan2, pro2)
    testpredy = [(v1 * coef1 + intercept) + v2 * coef2
                 for v1, v2 in zip(testpredx1, testpredx2)]

    mape = MAPE(testpredy, testy)
    rmse = RMSE(testpredy, testy)

    # Map every test value back to the first year whose value lies within +/-5.
    trainyear = []
    for t in testy:
        for position, d in enumerate(final[pretype]):
            if t > d - 5 and t < d + 5:
                trainyear.append(final.index[position])
                break

    # Forecast from the untouched full history.
    year = len(np.arange(int(PreStartYear), int(PreEndYear) + 1))
    predx1 = median_scenario(xx[:, 0], year, planflag1, plan1, pro1)
    predx2 = median_scenario(xx[:, 1], year, planflag2, plan2, pro2)
    predy = [(v1 * coef1 + intercept) + v2 * coef2
             for v1, v2 in zip(predx1, predx2)]

    ytrain = np.array(testpredy).squeeze()
    ypre = np.array(predy).squeeze()
    result = {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse,
    }
    return result
コード例 #13
0
def QuantileRegression(StartYear,
                       EndYear,
                       PreStartYear,
                       PreEndYear,
                       quatile=0.95,
                       pretype="全社会用电量",
                       econamelist=["GDP"],
                       city="云南省"):
    """
    Forecast a yearly series with a linear quantile regression.

    The ``pretype`` series and up to five economic series are fetched with
    ``getData``.  Each economic series is extended over the forecast window
    with ``preeco.pre``; a quantile regression of ``pretype`` on the economic
    series is fitted at quantile ``quatile`` and evaluated on the extended
    economic data.

    Parameters
    ----------
    StartYear, EndYear :
        First and last historical year (anything ``int()`` accepts).
    PreStartYear, PreEndYear :
        First and last forecast year (anything ``int()`` accepts).
    quatile : float, optional
        Quantile passed to the regression fit.
    pretype : str, optional
        Name of the target series.
    econamelist : list of str, optional
        One to five economic series names.  The default list is never mutated.
    city : str, optional
        Only "云南省" is supported.

    Returns
    -------
    dict
        Keys ``trainfromyear``, ``traintoyear``, ``trainresult``,
        ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE``, ``RMSE``.

    Raises
    ------
    ValueError
        If more than five or zero economic series are given, if the forecast
        window covers fewer than two years, if the history covers fewer than
        five years, or if ``city`` is not "云南省".
    """
    def get_coef(data, xnamelist, yname, quatile):
        # Fit "yname ~ x1+x2+..." and return the quantile, the intercept, the
        # slope of every regressor and the confidence bounds of the slopes.
        formula = '%s ~ %s' % (yname, '+'.join(xnamelist))
        res = smf.quantreg(formula, data).fit(q=quatile)
        return quatile, res.params['Intercept'], res.params[
            xnamelist], res.conf_int().loc[xnamelist]

    def predict(data, intercept, coef, quatile, xnamelist):
        # `data` only needs the regressor columns; the target is not read.
        pre = [intercept] * len(data.values)
        for xname in xnamelist:
            pre = pre + coef[xname] * data[xname].values
        return pre

    # Validate the request before any data is fetched.
    if len(econamelist) > 5:
        delnum = len(econamelist) - 5
        raise ValueError("经济因素选取不应超出 5 个,请删去 %s 个,再重新预测" % delnum)
    if int(PreEndYear) - int(PreStartYear) < 1:
        raise ValueError("该算法不支持一年及一年内的预测")
    if (int(EndYear) - int(StartYear) + 1) < 5:
        raise ValueError("历史年份区间过短,建议历史年份区间在 5 年以上")
    if city != "云南省":
        raise ValueError("暂不支持其他地区预测")
    if len(econamelist) == 0:
        # Previously surfaced later as an IndexError on econamelist[0].
        raise ValueError("请至少选择 1 个经济因素")

    period = int(PreEndYear) - int(PreStartYear) + 1

    # Load the target series, then every economic series.
    name = [pretype]
    finaldata = [json.loads(getData("云南省_year_电力电量类", pretype, StartYear, EndYear))]
    for eco_name in econamelist:
        finaldata.append(
            json.loads(getData("云南省_year_社会经济类", eco_name, StartYear, EndYear)))
        name.append(eco_name)
    final = pd.DataFrame(finaldata, index=name).T

    # Extend every economic series over the forecast window, joined on "year".
    eco = preeco.pre(final, econamelist[0], PreStartYear, PreEndYear)
    for eco_name in econamelist[1:]:
        eco = pd.merge(eco,
                       preeco.pre(final, eco_name, PreStartYear, PreEndYear),
                       on="year")

    # Use the caller's quantile; it used to be hard-coded to 0.95 here.
    q, b, k, lbub = get_coef(final, econamelist, pretype, quatile)
    y = predict(eco, b, k, q, econamelist)

    # The last `period` values are the forecast; the rest is the fitted history.
    split = len(y) - period
    ytrain = y[:split]
    ytraintrue = final[pretype].values[:split]
    mape = MAPE(ytrain, ytraintrue)
    rmse = RMSE(ytrain, ytraintrue)
    ypre = y[split:]

    result = {
        "trainfromyear": StartYear,
        "traintoyear": EndYear,
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
コード例 #14
0
ファイル: LSTMpre.py プロジェクト: Lyanf/ynpowerbackend
def LSTMpre(StartYear,EndYear,PreStartYear,PreEndYear,timestep,pretype="全社会用电量",city="云南省", hidden_size=24,hidden_layer=1, learningrate=0.005,epoch=1000):
    """
    Forecast a yearly series with a stacked LSTM (TensorFlow 1.x graph API).

    The ``pretype`` series is fetched with ``getData`` and windowed by
    ``generate_data``.  A network is trained for ``epoch`` steps, evaluated on
    the second half of the training windows (for MAPE/RMSE) and finally asked
    for one multi-step forecast of length ``PreEndYear - PreStartYear + 1``.

    Parameters
    ----------
    StartYear, EndYear :
        First and last historical year (anything ``int()`` accepts).
    PreStartYear, PreEndYear :
        First and last forecast year (anything ``int()`` accepts).
    timestep : int
        Length of each input window.
    pretype : str, optional
        Name of the series requested from ``getData``.
    city : str, optional
        Accepted for interface compatibility; not read by this function.
    hidden_size : int, optional
        Units per LSTM cell.
    hidden_layer : int, optional
        Number of stacked LSTM cells.
    learningrate : float, optional
        Learning rate handed to the optimizer.
    epoch : int, optional
        Number of training steps.

    Returns
    -------
    dict
        Keys ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE``, ``RMSE``.

    Raises
    ------
    ValueError
        If ``timestep`` exceeds half of the historical span, or if the
        historical span is shorter than forecast span plus ``timestep``.
    """

    # Build the LSTM graph; returns (predictions, loss, train_op).
    def LSTM(x,y,outputlen,is_training,hidden_size,num_layers,lr,optimizer,keep_pro):
        cell=tf.nn.rnn_cell.BasicLSTMCell
        if is_training and keep_pro<1:
            lstmcell=tf.nn.rnn_cell.MultiRNNCell([tf.nn.rnn_cell.DropoutWrapper(cell(hidden_size,activation=tf.nn.softsign),output_keep_prob=keep_pro) for _ in range(num_layers)])
        else:
            # Use the same activation as the training branch.  These graphs
            # reuse the trained variables, so building them with the cell's
            # default activation would evaluate the weights under a different
            # nonlinearity than the one they were trained with.
            lstmcell=tf.nn.rnn_cell.MultiRNNCell([cell(hidden_size,activation=tf.nn.softsign) for _ in range(num_layers)])
        x=tf.expand_dims(x,axis=2)
        outputs,current_state=tf.nn.dynamic_rnn(lstmcell,x,dtype=tf.float32)
        # Only the output at the last time step feeds the dense head.
        output=outputs[:,-1,:]
        predictions=tf.contrib.layers.fully_connected(output,outputlen)

        if not is_training:
            return predictions,None,None
        loss=tf.losses.absolute_difference(labels=y,predictions=predictions)
        train_op=tf.contrib.layers.optimize_loss(loss,tf.train.get_global_step(),optimizer=optimizer,learning_rate=lr)
        return predictions,loss,train_op

    # Train for `training_step` steps; returns the per-step batch predictions.
    def trainmodel(sess,outputlen,train_x,train_y,hidden_size,num_layers,lr,optimizer,keep_pro,batch_size,training_step):
        ds=tf.data.Dataset.from_tensor_slices((train_x,train_y))
        ds=ds.repeat().shuffle(100).batch(batch_size)
        x,y=ds.make_one_shot_iterator().get_next()
        prediction,loss,train_op=LSTM(x,y,outputlen,True,hidden_size,num_layers,lr,optimizer,keep_pro)
        sess.run(tf.global_variables_initializer())
        ytrain=[]
        for _ in range(training_step):
            batch_prediction,_,_=sess.run([prediction,train_op,loss])
            ytrain.append(batch_prediction)
        return ytrain

    # Run the trained network over (test_x, test_y) one sample at a time.
    def runmodel(sess,outputlen,test_x,test_y,hidden_size,num_layers,lr,optimizer,keep_pro,batch_size,training_step):
        ds=tf.data.Dataset.from_tensor_slices((test_x,test_y))
        ds=ds.batch(1)
        x,y=ds.make_one_shot_iterator().get_next()
        prediction,_,_=LSTM(x,[0.0],outputlen,False,hidden_size,num_layers,lr,optimizer,keep_pro)
        pre=[]
        label=[]
        for _ in range(len(test_y)):
            p,l=sess.run([prediction,y])
            pre.append(p)
            label.append(l)

        pre=np.array(pre).squeeze()
        labels=np.array(label).squeeze()
        return pre,labels

    # Produce the final forecast for a single input window.
    def premodel(sess,outputlen,test_x,test_y,hidden_size,num_layers,lr,optimizer,keep_pro,batch_size,training_step):
        prediction,_,_=LSTM(test_x,[0.0],outputlen,False,hidden_size,num_layers,lr,optimizer,keep_pro)
        finalpre=sess.run(prediction)
        return finalpre

    # Validate the request before any data is fetched.
    if timestep > (int(EndYear)-int(StartYear)+1)*0.5:
        raise ValueError("训练步长过大,请调整后重试")
    if int(EndYear)-int(StartYear)<(int(PreEndYear)-int(PreStartYear)+timestep):
        raise ValueError("历史时间长度小于 预测时间长度与训练步长之和,请调整后重试")

    optimizer="Adam"
    keep_pro=0.9
    batch_size=16

    # Load the series and derive the forecast length.
    name=[pretype]
    outputlen=int(PreEndYear)-int(PreStartYear)+1
    finaldata=[json.loads(getData("云南省_year_电力电量类", pretype, StartYear, EndYear))]
    final=pd.DataFrame(finaldata,index=name)
    final=final.T

    # All windows go to the "train" split (test_size must stay 0 here).
    test_size=0
    X,y=generate_data(final,timestep,outputlen,test_size=test_size,if_norm="no")
    testdata=final[pretype].values

    # The second half of the training windows doubles as the evaluation set.
    num=len(X["train"])
    selet=int(np.floor(num/2))
    testinput=X["train"][selet:,:]
    testoutput=y["train"][selet:,:]

    # Input window for the final forecast: the last `timestep` observations,
    # taken newest-first by the negative-step slice.
    # NOTE(review): confirm that generate_data orders its windows the same way.
    x_pre=testdata[-1:-(timestep+1):-1].reshape(1,-1)
    x_pre=np.array(x_pre, dtype = np.float32)

    # Train, evaluate and forecast inside one session; the second and third
    # graphs reuse the variables created under the "LSTM" scope.
    tf.reset_default_graph()
    with tf.Session() as sess:

        with tf.variable_scope("LSTM"):
            trainmodel(sess,outputlen,X["train"][:-1,:],y["train"][:-1,:],hidden_size,hidden_layer,learningrate,optimizer,keep_pro,batch_size,epoch)

        with tf.variable_scope("LSTM",reuse=True):
            test_pre,test_label=runmodel(sess,outputlen,testinput,testoutput,hidden_size,hidden_layer,learningrate,optimizer,keep_pro,batch_size,epoch)
        with tf.variable_scope("LSTM",reuse=True):
            ypre=premodel(sess,outputlen,x_pre,x_pre,hidden_size,hidden_layer,learningrate,optimizer,keep_pro,batch_size,epoch)

    mape=MAPE(test_pre,test_label)
    rmse=RMSE(test_pre,test_label)

    ypre=np.array(ypre).squeeze()
    result={"prefromyear":PreStartYear,"pretoyear":PreEndYear,"preresult":ypre.tolist(),"MAPE":mape,"RMSE":rmse}
    return result
コード例 #15
0
def ESQRM(StartYear,
          EndYear,
          PreStartYear,
          PreEndYear,
          quatile=0.95,
          pretype="consumption",
          econamelist=["GDP"],
          city="云南省"):
    """
    Forecast a yearly series with a quantile regression fitted in log space.

    The ``pretype`` series and up to five economic series are fetched with
    ``getData`` and log-transformed.  Each log economic series is extended
    over the forecast window with ``preeco.pre``; a quantile regression of
    log ``pretype`` on the log economic series is fitted and its predictions
    are mapped back with ``exp``.

    Parameters
    ----------
    StartYear : str
        First year of the historical data.
    EndYear : str
        Last year of the historical data.
    PreStartYear : str
        First forecast year.
    PreEndYear : str
        Last forecast year.
    quatile : float
        Quantile used for the regression fit; defaults to 0.95.
    pretype : str
        Name of the target series, e.g. "consumption" or "load".
    econamelist : list
        Names of the selected economic series (one to five).  The default
        list is never mutated.
    city : str
        Region; only "云南省" is supported.

    Returns
    -------
    dict
        On success, keys ``trainfromyear``, ``traintoyear``, ``trainresult``
        (fitted history), ``prefromyear``, ``pretoyear``, ``preresult``
        (forecast), ``MAPE`` and ``RMSE``.  On a rejected request,
        ``{"False": message}``.
    """
    def get_coef(data, pretype, econamelist, quatile):
        # Fit "pretype ~ x1+x2+..." and return the quantile, the intercept,
        # the slope of every regressor and the slopes' confidence bounds.
        formula = '%s ~ %s' % (pretype, '+'.join(econamelist))
        res = smf.quantreg(formula, data).fit(q=quatile)
        return quatile, res.params['Intercept'], res.params[
            econamelist], res.conf_int().loc[econamelist]

    def predict(data, intercept, coef, quatile, econamelist):
        # `data` only needs the regressor columns; the model lives in log
        # space, so the linear prediction is exponentiated before returning.
        pre = [intercept] * len(data.values)
        for eco_name in econamelist:
            pre = pre + coef[eco_name] * data[eco_name].values
        return np.exp(pre)

    # Rejected requests return a {"False": message} dict.  The ">5 factors"
    # case used to print the message and then fail on an unbound `result`.
    if len(econamelist) > 5:
        delnum = len(econamelist) - 5
        message = "经济因素选取不应超出5个,请删去%s个,再重新预测。" % delnum
        print(message)
        return {"False": message}
    if city != "云南省":
        return {"False": "暂不支持其他地区预测"}
    if len(econamelist) == 0:
        # Previously surfaced later as an IndexError on econamelist[0].
        return {"False": "请至少选择1个经济因素。"}

    period = int(PreEndYear) - int(PreStartYear) + 1

    # Load the target series, then every economic series.
    # NOTE(review): the target is read from the same "yunnan_year_社会经济类"
    # table as the economic series — confirm this is the intended table.
    name = [pretype]
    finaldata = [json.loads(getData("yunnan_year_社会经济类", pretype, StartYear, EndYear))]
    for eco_name in econamelist:
        finaldata.append(
            json.loads(getData("yunnan_year_社会经济类", eco_name, StartYear, EndYear)))
        name.append(eco_name)
    final = pd.DataFrame(finaldata, index=name).T

    # Work in log space.
    logfinal = final.apply(np.log)

    # Extend every log economic series over the forecast window.
    eco = preeco.pre(logfinal, econamelist[0], PreStartYear, PreEndYear)
    for eco_name in econamelist[1:]:
        eco = pd.merge(eco,
                       preeco.pre(logfinal, eco_name, PreStartYear, PreEndYear),
                       on="year")

    q, b, k, lbub = get_coef(logfinal, pretype, econamelist, quatile)
    y = predict(eco, b, k, q, econamelist)

    # The last `period` values are the forecast; the rest is the fitted history.
    split = len(y) - period
    ytrain = y[:split]
    ytraintrue = final[pretype].values[:split]
    mape = MAPE(ytrain, ytraintrue)
    rmse = RMSE(ytrain, ytraintrue)
    ypre = y[split:]

    result = {
        "trainfromyear": StartYear,
        "traintoyear": EndYear,
        "trainresult": list(ytrain),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": list(ypre),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
コード例 #16
0
ファイル: PCA.py プロジェクト: Lyanf/ynpowerbackend
def PCA(StartYear, EndYear, pretype, econamelist, pmin=0.9, city="云南省"):
    """
    Principal-component analysis of the factors correlated with ``pretype``.

    The ``pretype`` series and every series in ``econamelist`` are fetched
    with ``getData``.  Factors whose Pearson correlation with ``pretype`` is
    not above ``pmin`` are dropped.  The remaining factors are standardized
    (sample standard deviation), their covariance matrix is eigen-decomposed,
    and every component explaining more than 10% of the variance is reported.

    Parameters
    ----------
    StartYear, EndYear :
        First and last year passed to ``getData``.
    pretype : str
        Name of the reference series.
    econamelist : sequence of str
        At least two factor names.
    pmin : float, optional
        Correlation threshold, strictly between 0 and 1.
    city : str, optional
        Accepted for interface compatibility; not read by this function.

    Returns
    -------
    dict
        ``N_components`` (1..k), ``ComponetRatio`` (explained-variance share
        of each reported component, rounded to 2 decimals), ``FactorName``
        (all requested factor names) and ``Vectors`` (one loading vector per
        reported component, aligned with ``FactorName``; dropped factors get
        a loading of 0).

    Raises
    ------
    ValueError
        If ``pmin`` is outside (0, 1), if fewer than two factors are given,
        or if no factor passes the correlation filter.
    """
    if pmin >= 1 or pmin <= 0:
        raise ValueError("皮尔森因数阈值应当在0到1之间选取")
    if len(econamelist) < 2:
        raise ValueError("至少选择2个影响因素")

    # Row 0 is the reference series, rows 1.. are the factors.
    name = [pretype]
    finaldata = [json.loads(getData("云南省_year_电力电量类", pretype, StartYear, EndYear))]
    for eco_name in econamelist:
        finaldata.append(
            json.loads(getData("云南省_year_社会经济类", eco_name, StartYear, EndYear)))
        name.append(eco_name)
    final = pd.DataFrame(finaldata, index=name)
    data = final.values

    # Keep the factors that correlate strongly enough with the reference.
    kept_rows = []
    dropped_positions = []  # 0-based positions within FactorName
    for row in range(1, len(data)):
        if pearsonr(data[row], data[0])[0] > pmin:
            kept_rows.append(data[row])
        else:
            dropped_positions.append(row - 1)

    if len(kept_rows) == 0:
        raise ValueError("皮尔逊系数过大或历史数据时间过短,无法进行分析")

    # Standardize each kept factor (ddof=1, as before).
    kept = np.array(kept_rows, dtype=float)
    standardized = (kept - kept.mean(axis=1, keepdims=True)) / kept.std(
        axis=1, ddof=1, keepdims=True)

    # np.cov collapses to a 0-d value for a single factor; eig needs 2-D.
    cov = np.atleast_2d(np.cov(standardized))
    eig_val, eig_vec = np.linalg.eig(cov)
    total = sum(eig_val.real)
    p = [value / total for value in eig_val.real]

    vector = []
    variance_ratio = []
    for i, ratio in enumerate(p):
        if ratio > 0.1:
            # np.linalg.eig stores the eigenvector of eig_val[i] in COLUMN i.
            v = np.round(eig_vec[:, i].real, 2).tolist()
            # Re-insert a zero loading at each dropped factor's own position
            # so the vector lines up with FactorName (ascending order keeps
            # the insert positions valid).
            for position in dropped_positions:
                v.insert(position, 0)
            vector.append(v)
            variance_ratio.append(np.round(np.array(ratio), 2).tolist())

    n_components = list(range(1, len(variance_ratio) + 1))
    factor_names = final.index[1:].tolist()

    return {
        "N_components": n_components,
        "ComponetRatio": variance_ratio,
        "FactorName": factor_names,
        "Vectors": vector
    }
コード例 #17
0
ファイル: GM.py プロジェクト: Lyanf/ynpowerbackend
def GM(StartYear,
       EndYear,
       PreStartYear,
       PreEndYear,
       timestep,
       pretype="全社会用电量",
       city="云南省"):
    """Forecast a yearly series with a GM(1,1) grey model.

    Parameters
    ----------
    StartYear, EndYear : str or int
        First and last year of the historical window (inclusive).
    PreStartYear, PreEndYear : str or int
        First and last year of the forecast window (inclusive).
    timestep : int
        Number of consecutive historical points fed to the model.
    pretype : str, optional
        Name of the series requested from ``getData``.
    city : str, optional
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    dict
        Keys ``trainfromyear``, ``traintoyear``, ``trainresult``,
        ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE``, ``RMSE``.

    Raises
    ------
    ValueError
        If ``timestep`` is longer than the history, shorter than the
        forecast horizon plus one, or the model parameters cannot be solved.
    """
    def extrapolate(x, n, a, b):
        # GM(1,1) restored values for the n steps following the window x.
        return [(x[0] - b / a) * np.exp(-a * (index - 1)) * (-a)
                for index in range(x.shape[0] + 1, x.shape[0] + n + 1)]

    def RGM(x, n):
        """Fit GM(1,1) on the sequence ``x`` and predict ``n`` further steps.

        Returns the predictions (list), the development coefficient ``a``
        and the grey action quantity ``b``.
        """
        x1 = x.cumsum()  # first-order accumulation
        z1 = (x1[:len(x1) - 1] + x1[1:]) / 2.0  # mean of adjacent values
        z1 = z1.reshape((len(z1), 1))
        B = np.append(-z1, np.ones_like(z1), axis=1)
        Y = x[1:].reshape((len(x) - 1, 1))
        try:
            # Least-squares solution of the grey differential equation.
            [[a], [b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T),
                                Y)
        except Exception as err:
            # Any failure here is reported to the caller as an input problem;
            # Exception (not a bare except) keeps Ctrl-C / SystemExit intact.
            raise ValueError("中间矩阵不可逆,请重新调整历史数据时间或步长") from err
        return extrapolate(x, n, a, b), a, b

    def RGMpre(x, n, a, b):
        """Predict ``n`` steps after window ``x`` with already fitted a, b."""
        return np.array(extrapolate(x, n, a, b))

    if timestep > (int(EndYear) - int(StartYear) + 1):
        raise ValueError("训练步长过大,请调整后重试.")
    if timestep < (int(PreEndYear) - int(PreStartYear) + 2):
        raise ValueError("训练步长小于预测年份区间长度,请增加训练步长.")

    # Load forecasting
    name = [pretype]
    finaldata = []

    datayear = np.arange(int(StartYear), int(EndYear) + 1)

    # Read the historical load data
    datajson = getData("云南省_year_电力电量类", pretype, StartYear, EndYear)
    data = json.loads(datajson)
    finaldata.append(data)
    final = pd.DataFrame(finaldata, index=name)
    final = final.T

    trainyear = timestep
    testyear = int(PreEndYear) - int(PreStartYear) + 1

    y = final.values
    y = y.reshape(-1, 1)

    num = len(y)
    # Training window: ends one step before the evaluation window.
    trainx = y[num - testyear - 1 - trainyear:num - testyear - 1].squeeze()

    # Evaluation window and the last ``testyear`` observations as its target.
    testx = y[num - testyear - trainyear:num - testyear].squeeze()
    testy = y[num - testyear:].squeeze()

    # Fit on the training window
    trainpre, a, b = RGM(trainx, testyear)
    # Apply the fitted parameters to the evaluation window
    testpre = RGMpre(testx, testyear, a, b)

    # Final forecast from the most recent ``timestep`` observations
    testpredx = np.array(np.flipud(y[-1:-(trainyear + 1):-1]))
    finalpre = RGMpre(testpredx, testyear, a, b)

    mape = MAPE(testpre, testy)
    rmse = RMSE(testpre, testy)

    ypre = finalpre.reshape(1, -1).squeeze()

    # Years covered by the evaluation target
    trainyear = datayear[num - testyear:]
    result = {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": trainpre,
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
コード例 #18
0
def Apriori(StartYear,EndYear,pretype,econamelist,city="云南省"):
    """Score the association between a load series and candidate factors.

    Every series is discretised into levels 0-3 over its own min-max range,
    then level pairs (load level, factor level) that co-occur more than four
    times are counted as associations.

    Parameters
    ----------
    StartYear, EndYear : str or int
        First and last year of the historical window (inclusive).
    pretype : str
        Name of the load series requested from ``getData``.
    econamelist : list of str or None
        Names of the socio-economic series to test against the load.
    city : str, optional
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    dict
        ``FactorsName``, ``Score`` (summed support) and ``Confidence``
        (lowest confidence among the associated pairs, minus 0.05), one
        entry per factor.

    Raises
    ------
    ValueError
        If a series is constant over the window, or no association is found.
    """
    def frecount(vnum,data1,data2,f):
        # 4x4 co-occurrence table of (load level, factor level).
        fre = np.zeros(shape=(4,4))
        for x, y in zip(data1, data2):
            # The series maximum maps to level 4; fold it into level 3.
            fre[min(x, 3)][min(y, 3)] += 1

        score=0
        confidence=1
        for i in range(4):
            for k in range(4):
                if fre[i][k] > f :
                    score=score+fre[i][k]/vnum
                    c=fre[i][k]/sum(fre[i])
                    # Keep the weakest confidence among associated pairs.
                    if confidence>c:
                        confidence=c
                    print("负荷等级" +str(i)+"与因素等级"+str(k)+"有关联,支持度为"+str(fre[i][k]/vnum)+",置信度为"+str(c))

        return score,confidence

    period=int(EndYear)-int(StartYear)+1

    # Read the historical load data
    finaldata=[]
    name=[pretype]
    datajson = getData("云南省_year_电力电量类", pretype, StartYear, EndYear)
    finaldata.append(json.loads(datajson))

    if econamelist is not None:
        # Read the socio-economic data
        for econame in econamelist:
            ecodatajson=getData("云南省_year_社会经济类", econame, StartYear, EndYear)
            finaldata.append(json.loads(ecodatajson))
            name.append(econame)

    # One row per series, one column per year
    final=pd.DataFrame(finaldata,index=name)

    data=final.values
    data2 =[]

    for i in range(len(final)):
        m = min(data[i])
        d = max(data[i]) - m
        if d == 0:
            # A constant series has no range to split into levels; fail with
            # a clear message instead of a NaN-to-int conversion error.
            raise ValueError("序列“%s”在所选年份内为常数,无法划分等级" % name[i])
        # Min-max normalise, then bucket into integer levels 0..4
        data2.append([ int((x - m) / d * 4 ) for x in data[i]])

    factorname=[]
    factorconfi=[]
    factorscore=[]

    for i in range(1,len(data)):
        print("分析第"+str(i+1)+"个因素")
        score, confidence = frecount(len(final.T), data2[0],data2[i],4)
        factorname.append(name[i])
        factorconfi.append(confidence-0.05)
        factorscore.append(score)
    factorscore=np.round(factorscore,2).tolist()
    factorconfi=np.round(factorconfi,2).tolist()
    print(factorscore)
    print(factorconfi)
    if sum(factorscore)==0:
        if period<15:
            raise ValueError("历史数据年份过短,因素集未显示明显的关联关系,建议选择15年以上数据")
        raise ValueError("未发现所选因素与关联目标间的关联关系")
    return {"FactorsName":factorname,"Score":factorscore,"Confidence":factorconfi}
コード例 #19
0
ファイル: GBDT.py プロジェクト: Lyanf/ynpowerbackend
def GBDT(StartYear,
         EndYear,
         PreStartYear,
         PreEndYear,
         timestep,
         pretype="全社会用电量",
         city="云南省",
         LearningRate=0.5,
         MaxDepth=20,
         NumberofEstimators=500):
    """Forecast a yearly series with a multi-output XGBoost regressor.

    Windows of ``timestep`` historical values (built by ``generate_data``)
    are mapped onto the following forecast-horizon values; the last
    ``timestep`` observations are then used to produce the forecast.

    Parameters
    ----------
    StartYear, EndYear : str or int
        First and last year of the historical window (inclusive).
    PreStartYear, PreEndYear : str or int
        First and last year of the forecast window (inclusive).
    timestep : int
        Length of each input window.
    pretype : str, optional
        Name of the series requested from ``getData``.
    city : str, optional
        Accepted for interface compatibility; not used by this function.
    LearningRate, MaxDepth, NumberofEstimators
        Forwarded to ``xgb.XGBRegressor``.

    Returns
    -------
    dict
        Keys ``trainfromyear``, ``traintoyear``, ``trainresult``,
        ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE``, ``RMSE``.

    Raises
    ------
    ValueError
        If ``timestep`` exceeds the history length, or the history is too
        short for the requested horizon plus ``timestep``.
    """
    history_span = int(EndYear) - int(StartYear)
    if timestep > history_span + 1:
        raise ValueError("训练步长过大,请调整后重试.")
    if history_span < (int(PreEndYear) - int(PreStartYear) + timestep):
        raise ValueError("历史时间长度小于预测时间长度,请增加历史时间长度或减小预测时间长度.")

    # Number of years to forecast in one shot
    horizon = int(PreEndYear) - int(PreStartYear) + 1

    raw = json.loads(getData("云南省_year_电力电量类", pretype, StartYear, EndYear))
    final = pd.DataFrame([raw], index=[pretype]).T

    # test_size=0: every generated window goes into the training split
    X, y = generate_data(final,
                         timestep,
                         horizon,
                         test_size=0,
                         if_norm="no")

    booster_options = {
        "max_depth": MaxDepth,
        "learning_rate": LearningRate,
        "n_estimators": NumberofEstimators,
        "silent": True,
        "objective": 'reg:linear',
        "booster": 'gblinear',
        "n_jobs": 50,
        "nthread": None,
        "gamma": 0,
        "min_child_weight": 1,
        "max_delta_step": 0,
        "subsample": 1,
        "colsample_bytree": 1,
        "colsample_bylevel": 1,
        "reg_alpha": 0,
        "reg_lambda": 1,
        "scale_pos_weight": 1,
        "base_score": 0.5,
        "random_state": 0,
        "seed": None,
        "missing": None,
        "importance_type": 'gain',
    }
    multi_model = MultiOutputRegressor(xgb.XGBRegressor(**booster_options))
    multi_model.fit(X["train"], y["train"])

    # The second half of the training windows doubles as the evaluation set
    half = int(np.floor(len(X["train"]) / 2))
    eval_inputs = X["train"][half:, :]
    eval_targets = y["train"][half:, :]

    # Most recent ``timestep`` observations, oldest first, as a single row
    observations = final.values
    x_pre = np.array(np.flipud(
        observations[-1:-(timestep + 1):-1])).reshape(1, -1)

    eval_pred = multi_model.predict(eval_inputs)
    flat_pred = np.array(eval_pred).reshape(-1, 1)
    flat_real = np.array(eval_targets).reshape(-1, 1)

    mape = MAPE(flat_pred, flat_real)
    rmse = RMSE(flat_pred, flat_real)

    ytrain = eval_pred[-1]

    # Recover the year labels of the last evaluation target by locating each
    # target value (within +/-1) in the historical series.
    trainyear = []
    for target in eval_targets[-1]:
        for position, observed in enumerate(final[pretype]):
            if observed - 1 < target < observed + 1:
                trainyear.append(final.index[position])
                break

    forecast = multi_model.predict(x_pre)
    return {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": np.array(forecast).flatten().tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
コード例 #20
0
def EEMDIndustry(StartYear,
                 EndYear,
                 PreStartYear,
                 PreEndYear,
                 pretype,
                 city="云南省"):
    """Forecast an industry series from the last EMD component of a window.

    The first 60% of the history forms the training window; a window of the
    same length shifted by ``delay`` years is used for evaluation, and the
    most recent window of that length drives the final forecast. For each
    window only the last component returned by ``EMD()`` is fed to the
    regressor.

    Parameters
    ----------
    StartYear, EndYear : str or int
        First and last year of the historical window (inclusive).
    PreStartYear, PreEndYear : str or int
        First and last year of the forecast window (inclusive).
    pretype : str
        Name of the industry series requested from ``getData``.
    city : str, optional
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    dict
        Keys ``trainfromyear``, ``traintoyear``, ``trainresult``,
        ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE``, ``RMSE``.

    Raises
    ------
    ValueError
        If the history is too short for the requested forecast length.
    """
    # Read the yearly data
    yeardatajson = getData("云南省_year_电力电量类-行业", pretype, StartYear, EndYear)
    yeardata = json.loads(yeardatajson)
    pdyeardata = pd.DataFrame(yeardata, index=[pretype])
    pdyeardata = pdyeardata.T

    totalyear = int(EndYear) - int(StartYear) + 1
    timestep = int(PreEndYear) - int(PreStartYear) + 1

    trainyear = math.floor(totalyear - totalyear * 0.4)
    # Shift between the training window and the evaluation window
    delay = math.floor((totalyear - trainyear - timestep) * 0.7)
    testyear = trainyear + delay
    if testyear + timestep > totalyear or delay < 1:
        raise ValueError("历史数据时间间隔过短或预测年份过长")

    series = pdyeardata[pretype].values

    train_x = series[:trainyear].reshape(1, -1)
    train_y = series[trainyear:trainyear + timestep].reshape(1, -1)

    test_x = series[delay:testyear].reshape(1, -1)
    test_y = series[testyear:testyear + timestep].reshape(1, -1)

    # Most recent ``trainyear`` observations, oldest first
    finalpre = np.array(np.flipud(
        series[-1:-(trainyear + 1):-1])).reshape(1, -1)

    eemd = EMD()
    # Keep only the last component of each decomposition
    IMFs = eemd(train_x.squeeze())[-1].reshape(1, -1)
    testIMFs = eemd(test_x.squeeze())[-1].reshape(1, -1)
    preIMFs = eemd(finalpre.squeeze())[-1].reshape(1, -1)

    gbdt = xgb.XGBRegressor(max_depth=5,
                            learning_rate=0.1,
                            n_estimators=100,
                            silent=True,
                            objective='reg:linear',
                            booster='gblinear',
                            n_jobs=50,
                            nthread=None,
                            gamma=0,
                            min_child_weight=1,
                            max_delta_step=0,
                            subsample=1,
                            colsample_bytree=1,
                            colsample_bylevel=1,
                            reg_alpha=0,
                            reg_lambda=1,
                            scale_pos_weight=1,
                            base_score=0.5,
                            random_state=0,
                            seed=None,
                            missing=None,
                            importance_type='gain')

    multi_model = MultiOutputRegressor(gbdt)
    multi_model.fit(IMFs, train_y)

    testpredict = multi_model.predict(testIMFs)
    ypre = multi_model.predict(preIMFs)

    print(testpredict, test_y)
    mape = MAPE(testpredict, test_y)
    rmse = RMSE(testpredict, test_y)

    # test_y starts at position ``testyear`` of the series, and position 0 is
    # StartYear, so the first evaluated year is StartYear + testyear.
    teststarty = int(StartYear) + testyear
    testendy = teststarty + timestep - 1
    ytrain = testpredict.flatten()
    ypre = ypre.reshape(-1, 1).squeeze()

    result = {
        "trainfromyear": teststarty,
        "traintoyear": testendy,
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
コード例 #21
0
def UnarylinearTime(StartYear,
                    EndYear,
                    PreStartYear,
                    PreEndYear,
                    pretype="全社会用电量",
                    city="云南省",
                    planflag=0,
                    plan=0):
    """Extrapolate a yearly series with a straight line fitted against time.

    A line fitted on a short window is scored on the window shifted one year
    later; the forecast itself comes from a line fitted on the whole history.

    Parameters
    ----------
    StartYear, EndYear : str or int
        First and last year of the historical window (inclusive).
    PreStartYear, PreEndYear : str or int
        First and last year of the forecast window (inclusive).
    pretype : str, optional
        Name of the series requested from ``getData``.
    city : str, optional
        Only "云南省" is handled; any other value yields ``None``.
    planflag : optional
        Not used by this function.
    plan : optional
        Percentage by which the time axis is scaled (``1 + plan * 0.01``).

    Returns
    -------
    dict or None
        Keys ``trainfromyear``, ``traintoyear``, ``trainresult``,
        ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE``, ``RMSE``.

    Raises
    ------
    ValueError
        If the history is too short for the horizon, or the horizon is
        shorter than two years.
    """
    if city != "云南省":
        return None

    # Read the historical load data into a one-column frame
    history = json.loads(
        getData("云南省_year_电力电量类", pretype, StartYear, EndYear))
    final = pd.DataFrame([history], index=[pretype]).T

    realyear = np.arange(int(StartYear), int(EndYear) + 1)
    final["time"] = realyear

    x = (final["time"].values * (1 + plan * 0.01)).reshape(-1, 1)
    y = final[pretype].values.reshape(-1, 1)  # load

    year = len(np.arange(int(PreStartYear), int(PreEndYear) + 1))
    num = len(x)
    if num < 2 + year:
        raise ValueError("历史数据过少或预测年份过长,请重新选择")
    if year < 2:
        raise ValueError("该算法不支持两年以下的预测")

    # Fitting window, and the evaluation window one year later
    fit_window = slice(num - 2 - year, num - 2)
    eval_window = slice(num - 1 - year, num - 1)

    reg = LinearRegression().fit(x[fit_window], y[fit_window])

    testy = y[eval_window]
    testpredy = [
        point * reg.coef_[0][0] + reg.intercept_[0]
        for point in x[eval_window]
    ]

    mape = MAPE(testpredy, testy)
    rmse = RMSE(testpredy, testy)

    trainyear = realyear[eval_window]

    # Forecast years on the same scaled time axis as the history
    preyear = np.arange(int(PreStartYear),
                        int(PreEndYear) + 1) * (1 + plan * 0.01)

    # The forecast uses a line fitted on the complete history
    reg1 = LinearRegression().fit(x, y)
    predy = np.array([
        point * reg1.coef_[0][0] + reg1.intercept_[0] for point in preyear
    ]).squeeze()

    ytrain = np.array(testpredy).squeeze()
    ypre = np.array(predy).squeeze()
    return {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
コード例 #22
0
ファイル: Kmeans.py プロジェクト: Lyanf/ynpowerbackend
def Kmeans(StartYear,EndYear,pretype,econamelist,n_clusters,city="云南省"):
    """Cluster load and socio-economic series with k-means.

    Parameters
    ----------
    StartYear, EndYear : str or int
        First and last year of the historical window (inclusive).
    pretype : list of str or None
        Names of the load series requested from ``getData``.
    econamelist : list of str or None
        Names of the socio-economic series requested from ``getData``.
    n_clusters : int
        Number of clusters; must be at least 1 and at most half the number
        of series.
    city : str, optional
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    dict
        ``{"Clusters": [...]}`` with one list of series names per cluster.

    Raises
    ------
    ValueError
        If ``n_clusters`` is below 1 or larger than half the series count.
    """
    # Treat a missing list as empty so the size checks below cannot fail on
    # ``len(None)``.
    loadnames = [] if pretype is None else list(pretype)
    econames = [] if econamelist is None else list(econamelist)

    total = len(loadnames) + len(econames)
    if n_clusters > total*0.5:
        # NOTE(review): the hint rounds up while the check above rejects
        # anything above total/2, so for odd totals the suggested upper
        # bound is itself rejected - confirm which one is intended.
        m=math.ceil(total*0.5)
        raise ValueError ("聚类数过大,建议在1-%s之间选取"%m)
    if n_clusters < 1:
        raise ValueError ("聚类数不可设置为小于1的整数")

    finaldata=[]
    name=[]

    # Read the historical load data
    for loadname in loadnames:
        datajson = getData("云南省_year_电力电量类", loadname, StartYear, EndYear)
        finaldata.append(json.loads(datajson))
        name.append(loadname)

    # Read the socio-economic data
    for econame in econames:
        ecodatajson=getData("云南省_year_社会经济类", econame, StartYear, EndYear)
        finaldata.append(json.loads(ecodatajson))
        name.append(econame)

    # One row per series, one column per year
    final=pd.DataFrame(finaldata,index=name)

    estimator = KMeans(n_clusters)
    estimator.fit(final)
    relatedlabel=estimator.labels_.tolist()

    # Group the series names by their assigned cluster label
    relatedname=[
        [name[i] for i, label in enumerate(relatedlabel) if label == n]
        for n in range(n_clusters)
    ]

    return {"Clusters":relatedname}
コード例 #23
0
ファイル: Unarylinear.py プロジェクト: Lyanf/ynpowerbackend
def Unarylinear(StartYear,
                EndYear,
                PreStartYear,
                PreEndYear,
                pretype="全社会用电量",
                econamelist="GDP",
                city="云南省",
                planflag=1,
                plan=1,
                pro=1):
    """Forecast a load series by linear regression on one economic factor.

    Parameters
    ----------
    StartYear : str
        First year of the historical data.
    EndYear : str
        Last year of the historical data.
    PreStartYear : str
        First year of the forecast.
    PreEndYear : str
        Last year of the forecast.
    pretype : str
        Name of the load series requested from ``getData``.
    econamelist : str or list of str
        The single socio-economic factor to regress on, given either as a
        name ("GDP") or as a one-element list (["GDP"]).
    city : str, optional
        Only "云南省" is handled; any other value yields ``None``.
    planflag, plan, pro : optional
        Forwarded unchanged to ``ic.getpred`` when projecting the factor.

    Returns
    -------
    dict or None
        Keys ``trainfromyear``, ``traintoyear``, ``trainresult``,
        ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE``, ``RMSE``.

    Raises
    ------
    ValueError
        If more than one factor is given or the history is too short.
    """
    # Accept both a bare name and a list of names; wrapping unconditionally
    # would nest a list argument and make the single-factor check useless.
    if isinstance(econamelist, str):
        econamelist = [econamelist]
    else:
        econamelist = list(econamelist)
    if len(econamelist) != 1:
        raise ValueError("仅支持选择一个因素变量")
    if city != "云南省":
        return None

    econame = econamelist[0]
    name = [pretype]
    finaldata = []

    # Read the historical load data
    datajson = getData("云南省_year_电力电量类", pretype, StartYear, EndYear)
    finaldata.append(json.loads(datajson))

    # Read the economic data
    ecodatajson = getData("云南省_year_社会经济类", econame, StartYear, EndYear)
    finaldata.append(json.loads(ecodatajson))
    name.append(econame)

    # One column per series, one row per year
    final = pd.DataFrame(finaldata, index=name)
    final = final.T

    x = final[econame].values.reshape(-1, 1)
    y = final[pretype].values.reshape(-1, 1)  # load

    # Hold out the last eighth of the history (rounded up) for evaluation
    num = len(x)
    testyear = math.ceil(num / 8)
    if testyear < 2:
        raise ValueError("历史数据过少或预测年份过长,请重新选择")

    trainx = x[:num - testyear]
    trainy = y[:num - testyear]

    testx = x[num - testyear:]
    testy = y[num - testyear:]

    reg = LinearRegression().fit(trainx, trainy)

    # Project the factor over the evaluation window. The first 51 rows of
    # the transposed ic.getpred result are candidate paths (presumably one
    # per scenario - confirm against ic.getpred); the one whose mean equals
    # the median of the means is used, scaled by the last observed value.
    testp = np.array(ic.getpred(testx, testyear, planflag, plan, pro)).T
    testpm = [np.mean(testp[i]) for i in range(51)]
    testpmm = testpm.index(np.median(testpm))
    testpredx = [k * testx[-1] for k in testp[testpmm]]
    testpredy = [
        value * reg.coef_[0][0] + reg.intercept_[0] for value in testpredx
    ]

    mape = MAPE(testpredy, testy)
    rmse = RMSE(testpredy, testy)

    historyyear = np.arange(int(StartYear), int(EndYear) + 1)
    trainyear = historyyear[num - testyear:]

    # Same projection for the forecast window, starting from the full history
    preyear = np.arange(int(PreStartYear), int(PreEndYear) + 1)
    year = len(preyear)
    p = np.array(ic.getpred(x, year, planflag, plan, pro)).T
    pm = [np.mean(p[i]) for i in range(51)]
    pmm = pm.index(np.median(pm))
    predx = [k * x[-1] for k in p[pmm]]

    predy = [value * reg.coef_[0][0] + reg.intercept_[0] for value in predx]
    predy = np.array(predy).squeeze()

    ytrain = np.array(testpredy).squeeze()
    ypre = np.array(predy).squeeze()
    result = {
        "trainfromyear": trainyear[0],
        "traintoyear": trainyear[-1],
        "trainresult": ytrain.tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": ypre.tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }
    return result
コード例 #24
0
def LogarithmTime(StartYear,EndYear,PreStartYear,PreEndYear,pretype="全社会用电量",city="云南省",planflag=0,plan=0):
    """Extrapolate a yearly series with the curve ``a * ln(t) + b``.

    The curve fitted on a short window is scored on a later window; the
    forecast itself comes from a curve fitted on the whole history.

    Parameters
    ----------
    StartYear, EndYear : str or int
        First and last year of the historical window (inclusive).
    PreStartYear, PreEndYear : str or int
        First and last year of the forecast window (inclusive).
    pretype : str, optional
        Name of the series requested from ``getData``.
    city : str, optional
        Only "云南省" is handled; any other value yields ``None``.
    planflag : optional
        Not used by this function.
    plan : optional
        Percentage by which the time axis is scaled (``1 + plan * 0.01``).

    Returns
    -------
    dict or None
        Keys ``trainfromyear``, ``traintoyear``, ``trainresult``,
        ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE``, ``RMSE``.

    Raises
    ------
    ValueError
        If the history is too short for the horizon, or the horizon is
        shorter than two years.
    """
    def log_curve(params, t):
        a, b = params
        return a * np.log(t) + b

    def residual(params, t, target):
        return log_curve(params, t) - target

    def fit_curve(t, target):
        # Least-squares fit starting from the initial guess a=1, b=0.02
        return leastsq(residual, [1, 0.02], args=(t, target))

    if city != "云南省":
        return None

    # Read the historical load data
    datajson=getData("云南省_year_电力电量类", pretype, StartYear, EndYear)
    final=pd.DataFrame([json.loads(datajson)],index=[pretype]).T

    realyear = np.arange(int(StartYear),int(EndYear)+1)
    final["time"]=realyear

    x = (final["time"].values*(1+plan*0.01)).reshape(-1,1)
    y = final[pretype].values.reshape(-1,1)  # load

    # Forecast years on the same scaled time axis as the history
    preyear = np.arange(int(PreStartYear),int(PreEndYear)+1)*(1+plan*0.01)
    year=len(preyear)
    num=len(x)
    if num<3+year:
        raise ValueError("历史数据过少或预测年份过长,请重新选择")
    if year<2:
        raise ValueError("该算法不支持两年以下的预测")

    # Fitting window, and the later window used for scoring
    trainx=x[num-2-year-1:num-2].squeeze()
    trainy=y[num-2-year-1:num-2].squeeze()

    testx=x[num-1-year:num].squeeze()
    testy=y[num-1-year:num].squeeze()

    a, b = fit_curve(trainx,trainy)[0]

    # The ic.getpred scenario projections that used to be computed here (and
    # again before the forecast) never fed into any result, so they were
    # removed; the curve is evaluated directly on the time axis.
    testpredy = [a*np.log(t) + b for t in testx]

    trainyear=realyear[num-1-year:num]

    mape=MAPE(testpredy,testy)
    rmse=RMSE(testpredy,testy)

    # The forecast uses a curve fitted on the complete history
    ap, bp = fit_curve(x.squeeze(),y.squeeze())[0]

    predy = np.array([ap*np.log(t) + bp for t in preyear]).squeeze()

    ytrain=np.array(testpredy).squeeze()
    ypre=np.array(predy).squeeze()
    result={
        "trainfromyear":trainyear[0],
        "traintoyear":trainyear[-1],
        "trainresult":ytrain.tolist(),
        "prefromyear":PreStartYear,
        "pretoyear":PreEndYear,
        "preresult":ypre.tolist(),
        "MAPE":mape,
        "RMSE":rmse
    }
    return result
コード例 #25
0
def PCAIndustry(StartYear,EndYear,PreStartYear,PreEndYear,pretype,econamelist,city="云南省"):
    """Forecast an industry series by PCA regression on economic factors.

    The factors are projected over the forecast window with ``preeco.pre``,
    standardised, reduced with PCA (enough components to reach a 0.9
    explained-variance ratio), and a linear regression maps the components
    onto the load.

    Parameters
    ----------
    StartYear, EndYear : str or int
        First and last year of the historical window (inclusive).
    PreStartYear, PreEndYear : str or int
        First and last year of the forecast window (inclusive).
    pretype : str
        Name of the industry series requested from ``getData``.
    econamelist : list of str
        Names of the socio-economic factors; at least one is required.
    city : str, optional
        Only "云南省" is handled; any other value yields ``None``.

    Returns
    -------
    dict or None
        Keys ``trainfromyear``, ``traintoyear``, ``trainresult``,
        ``prefromyear``, ``pretoyear``, ``preresult``, ``MAPE``, ``RMSE``.
    """
    if city!="云南省":
        return None

    name=[pretype]
    finaldata=[]
    historyyear=np.arange(int(StartYear),int(EndYear)+1)

    # Read the historical load data
    datajson=getData("云南省_year_电力电量类-行业", pretype, StartYear, EndYear)
    finaldata.append(json.loads(datajson))

    # Read the economic data
    for econame in econamelist:
        ecodatajson=getData("云南省_year_社会经济类", econame, StartYear, EndYear)
        finaldata.append(json.loads(ecodatajson))
        name.append(econame)

    # One column per series, one row per year
    final=pd.DataFrame(finaldata,index=name)
    final=final.T
    final["Year"]=historyyear

    # Project every economic factor and join the projections on "year"
    eco=preeco.pre(final,econamelist[0],PreStartYear,PreEndYear)
    for econame in econamelist[1:]:
        c=preeco.pre(final,econame,PreStartYear,PreEndYear)
        eco=pd.merge(eco,c,on="year")

    Index=eco.columns[1:].tolist()  # names of the economic features

    # Standardise the feature columns
    scaler = StandardScaler()
    scaler.fit(eco[Index].values)
    Data_eco_scaler=pd.DataFrame(data=scaler.transform(eco[Index].values),columns=Index)
    Data_eco_scaler["Year"]=eco["year"].values

    # Training / evaluation split at 70% of the history.
    # NOTE(review): both ranges below are inclusive and share the boundary
    # year, so that year is in the training AND the evaluation set - confirm
    # whether the evaluation set should start one year later.
    train_start_year=int(StartYear)
    train_end_year=int(StartYear)+math.ceil(len(historyyear)*0.7)
    test_start_year=int(StartYear)+math.ceil(len(historyyear)*0.7)
    test_end_year=int(EndYear)

    train_years=range(train_start_year,train_end_year+1)
    test_years=range(test_start_year,test_end_year+1)

    x_train=Data_eco_scaler.loc[Data_eco_scaler["Year"].isin(train_years)]
    y_train=final.loc[final["Year"].isin(train_years)]
    x_test=Data_eco_scaler.loc[Data_eco_scaler["Year"].isin(test_years)]
    y_test=final.loc[final["Year"].isin(test_years)]

    # Pick the number of components from the complete feature table
    pca = PCA(0.9)
    pca.fit(Data_eco_scaler[Index].values)
    n_components = pca.n_components_

    # Refit with that dimension on the training rows only
    pca = PCA(n_components)
    pca.fit(x_train[Index].values)

    x_train_pca=pca.transform(x_train[Index].values)
    y_train_pca=y_train[pretype]
    x_test_pca=pca.transform(x_test[Index].values)
    y_test_pca=y_test[pretype]

    # Linear regression on the principal components
    pca_model = LinearRegression()
    pca_model.fit(x_train_pca, y_train_pca)
    pca_predict = pca_model.predict(x_test_pca)

    # Evaluation metrics
    rmse = RMSE(pca_predict,y_test_pca)
    mape = MAPE(pca_predict,y_test_pca)

    # The evaluation rows carry their own labels from ``final``; read them
    # directly instead of searching the history for values within +/-5 of
    # each target, which can hit the wrong year or none at all.
    trainyear=y_test.index

    # Forecast from the projected factors of the forecast window
    predata=Data_eco_scaler.loc[Data_eco_scaler["Year"].isin(range(int(PreStartYear),int(PreEndYear)+1))]
    predatatrain=pca.transform(predata[Index].values)
    predict=pca_model.predict(predatatrain)

    ytrain=pca_predict.tolist()
    ypre=np.array(predict).squeeze().tolist()

    result={
        "trainfromyear":trainyear[0],
        "traintoyear":trainyear[-1],
        "trainresult":ytrain,
        "prefromyear":PreStartYear,
        "pretoyear":PreEndYear,
        "preresult":ypre,
        "MAPE":mape,
        "RMSE":rmse
    }
    return result
コード例 #26
0
ファイル: RFIndustry.py プロジェクト: Lyanf/ynpowerbackend
def RFIndustry(StartYear,
               EndYear,
               PreStartYear,
               PreEndYear,
               timestep,
               pretype,
               n_estimators=50,
               city="云南省"):
    """Forecast a yearly industry-level series with a random forest.

    The history of ``pretype`` is read from the
    "云南省_year_电力电量类-行业" dataset, windowed by ``generate_data``
    and fitted with ``RandomForestRegressor``.  The fit is scored on the
    second half of the training windows, and the forecast is produced from
    the last ``timestep`` historical values.

    Parameters
    ----------
    StartYear : str
        First year of the historical data.
    EndYear : str
        Last year of the historical data.
    PreStartYear : str
        First year of the forecast horizon.
    PreEndYear : str
        Last year of the forecast horizon.
    timestep : int
        Length of each training input window.
    pretype : str
        Name of the series to read and forecast (required, no default).
    n_estimators : int, optional
        Number of trees in the forest. The default is 50.
    city : str, optional
        Currently unused: the dataset name is hard-coded to the Yunnan
        province industry table. The default is "云南省".

    Returns
    -------
    dict
        "trainfromyear" / "traintoyear" : index labels of the first and
            last evaluated target values that could be matched back to the
            historical series (``None`` when nothing matched),
        "trainresult" : list, model output on the evaluation windows,
        "prefromyear" / "pretoyear" : ``PreStartYear`` / ``PreEndYear``,
        "preresult" : list, forecast values,
        "MAPE", "RMSE" : error metrics on the evaluation windows.

    Raises
    ------
    ValueError
        If ``timestep`` exceeds the number of historical years, or the
        history is too short for the requested horizon plus ``timestep``.
    """
    if timestep > (int(EndYear) - int(StartYear) + 1):
        raise ValueError("训练步长过大,请调整后重试")
    if int(EndYear) - int(StartYear) < (int(PreEndYear) - int(PreStartYear) +
                                        timestep):
        raise ValueError("历史时间长度小于预测时间长度,请增加历史时间长度或减小预测时间长度")

    # One model output per forecast year.
    outputlen = int(PreEndYear) - int(PreStartYear) + 1

    # Load the historical series; transposing puts the JSON keys on the index.
    datajson = getData("云南省_year_电力电量类-行业", pretype, StartYear, EndYear)
    final = pd.DataFrame([json.loads(datajson)], index=[pretype]).T

    # NOTE(review): generate_data is defined elsewhere; it is used here as
    # returning mappings with a 2-D "train" entry each -- confirm.
    X, y = generate_data(final,
                         timestep,
                         outputlen,
                         test_size=0,
                         if_norm="no")

    # Build and fit the random forest.
    rf = RandomForestRegressor(n_estimators=n_estimators)
    rf.fit(X["train"], y["train"])

    # Score the fit on the second half of the training windows.
    half = int(np.floor(len(X["train"]) / 2))
    testinput = X["train"][half:, :]
    testoutput = y["train"][half:, :]

    y_rf_real = np.array(rf.predict(testinput)).reshape(-1, 1)
    y_real = np.array(testoutput).reshape(-1, 1)

    mape = MAPE(y_rf_real, y_real)
    rmse = RMSE(y_rf_real, y_real)

    # Forecast from the last `timestep` observations in chronological order.
    # The constant 500 is the "correction" applied by the original code;
    # its rationale is not visible here -- TODO confirm.
    testdata = final[pretype].values
    latest_window = np.array(
        np.flipud(testdata[-1:-(timestep + 1):-1])).reshape(1, -1)
    pre_y_rf = rf.predict(latest_window) + 500

    # Map every evaluated target back to the first historical entry whose
    # value lies strictly within +/-5 of it, collecting that entry's label.
    trainyear = []
    for target in y_real.ravel():
        for position, observed in enumerate(final[pretype]):
            if observed - 5 < target < observed + 5:
                trainyear.append(final.index[position])
                break

    result = {
        # Guarded so an unmatched evaluation set yields None instead of an
        # opaque IndexError.
        "trainfromyear": trainyear[0] if trainyear else None,
        "traintoyear": trainyear[-1] if trainyear else None,
        "trainresult": y_rf_real.flatten().tolist(),
        "prefromyear": PreStartYear,
        "pretoyear": PreEndYear,
        "preresult": pre_y_rf.flatten().tolist(),
        "MAPE": mape,
        "RMSE": rmse
    }

    return result
コード例 #27
0
def ForIndustry(StartYear,EndYear,PreStartYear,PreEndYear,rejectlsit,proposedata,Premethod):
    """Forecast total electricity use with large-user industries handled separately.

    The listed industries are subtracted from the historical
    "全社会用电量" series, the remainder is stored through ``insertData``
    and forecast with the method named by ``Premethod``; finally the
    uploaded per-industry plan values are added back year by year.

    Parameters
    ----------
    StartYear, EndYear : str
        First and last year of the historical data.
    PreStartYear, PreEndYear : str
        First and last year of the forecast horizon.
    rejectlsit : list of str
        Industry series to remove from the total before forecasting.
    proposedata : str or file-like
        CSV source handed to ``pd.read_csv``.  It must contain one row per
        forecast year; every column after the first must be named after
        exactly one entry of ``rejectlsit``.
    Premethod : str
        Display name of the forecasting method (one of the keys of the
        dispatch table below).

    Returns
    -------
    dict
        "prefromyear", "pretoyear" and "preresult" (list with one value
        per forecast year).

    Raises
    ------
    ValueError
        If the uploaded data does not match the forecast horizon or the
        industry list, if ``Premethod`` is not supported, if an industry
        has no historical data, or if the chosen method returns a string
        instead of forecast values.
    """
    pretype = "全社会用电量"
    savetype = "剔除大用户的社会用电量"
    province = "云南省"

    propose = pd.read_csv(proposedata, encoding="UTF-8")
    column = propose.columns

    if len(propose.values) != int(PreEndYear)-int(PreStartYear)+1:
        raise ValueError("上传数据的年限与预测年限不符,请重新上传.")
    if len(column)-1 != len(rejectlsit) or set(column[1:]) != set(rejectlsit):
        raise ValueError("大用户导入的数据名称与要剔除的行业名单不符,请重新上传.")

    # Dispatch table: method display name -> call taking the window length.
    # Lambdas keep the module lookups lazy, so only the selected method's
    # module is touched.
    span = (StartYear, EndYear, PreStartYear, PreEndYear)
    forecasters = {
        "指数函数外推": lambda step: ExponentTime.ExponentTime(*span, pretype=savetype, city=province),
        "灰色滑动平均模型": lambda step: GM.GM(*span, timestep=step, pretype=savetype, city=province),
        "生长函数外推": lambda step: GrowthTime.GrowthTime(*span, pretype=savetype, city=province),
        "一元线性外推": lambda step: UnarylinearTime.UnarylinearTime(*span, pretype=savetype, city=province),
        "对数函数外推": lambda step: LogarithmTime.LogarithmTime(*span, pretype=savetype, city=province),
        "基于滚动机制的灰色预测模型": lambda step: GPRM.GPRM(*span, step, pretype=savetype, city=province),
        "模糊指数平滑模型": lambda step: FER.FER(*span, step, pretype=savetype, city=province),
        "模糊线性回归模型": lambda step: FLR.FLR(*span, step, pretype=savetype, city=province),
        "梯度提升模型": lambda step: GBDT.GBDT(*span, step, pretype=savetype, city=province),
        "支持向量机模型": lambda step: SVM.SVM(*span, step, pretype=savetype, city=province),
        "随机森林模型": lambda step: RandomForest.RandomForest(*span, step, pretype=savetype, n_estimators=50, city=province),
    }
    # Reject unknown methods up front: previously this surfaced only after
    # the derived series had been written, as an unbound-variable error.
    if Premethod not in forecasters:
        raise ValueError("预测失败,请重新选择预测方法.")

    # Load the yearly total.
    name = [pretype]
    finaldata = [json.loads(getData("云南省_year_电力电量类", pretype, StartYear, EndYear))]

    # Load each industry series that has to be removed from the total.
    for industry in rejectlsit:
        inddata = json.loads(getData("云南省_year_电力电量类-行业", industry, StartYear, EndYear))
        if len(inddata) == 0:
            raise ValueError("%s 中不存在 %s-%s 年的%s 数据"%("电力电量类-行业",StartYear,EndYear,industry))
        finaldata.append(inddata)
        name.append(industry)

    # One row per year, one column per series.
    final = pd.DataFrame(finaldata, index=name).T
    # NOTE(review): presumably a unit conversion so the total is comparable
    # with the industry series -- confirm the factor of 10000.
    final[pretype] = final[pretype].values*10000

    # Total minus every rejected industry, subtracted in list order.
    remaining = final[pretype].values
    for industry in rejectlsit:
        remaining = remaining - final[industry].values
    forpredata = list(remaining)

    # Persist the derived series; the forecasting modules are given its
    # name (savetype) rather than the values themselves.
    savetourl = pd.DataFrame()
    savetourl["year"] = final.index.tolist()
    savetourl[savetype] = forpredata
    insertData(savetourl, "year", "云南省", "电力电量类")

    # Window-based methods use one third of the history length.
    result = forecasters[Premethod](len(forpredata) // 3)

    if isinstance(result["preresult"], str):
        raise ValueError("预测失败,请重新选择预测方法.")

    # Add the planned large-user consumption back onto each forecast year.
    ypre = []
    for k in range(len(propose)):
        power = result["preresult"][k]
        for industry in rejectlsit:
            power = power + propose[industry].values[k]
        ypre.append(power)

    return {"prefromyear":PreStartYear,"pretoyear":PreEndYear,"preresult":ypre}