Example #1
import os
from dP_ResNet import load_stdata
from dP_ResNet.config import Config
import tensorly as tl
import numpy as np
import h5py as h5
from tensorly.decomposition import tucker,non_negative_tucker,non_negative_parafac
np.random.seed(1337)  # for reproducibility of the random numbers

DATAPATH = Config().DATAPATH
mapHeight = 16
mapWidth = 16
filePath = "../datasets/JN_Fill_2017-2020_M{}x{}_Power.h5".format(mapHeight,mapWidth)

data, timestamps = load_stdata(filePath)
print(data[0][0])
data_ = data.reshape(data.shape[0], data.shape[2], data.shape[3])  # drop the flow axis (safe here since nb_flow == 1)
data__ = data_.copy()
print("--*" * 10)
print(data__[0])
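
Example #1 imports `tucker`, `non_negative_tucker`, and `non_negative_parafac` but stops before using them. Below is a minimal sketch of how the Tucker decomposition could be applied to the reshaped tensor `data_`; the rank values are illustrative assumptions, not from the original (the non-negative variants are called analogously):

tensor = tl.tensor(data_.astype(float))
core, factors = tucker(tensor, rank=[10, mapHeight, mapWidth])  # illustrative ranks
approx = tl.tucker_to_tensor((core, factors))  # low-rank reconstruction
print("relative reconstruction error:", tl.norm(tensor - approx) / tl.norm(tensor))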
Example #2
from dP_ResNet.config import Config
from dP_ResNet import metrics, PowerJN
from keras import backend as K
from Predict.TrainDP_ResNet import build_model
import matplotlib.pyplot as plt
from keras.utils.vis_utils import plot_model
from dP_ResNet.metrics import mse, rmse

K.set_image_data_format('channels_first')
plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

DATAPATH = Config().DATAPATH  # path to the training data
nb_epoch = 500  # number of training epochs (part of the data is held out as the test set)
batch_size = 32  # batch size
T = 1  # number of time intervals per day

len_closeness = 3  # length of the closeness dependence sequence
# only closeness is considered
len_period = 3  # length of the period dependence sequence
len_trend = 3  # length of the trend dependence sequence
nb_residual_unit = 4  # number of residual units

nb_flow = 1  # power dimension (number of flow channels)

# the city is partitioned into a 16 x 16 grid
map_height, map_width = 16, 16


# (the original snippet breaks off mid-statement; the call below completes it
#  based on load_data's signature in Example #4, assuming the function is
#  exposed through the imported PowerJN module)
X_train, Y_train, X_test, \
Y_test, mmn, metadata_dim, timestamp_train, \
timestamp_test = PowerJN.load_data(T=T, nb_flow=nb_flow,
                                   len_closeness=len_closeness,
                                   len_period=len_period,
                                   len_trend=len_trend)
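
From here, a typical training step would wire these hyperparameters into the model. The exact signature of `build_model` is not shown in this listing, so the call below is a hedged sketch; the argument names and order are assumptions:

# hypothetical wiring; the real build_model signature may differ
model = build_model(len_closeness, len_period, len_trend, nb_flow,
                    map_height, map_width, nb_residual_unit, metadata_dim)
plot_model(model, to_file='dp_resnet.png', show_shapes=True)
history = model.fit(X_train, Y_train,
                    epochs=nb_epoch,
                    batch_size=batch_size,
                    validation_split=0.1,
                    verbose=1)
score = model.evaluate(X_test, Y_test, batch_size=batch_size, verbose=0)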
Example #3
from keras import backend as K
from dP_ResNet.config import Config  # missing in the original, but Config is used below
from dP_ResNet.metrics import rmse
from keras.utils import plot_model

K.set_image_data_format('channels_first')
'''
from keras.backend.tensorflow_backend import set_session
import tensorflow as tf

config=tf.ConfigProto()
config.gpu_options.allocator_type='BFC'
config.gpu_options.per_process_gpu_memory_fraction=0.3
config.gpu_options.allow_growth=True
set_session(tf.Session(config=config))
'''
DATAPATH = Config().DATAPATH  # absolute path to the training data, from Config
nb_epoch = 500  # number of training epochs
# nb_epoch = 700
nb_epoch_cont = 100  # epochs of continued training, evaluated against the test data
batch_size = 32  # batch size
T = 1  # number of time intervals per day

lr = 0.0002  # learning rate for Adam (its usual default is 0.001)
len_closeness = 3  # length of the closeness dependence sequence
# only closeness is considered
len_period = 0  # length of the period dependence sequence
len_trend = 3  # length of the trend dependence sequence
nb_residual_unit = 3  # number of residual units (3 layers)

nb_flow = 1  # power dimension (number of flow channels)
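
Example #3 sets `lr` but ends before the model is compiled. A minimal sketch of how the learning rate and the imported `rmse` metric would typically be used; the `model` variable is assumed to come from a builder such as `build_model` in Example #2:

from keras.optimizers import Adam

model.compile(loss='mse', optimizer=Adam(lr=lr), metrics=[rmse])  # model assumed built elsewhere
model.summary()
plot_model(model, to_file='dp_resnet.png', show_shapes=True)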
Example #4
# Missing imports reconstructed so the example is self-contained; the exact
# module paths inside dP_ResNet are assumptions based on Examples #1-#3.
import os
import pickle
import numpy as np
from dP_ResNet import load_stdata
from dP_ResNet.config import Config
from dP_ResNet.minmax_normalization import MinMaxNormalization
from dP_ResNet.STMatrix import STMatrix
from dP_ResNet.timestamp import timestamp2vec
from dP_ResNet.meteorol import load_meteorol

def load_data(T=1,
              nb_flow=1,
              len_closeness=3,
              len_period=3,
              len_trend=3,
              preprocess_name='preprocessing.pkl',
              meta_data=False,
              meteorol_data=False):
    '''
    :param T: number of periods; a day is divided into T time slots (here T=1)
    :param nb_flow: number of flow types; here only daily power generation
    :param len_closeness: length of the closeness dependence sequence
    :param len_period: length of the period dependence sequence
    :param len_trend: length of the trend dependence sequence
    :param preprocess_name: file the fitted scaler is pickled to
    :param meta_data: whether to attach time (metadata) features
    :param meteorol_data: whether to attach meteorological features
    :return: training/test inputs and targets, the scaler, the metadata
             dimension, and the matching timestamps
    '''
    assert (len_closeness + len_period + len_trend > 0)

    # Load the data: data holds the power generation of each grid cell,
    # timestamps the corresponding times.
    DATAPATH = Config().DATAPATH

    FilePath = os.path.join(DATAPATH,
                            'JN_Fill_2017-2020_M16x16_Power_Decomposition.h5')
    #FilePath=os.path.join(DATAPATH, 'JN_Fill_2017-2020_M16x16_Power2.h5')
    print("加载文件:", FilePath)
    data, timestamps = load_stdata(FilePath)

    print(data.shape, timestamps.shape)

    # sanity-check the data: keep only the nb_flow channels
    data = data[:, :nb_flow]
    # clip negative flow values to 0
    data[data < 0] = 0

    # wrap as lists of arrays
    data_all = [data]
    timestamps_all = [timestamps]

    # min-max normalization, scaling the data into the [-1, 1] range
    mmn = MinMaxNormalization()
    # fit to the minimum and maximum of the training data
    mmn.fit(data)
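    # (assumed behavior: MinMaxNormalization.transform maps x to
    #  2 * (x - min) / (max - min) - 1, the standard scaling into [-1, 1])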

    # map all data into [-1, 1]; data_all_mmn holds the normalized arrays
    data_all_mmn = []
    for d in data_all:
        data_all_mmn.append(mmn.transform(d))
    # Serialize the fitted scaler so the data can be mapped back to the
    # original scale later. Bug fix: the original hard-coded
    # 'preprocessing.pkl' instead of using the preprocess_name parameter.
    with open(preprocess_name, 'wb') as fpkl:
        pickle.dump(mmn, fpkl)
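    # To undo the scaling at prediction time, the pickled scaler can be read
    # back (inverse_transform is assumed from the MinMaxNormalization API):
    #   with open(preprocess_name, 'rb') as f:
    #       mmn = pickle.load(f)
    #   predictions = mmn.inverse_transform(predictions_normalized)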

    # zip() packs elements into a list of tuples: a = [1, 3, 5], b = [2, 4, 6]
    # list(zip(a, b)) = [(1, 2), (3, 4), (5, 6)]
    # XC: closeness inputs, XP: period inputs, XT: trend inputs
    XC, XP, XT = [], [], []
    # ground-truth values, compared against the predictions
    Y = []
    # matching timestamps
    timestamps_Y = []
    # data_all_mmn = [normalized data]
    # timestamps_all = [timestamps]    -> zip() -> (normalized data, timestamps)
    for data, timestamps in zip(data_all_mmn, timestamps_all):
        # instance-based dataset -> sequence of (x, y) samples, where x is the
        # input data and y the date it belongs to
        st = STMatrix(data, timestamps, T, CheckComplete=True)
        # _XC, _XP, _XT: closeness, period, and trend dependence sequences;
        # _Y: ground truth at the current time; _timestamps_Y: matching timestamps
        _XC, _XP, _XT, _Y, _timestamps_Y = st.create_dataset(
            len_closeness=len_closeness,
            len_period=len_period,
            len_trend=len_trend)
        # collect each kind of training data
        XC.append(_XC)
        XP.append(_XP)
        XT.append(_XT)
        Y.append(_Y)
        timestamps_Y += _timestamps_Y
    timestamps_Y = np.array(timestamps_Y)
    meta_feature = []
    if meta_data:
        # load time feature
        time_feature = timestamp2vec(timestamps_Y)
        meta_feature.append(time_feature)
    if meteorol_data:
        # load meteorological data
        meteorol_feature = load_meteorol(timestamps_Y)
        meta_feature.append(meteorol_feature)

    meta_feature = np.hstack(
        meta_feature) if len(meta_feature) > 0 else np.asarray(meta_feature)
    metadata_dim = meta_feature.shape[1] if len(
        meta_feature.shape) > 1 else None
    # if metadata_dim < 1:
    # metadata_dim = None
    if meta_data and meteorol_data:
        print('time feature:', time_feature.shape, 'meteorol feature: ',
              meteorol_feature.shape, 'mete feature: ', meta_feature.shape)
    # stack each kind of data into one array
    XC = np.vstack(XC)
    XP = np.vstack(XP)
    XT = np.vstack(XT)
    Y = np.vstack(Y)
    print("邻近性 XC shape: ", XC.shape, "周期性 XP shape: ", XP.shape,
          "趋势性 XT shape: ", XT.shape, "真实值 Y shape:", Y.shape)
    # split each kind of data into training and test sets

    len_train = int(Y.shape[0] * 0.8)

    # shuffle the samples with a shared index so inputs stay aligned
    index = [i for i in range(len(XC))]
    np.random.shuffle(index)
    XC = XC[index]
    XP = XP[index]
    XT = XT[index]
    Y = Y[index]
    timestamps_Y = timestamps_Y[index]
    if metadata_dim is not None:
        # bug fix: the original left meta_feature unshuffled, which would
        # misalign it with XC/XP/XT/Y whenever metadata is used
        meta_feature = meta_feature[index]

    XC_train, XP_train, XT_train, Y_train = \
        XC[:len_train], XP[:len_train], XT[:len_train], Y[:len_train]
    #XC_valid, XP_valid, XT_valid, Y_valid = XC[len_train:len_test], XP[len_train:len_test], XT[len_train:len_test], Y[len_train:len_test]
    XC_test, XP_test, XT_test, Y_test = \
        XC[len_train:], XP[len_train:], XT[len_train:], Y[len_train:]
    timestamp_train, timestamp_test = timestamps_Y[:len_train], timestamps_Y[len_train:]

    X_train = []
    X_valid = []
    X_test = []
    # for each dependence sequence, append its data to X_train / X_test only if its length is > 0
    for l, X_ in zip([len_closeness, len_period, len_trend],
                     [XC_train, XP_train, XT_train]):
        if l > 0:
            X_train.append(X_)

    for l, X_ in zip([len_closeness, len_period, len_trend],
                     [XC_test, XP_test, XT_test]):
        if l > 0:
            X_test.append(X_)

    if metadata_dim is not None:
        meta_feature_train,  meta_feature_test = meta_feature[:len_train],\
                                                meta_feature[len_train:]
        X_train.append(meta_feature_train)
        X_test.append(meta_feature_test)

    print("\n训练数据形状为:")
    for _X in X_train:
        print(_X.shape)
    print("\n测试数据形状为:")
    for _X in X_test:
        print(_X.shape)
    return X_train, Y_train, X_test,\
           Y_test, mmn, metadata_dim, timestamp_train, timestamp_test
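
A minimal usage sketch of `load_data` as defined above; the unpacking order mirrors the return statement:

if __name__ == '__main__':
    X_train, Y_train, X_test, Y_test, mmn, metadata_dim, \
        timestamp_train, timestamp_test = load_data(T=1, nb_flow=1,
                                                    len_closeness=3,
                                                    len_period=3,
                                                    len_trend=3)
    print("train samples:", Y_train.shape[0], "test samples:", Y_test.shape[0])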