import os
from dP_ResNet import load_stdata
from dP_ResNet.config import Config
import tensorly as tl
import numpy as np
import h5py as h5
from tensorly.decomposition import tucker, non_negative_tucker, non_negative_parafac

np.random.seed(1337)  # make the random numbers reproducible

DATAPATH = Config().DATAPATH
mapHeight = 16
mapWidth = 16
filePath = "../datasets/JN_Fill_2017-2020_M{}x{}_Power.h5".format(mapHeight, mapWidth)

data, timestamps = load_stdata(filePath)
print(data[0][0])

# drop the flow axis: (days, 1, H, W) -> (days, H, W)
data_ = data.reshape(data.shape[0], data.shape[2], data.shape[3])
data__ = data_.copy()
print("--*" * 10)
print(data__[0])
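# --- Hedged sketch: the decomposition functions imported above are not used in
# --- this snippet. Below is a minimal example of applying non_negative_tucker
# --- to the reshaped (days, height, width) tensor; the per-mode rank [8, 8, 8]
# --- is an illustrative assumption, not a value taken from the project.
tensor = tl.tensor(data__.astype(float))

# non-negative Tucker decomposition into a core tensor and factor matrices
core, factors = non_negative_tucker(tensor, rank=[8, 8, 8], n_iter_max=100)

# reconstruct the tensor and report the relative approximation error
approx = tl.tucker_to_tensor((core, factors))
print("relative reconstruction error:",
      tl.norm(tensor - approx) / tl.norm(tensor))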
from dP_ResNet.config import Config
from dP_ResNet import metrics, PowerJN
from keras import backend as K
from Predict.TrainDP_ResNet import build_model
import matplotlib.pyplot as plt
from keras.utils.vis_utils import plot_model
from dP_ResNet.metrics import mse, rmse

K.set_image_data_format('channels_first')
plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False    # render minus signs correctly

DATAPATH = Config().DATAPATH  # path to the training data

nb_epoch = 500        # number of training epochs; part of the data is held out for testing
batch_size = 32       # batch size
T = 1                 # number of time intervals per day
len_closeness = 3     # length of the closeness dependence sequence (only closeness is considered)
len_period = 3        # length of the period dependence sequence
len_trend = 3         # length of the trend dependence sequence
nb_residual_unit = 4  # number of residual units
nb_flow = 1           # power dimension

# the city is divided into a 16 x 16 grid
map_height, map_width = 16, 16

X_train, Y_train, X_test, \
Y_test, mmn, metadata_dim, timestamp_train, \
timestamp_test = PowerJN.load_data(
    T=T, nb_flow=nb_flow, len_closeness=len_closeness,
    len_period=len_period, len_trend=len_trend)
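# --- Hedged sketch: compiling and fitting the model, the step that typically
# --- follows the data loading above. build_model's real signature lives in
# --- Predict.TrainDP_ResNet and is not shown here, so the arguments below are
# --- assumptions; the Adam / EarlyStopping / ModelCheckpoint / fit calls
# --- themselves are standard Keras 2 API.
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint

model = build_model(len_closeness, len_period, len_trend,  # assumed argument order
                    nb_flow, map_height, map_width,
                    nb_residual_unit, metadata_dim)
model.compile(loss='mse', optimizer=Adam(lr=0.0002), metrics=[rmse])

# stop early on validation loss and keep only the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=20)
checkpoint = ModelCheckpoint('best.weights.h5', monitor='val_loss',
                             save_best_only=True, mode='min')

history = model.fit(X_train, Y_train,
                    epochs=nb_epoch, batch_size=batch_size,
                    validation_split=0.1,
                    callbacks=[early_stopping, checkpoint])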
from keras import backend as K
from dP_ResNet.config import Config
from dP_ResNet.metrics import rmse
from keras.utils import plot_model

K.set_image_data_format('channels_first')

'''
from keras.backend.tensorflow_backend import set_session
import tensorflow as tf
config = tf.ConfigProto()
config.gpu_options.allocator_type = 'BFC'
config.gpu_options.per_process_gpu_memory_fraction = 0.3
config.gpu_options.allow_growth = True
set_session(tf.Session(config=config))
'''

DATAPATH = Config().DATAPATH  # absolute path to the training data, from Config

nb_epoch = 500        # number of training epochs; part of the data is held out for testing
# nb_epoch = 700
nb_epoch_cont = 100   # epochs of continued training, validated on the test data
batch_size = 32       # batch size
T = 1                 # number of time intervals per day
lr = 0.0002           # learning rate; Adam's usual default is 0.001
len_closeness = 3     # length of the closeness dependence sequence (only closeness is considered)
len_period = 0        # length of the period dependence sequence
len_trend = 3         # length of the trend dependence sequence
nb_residual_unit = 3  # number of residual units (3 layers)
nb_flow = 1           # power dimension
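# --- Hedged note: the commented-out block above uses the TF1 ConfigProto /
# --- Session API. On TensorFlow 2 the equivalent memory-growth setting would
# --- be (a sketch, not part of the original script):
# import tensorflow as tf
# for gpu in tf.config.list_physical_devices('GPU'):
#     tf.config.experimental.set_memory_growth(gpu, True)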
def load_data(T=1, nb_flow=1, len_closeness=3, len_period=3, len_trend=3,
              preprocess_name='preprocessing.pkl',
              meta_data=False, meteorol_data=False):
    '''
    :param T: number of time slots a day is divided into (1)
    :param nb_flow: number of flow types; here only daily power generation
    :param len_closeness: length of the closeness dependence sequence (3)
    :param len_period: length of the period dependence sequence (0)
    :param len_trend: length of the trend dependence sequence (0)
    :param preprocess_name: file the fitted scaler is pickled to
    :param meta_data: whether to load time (metadata) features
    :param meteorol_data: whether to load meteorological features
    :return:
    '''
    assert (len_closeness + len_period + len_trend > 0)

    # Load the data: `data` is the power generation of every grid cell,
    # `timestamps` the corresponding times.
    DATAPATH = Config().DATAPATH
    FilePath = os.path.join(DATAPATH, 'JN_Fill_2017-2020_M16x16_Power_Decomposition.h5')
    # FilePath = os.path.join(DATAPATH, 'JN_Fill_2017-2020_M16x16_Power2.h5')
    print("Loading file:", FilePath)
    data, timestamps = load_stdata(FilePath)
    print(data.shape, timestamps.shape)

    # sanity-check the data
    data = data[:, :nb_flow]
    # clip negative flows to 0
    data[data < 0] = 0
    # build [numpy.array()]
    data_all = [data]
    timestamps_all = [timestamps]

    # min-max normalization scales the data into [-1, 1]
    mmn = MinMaxNormalization()
    # find the min and max of the training data
    mmn.fit(data)
    # map all data into [-1, 1]; data_all_mmn holds the normalized arrays
    data_all_mmn = []
    for d in data_all:
        data_all_mmn.append(mmn.transform(d))

    # pickle the scaler so predictions can later be mapped back to the original scale
    fpkl = open(preprocess_name, 'wb')
    for obj in [mmn]:
        pickle.dump(obj, fpkl)
    fpkl.close()

    # zip() packs sequences into a list of tuples: with a = [1, 3, 5] and
    # b = [2, 4, 6], list(zip(a, b)) == [(1, 2), (3, 4), (5, 6)]
    # XC: closeness input, XP: period input, XT: trend input
    XC, XP, XT = [], [], []
    # ground truth, compared against the predictions
    Y = []
    # timestamps of Y
    timestamps_Y = []
    # data_all_mmn = [normalized data], timestamps_all = [timestamps]
    # -> zip() -> (normalized data, timestamps)
    for data, timestamps in zip(data_all_mmn, timestamps_all):
        # instance-based dataset -> sequence of (x, y) pairs, where x is a
        # training sample and y the date it corresponds to
        st = STMatrix(data, timestamps, T, CheckComplete=True)
        # _XC, _XP, _XT: closeness / period / trend dependence sequences;
        # _Y: true flow at the current time; _timestamps_Y: matching timestamps
        _XC, _XP, _XT, _Y, _timestamps_Y = st.create_dataset(
            len_closeness=len_closeness, len_period=len_period, len_trend=len_trend)
        # collect each kind of training data
        XC.append(_XC)
        XP.append(_XP)
        XT.append(_XT)
        Y.append(_Y)
        timestamps_Y += _timestamps_Y
    timestamps_Y = np.array(timestamps_Y)

    meta_feature = []
    if meta_data:
        # load time feature
        time_feature = timestamp2vec(timestamps_Y)
        meta_feature.append(time_feature)
    if meteorol_data:
        # load meteorological data
        meteorol_feature = load_meteorol(timestamps_Y)
        meta_feature.append(meteorol_feature)
    meta_feature = np.hstack(
        meta_feature) if len(meta_feature) > 0 else np.asarray(meta_feature)
    metadata_dim = meta_feature.shape[1] if len(
        meta_feature.shape) > 1 else None
    # if metadata_dim < 1:
    #     metadata_dim = None
    if meta_data and meteorol_data:
        print('time feature:', time_feature.shape,
              'meteorol feature:', meteorol_feature.shape,
              'meta feature:', meta_feature.shape)

    # stack each kind of data into a single array
    XC = np.vstack(XC)
    XP = np.vstack(XP)
    XT = np.vstack(XT)
    Y = np.vstack(Y)
    print("closeness XC shape:", XC.shape, "period XP shape:", XP.shape,
          "trend XT shape:", XT.shape, "ground truth Y shape:", Y.shape)

    # split each kind of data into training and test sets
    len_train = int(Y.shape[0] * 0.8)
    index = [i for i in range(len(XC))]
    np.random.shuffle(index)
    # index = np.array(index)
    XC = XC[index]
    XP = XP[index]
    XT = XT[index]
    Y = Y[index]
    timestamps_Y = timestamps_Y[index]

    XC_train, XP_train, XT_train, Y_train = \
        XC[:len_train], XP[:len_train], XT[:len_train], Y[:len_train]
    # XC_valid, XP_valid, XT_valid, Y_valid = XC[len_train:len_test], \
    #     XP[len_train:len_test], XT[len_train:len_test], Y[len_train:len_test]
    XC_test, XP_test, XT_test, Y_test = \
        XC[len_train:], XP[len_train:], XT[len_train:], Y[len_train:]
    timestamp_train, timestamp_test = \
        timestamps_Y[:len_train], timestamps_Y[len_train:]

    X_train = []
    X_valid = []
    X_test = []
    # zip the dependence-sequence lengths with their data lists and append
    # the non-empty ones to X_train / X_test
    for l, X_ in zip([len_closeness, len_period, len_trend],
                     [XC_train, XP_train, XT_train]):
        if l > 0:
            X_train.append(X_)
    for l, X_ in zip([len_closeness, len_period, len_trend],
                     [XC_test, XP_test, XT_test]):
        if l > 0:
            X_test.append(X_)

    if metadata_dim is not None:
        meta_feature_train, meta_feature_test = \
            meta_feature[:len_train], meta_feature[len_train:]
        X_train.append(meta_feature_train)
        X_test.append(meta_feature_test)

    print("\nTraining data shapes:")
    for _X in X_train:
        print(_X.shape)
    print("\nTest data shapes:")
    for _X in X_test:
        print(_X.shape)

    return X_train, Y_train, X_test, \
        Y_test, mmn, metadata_dim, timestamp_train, timestamp_test
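# --- Hedged sketch: load_data depends on a MinMaxNormalization class whose
# --- definition is not shown in this section. The sketch below is inferred
# --- from the fit()/transform() calls above and the comment that the data is
# --- scaled into [-1, 1]; the internals are assumptions, not the project's code.
class MinMaxNormalization(object):
    """Scale data into [-1, 1] using the training data's min and max."""

    def fit(self, X):
        # remember the extremes of the training data
        self._min = X.min()
        self._max = X.max()

    def transform(self, X):
        # map to [0, 1], then stretch to [-1, 1]
        X = 1. * (X - self._min) / (self._max - self._min)
        return X * 2. - 1.

    def inverse_transform(self, X):
        # map normalized predictions back to the original scale
        X = (X + 1.) / 2.
        return X * (self._max - self._min) + self._min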