def get_geo_feature(dataset):
    """Build per-station geographic features and the station/city graph set.

    Args:
        dataset: config dict; only ``dataset['n_neighbors']`` is read here,
            to size the k-NN graphs.

    Returns:
        feature: ndarray [num_station, num_geo_feature] — z-scored station
            coordinates concatenated with transformed geo features.
        graph: dict with keys 'pool', 'update', 'low', 'agg' holding the
            graphs produced by the ``utils.build_graph_*`` helpers.
    """
    station_map, station_loc, station_geo = utils.load_h5(
        os.path.join(DATA_PATH, 'geo.h5'),
        ['station_map', 'station', 'geo_feature'])
    # Treat missing geo attributes as zero before any transform.
    station_geo[np.isnan(station_geo)] = 0

    # Z-score the raw (lat, lon) coordinates.  [num_station, 2]
    loc = (station_loc - np.mean(station_loc, axis=0)) / np.std(station_loc, axis=0)
    city = city_of_station(station_map)   # [num_station, num_city] one-hot
    geo = geo_transform(station_geo)      # [num_station, num_geo_feature]
    # City one-hots are intentionally excluded from the feature matrix.
    feature = np.concatenate((loc, geo), axis=1)  # [num_station, 26]

    # Centroid location of each city, averaged over its member stations.
    num_city = city.shape[1]
    city_loc = np.zeros([num_city, 2])
    for i in range(num_city):
        members = np.where(city[:, i] == 1)[0]
        # BUG FIX: the original sliced station_loc[first:last+1], which is
        # only correct if a city's stations are contiguous in index order.
        # Averaging over the actual member indices is correct regardless.
        city_loc[i] = np.mean(station_loc[members, :], axis=0)

    graph = {}
    graph['pool'] = utils.build_graph_pool(city)
    graph['update'] = utils.build_graph_update(city)
    graph['low'] = utils.build_graph_low(
        station_map, station_loc, city, dataset['n_neighbors'])
    graph['agg'] = utils.build_graph_agg(city_loc, dataset['n_neighbors'])
    return feature, graph
def get_geo_feature(dataset):
    """Load per-cell grid embeddings and z-score normalize them.

    Args:
        dataset: config dict (unused by this loader variant).

    Returns:
        ndarray [rows * cols, embed_dim] of normalized embeddings.
    """
    embeddings = utils.load_h5(
        os.path.join(DATA_PATH, 'BJ_FEATURE.h5'), ['embeddings'])
    n_rows, n_cols, _ = embeddings.shape
    # Flatten the spatial grid into one station axis.
    flat = embeddings.reshape(n_rows * n_cols, -1)
    # Per-feature z-score; epsilon guards constant (zero-variance) columns.
    mean = np.mean(flat, axis=0)
    std = np.std(flat, axis=0) + 1e-8
    return (flat - mean) / std
def get_graph():
    """Load the adjacency feature tensor and extract its edge list.

    Returns:
        adj_feature: edge-feature tensor, normalized by the mean/std of the
            features on existing edges only.
        src, dst: index arrays of the edges (cells whose feature sum > 0).
    """
    adj_feature = utils.load_h5(
        os.path.join(DATA_PATH, 'BJ_GRAPH.h5'), ['data'])
    # An (i, j) pair is an edge iff it carries any non-zero feature mass.
    edge_mask = np.sum(adj_feature, axis=2) > 0
    src, dst = np.where(edge_mask)
    # Normalize with statistics computed over real edges, not empty cells.
    edge_values = adj_feature[src, dst]
    mean = np.mean(edge_values, axis=0)
    std = np.std(edge_values, axis=0) + 1e-8
    adj_feature = (adj_feature - mean) / std
    return adj_feature, src, dst
def dataloader(dataset):
    """Load the 2017 readings and split them chronologically.

    Args:
        dataset: config dict (unused by this loader variant).

    Returns:
        (train, eval, test) arrays split by TRAIN_PROP / EVAL_PROP, with the
        remainder going to test.
    """
    data = utils.load_h5(os.path.join(DATA_PATH, 'data_17.h5'), ['data'])
    # Readings above 500 are treated as invalid and masked as missing.
    data[data > 500] = np.nan
    n_timestamp = data.shape[0]
    num_train = int(n_timestamp * TRAIN_PROP)
    num_eval = int(n_timestamp * EVAL_PROP)
    # BUG FIX: the original returned data[-num_test:], which yields the
    # ENTIRE array when num_test == 0 (x[-0:] == x).  Slicing forward from
    # the eval boundary is equivalent otherwise and safe in that edge case.
    split = num_train + num_eval
    return data[:num_train], data[num_train:split], data[split:]
def dataloader(dataset):
    """Load the BJ flow tensor, flatten it, and split chronologically.

    Args:
        dataset: config dict (unused by this loader variant).

    Returns:
        (train, eval, test) arrays of shape [T, rows * cols, channels],
        split by TRAIN_PROP / EVAL_PROP with the remainder as test.
    """
    data = utils.load_h5(os.path.join(DATA_PATH, 'BJ_FLOW.h5'), ['data'])
    days, hours, rows, cols, _ = data.shape
    # Merge (day, hour) into one timestamp axis and (row, col) into stations.
    data = np.reshape(data, (days * hours, rows * cols, -1))
    n_timestamp = data.shape[0]
    num_train = int(n_timestamp * TRAIN_PROP)
    num_eval = int(n_timestamp * EVAL_PROP)
    # BUG FIX: data[-num_test:] returns the ENTIRE array when num_test == 0
    # (x[-0:] == x); slice forward from the eval boundary instead.
    split = num_train + num_eval
    return data[:num_train], data[num_train:split], data[split:]
def load_flow():
    """Load the NYC flow data and split it chronologically.

    Returns:
        (train, eval, test) arrays split by TRAIN_PROP / EVAL_PROP, with
        the remainder going to test.
    """
    data = utils.load_h5(os.path.join(DATA_PATH, 'NYC_FLOW.h5'), ['data'])
    print('data shape', data.shape)
    # Removed unused local `days` (was a copy of data.shape[0]).
    n_timestamp = data.shape[0]
    num_train = int(n_timestamp * TRAIN_PROP)
    num_eval = int(n_timestamp * EVAL_PROP)
    # BUG FIX: data[-num_test:] returns the ENTIRE array when num_test == 0
    # (x[-0:] == x); slice forward from the eval boundary instead.
    split = num_train + num_eval
    return data[:num_train], data[num_train:split], data[split:]
import os import h5py import numpy as np from data import utils DATA_PATH = '../data/data_all' SAVE_PATH = '../data/' data = utils.load_h5(os.path.join(DATA_PATH, 'data_17.h5'), ['data']) station_map, station_loc, station_geo = utils.load_h5( os.path.join(DATA_PATH, 'geo.h5'), ['station_map', 'station', 'geo_feature']) def in_huabei(loc): return 34.109 < loc[0] < 41.691 and 110.938 < loc[1] < 122.321 def prop_missing(data): missing = np.isnan(data).sum() count = data.size return float(missing) / float(count) index = [] n = station_map.shape[0] for i in range(n): # print(i, data[:,i].shape, prop_missing(data[:,i,0])) if in_huabei(station_loc[i]) and prop_missing(data[:, i, 0]) < 0.3: