Esempio n. 1
0
def get_geo_feature(dataset):
    """Assemble static station features and the station/city graphs.

    Reads 'station_map', 'station' and 'geo_feature' from 'geo.h5' under
    DATA_PATH, standardizes the station coordinates, transforms the geo
    attributes and concatenates both into one feature matrix.

    Args:
        dataset: Mapping that provides 'n_neighbors', which is forwarded to
            ``utils.build_graph_low`` and ``utils.build_graph_agg``.

    Returns:
        Tuple ``(feature, graph)``. ``feature`` is the column-wise
        concatenation of the standardized coordinates and the transformed geo
        attributes, one row per station. ``graph`` is a dict with the keys
        'pool', 'update', 'low' and 'agg' holding whatever the corresponding
        ``utils.build_graph_*`` helpers return.
    """
    geo_file = os.path.join(DATA_PATH, 'geo.h5')
    station_map, station_loc, station_geo = utils.load_h5(
        geo_file, ['station_map', 'station', 'geo_feature'])

    # Missing geo attributes are zero-filled in place.
    station_geo[np.isnan(station_geo)] = 0

    # Z-score the station coordinates column by column.
    loc_mean = np.mean(station_loc, axis=0)
    loc_std = np.std(station_loc, axis=0)
    loc = (station_loc - loc_mean) / loc_std  # [num_station, 2]

    city = city_of_station(station_map)  # [num_station, num_city]
    geo = geo_transform(station_geo)  # [num_station, num_geo_feature]

    # Only coordinates and geo attributes go into the feature matrix; the
    # city membership matrix is used for the graphs below, not as a feature.
    feature = np.concatenate((loc, geo), axis=1)

    # Location of each city: mean coordinate over the row range spanning its
    # first to its last member station (inclusive).
    num_city = city.shape[1]
    city_loc = np.zeros([num_city, 2])
    for city_idx in range(num_city):
        members = np.where(city[:, city_idx] == 1)[0]
        first, last = members[0], members[-1]
        city_loc[city_idx] = np.mean(station_loc[first:last + 1, :], axis=0)

    graph = {
        'pool': utils.build_graph_pool(city),
        'update': utils.build_graph_update(city),
        'low': utils.build_graph_low(station_map, station_loc, city,
                                     dataset['n_neighbors']),
        'agg': utils.build_graph_agg(city_loc, dataset['n_neighbors']),
    }
    return feature, graph
Esempio n. 2
0
def get_geo_feature(dataset):
    """Load the 'embeddings' array from 'BJ_FEATURE.h5' and standardize it.

    The loaded array must be three-dimensional. Its first two axes are
    flattened into one, then every remaining column is z-scored.

    Args:
        dataset: Not used by this function.

    Returns:
        Array of shape ``[row * col, n_features]`` with each column shifted to
        zero mean and divided by its standard deviation (plus 1e-8 so that
        constant columns do not divide by zero).
    """
    feature_file = os.path.join(DATA_PATH, 'BJ_FEATURE.h5')
    embeddings = utils.load_h5(feature_file, ['embeddings'])

    n_row, n_col, _ = embeddings.shape
    flat = np.reshape(embeddings, (n_row * n_col, -1))

    col_mean = np.mean(flat, axis=0)
    col_std = np.std(flat, axis=0)
    return (flat - col_mean) / (col_std + 1e-8)
Esempio n. 3
0
def get_graph():
    """Load the adjacency feature tensor from 'BJ_GRAPH.h5' and normalize it.

    A pair ``(i, j)`` counts as an edge when its feature vector (along axis 2)
    sums to more than zero. Mean and standard deviation are computed over the
    edge feature vectors only, but the normalization is applied to every entry
    of the tensor, including pairs that are not edges.

    Returns:
        Tuple ``(adj_feature, src, dst)``: the normalized tensor and the index
        arrays of the first and second axis for every detected edge.
    """
    graph_file = os.path.join(DATA_PATH, 'BJ_GRAPH.h5')
    raw = utils.load_h5(graph_file, ['data'])

    has_edge = np.sum(raw, axis=2) > 0
    src, dst = np.where(has_edge)

    # One feature vector per detected edge.
    edge_values = raw[src, dst]
    edge_mean = np.mean(edge_values, axis=0)
    edge_std = np.std(edge_values, axis=0)

    normalized = (raw - edge_mean) / (edge_std + 1e-8)
    return normalized, src, dst
Esempio n. 4
0
def dataloader(dataset):
    """Load 'data_17.h5' and split it in order into train / eval / test parts.

    Values greater than 500 are replaced with NaN before splitting. The split
    sizes along axis 0 are ``int(n * TRAIN_PROP)`` and ``int(n * EVAL_PROP)``;
    the test part is whatever remains.

    Args:
        dataset: Not used by this function.

    Returns:
        Tuple ``(train, eval, test)`` of consecutive, non-overlapping slices
        of the loaded array along its first axis.
    """
    data = utils.load_h5(os.path.join(DATA_PATH, 'data_17.h5'), ['data'])

    data[data > 500] = np.nan

    n_timestamp = data.shape[0]
    num_train = int(n_timestamp * TRAIN_PROP)
    num_eval = int(n_timestamp * EVAL_PROP)
    eval_end = num_train + num_eval

    # Slice the test part from an explicit start index. A negative-length
    # slice such as data[-num_test:] degenerates to data[0:] (the whole
    # array) when no rows are left for testing.
    return data[:num_train], data[num_train:eval_end], data[eval_end:]
Esempio n. 5
0
def dataloader(dataset):
    """Load 'BJ_FLOW.h5' and split it in order into train / eval / test parts.

    The loaded array must be five-dimensional. Its first two axes are merged
    into one, as are the third and fourth, before splitting. The split sizes
    along the merged first axis are ``int(n * TRAIN_PROP)`` and
    ``int(n * EVAL_PROP)``; the test part is whatever remains.

    Args:
        dataset: Not used by this function.

    Returns:
        Tuple ``(train, eval, test)`` of consecutive, non-overlapping slices
        of the reshaped array along its first axis.
    """
    data = utils.load_h5(os.path.join(DATA_PATH, 'BJ_FLOW.h5'), ['data'])
    days, hours, rows, cols, _ = data.shape

    data = np.reshape(data, (days * hours, rows * cols, -1))

    n_timestamp = data.shape[0]
    num_train = int(n_timestamp * TRAIN_PROP)
    num_eval = int(n_timestamp * EVAL_PROP)
    eval_end = num_train + num_eval

    # Slice the test part from an explicit start index. A negative-length
    # slice such as data[-num_test:] degenerates to data[0:] (the whole
    # array) when no rows are left for testing.
    return data[:num_train], data[num_train:eval_end], data[eval_end:]
Esempio n. 6
0
def load_flow():
    """Load 'NYC_FLOW.h5' and split it in order into train / eval / test parts.

    Prints the shape of the loaded array. The split sizes along axis 0 are
    ``int(n * TRAIN_PROP)`` and ``int(n * EVAL_PROP)``; the test part is
    whatever remains.

    Returns:
        Tuple ``(train, eval, test)`` of consecutive, non-overlapping slices
        of the loaded array along its first axis.
    """
    data = utils.load_h5(os.path.join(DATA_PATH, 'NYC_FLOW.h5'), ['data'])
    print('data shape', data.shape)

    n_timestamp = data.shape[0]
    num_train = int(n_timestamp * TRAIN_PROP)
    num_eval = int(n_timestamp * EVAL_PROP)
    eval_end = num_train + num_eval

    # Slice the test part from an explicit start index. A negative-length
    # slice such as data[-num_test:] degenerates to data[0:] (the whole
    # array) when no rows are left for testing.
    return data[:num_train], data[num_train:eval_end], data[eval_end:]
Esempio n. 7
0
import os
import h5py
import numpy as np

from data import utils

# Directory containing the HDF5 input files read below.
DATA_PATH = '../data/data_all'
# Base output directory (not referenced in the part of the script shown here).
SAVE_PATH = '../data/'

# 'data' array from data_17.h5; indexed further down as data[:, i, 0], i.e.
# the second axis is addressed by station index.
data = utils.load_h5(os.path.join(DATA_PATH, 'data_17.h5'), ['data'])

# Station metadata from geo.h5, loaded in the order of the requested keys:
# 'station_map', 'station' (bound to station_loc) and 'geo_feature'.
station_map, station_loc, station_geo = utils.load_h5(
    os.path.join(DATA_PATH, 'geo.h5'),
    ['station_map', 'station', 'geo_feature'])


def in_huabei(loc):
    """Tell whether a coordinate pair lies strictly inside the Huabei box.

    Args:
        loc: Indexable whose first element is checked against the open
            interval (34.109, 41.691) and whose second element is checked
            against (110.938, 122.321). Presumably (latitude, longitude) --
            verify against the caller.

    Returns:
        Truthy when both elements fall strictly inside their interval. The
        second element is only read when the first check passes.
    """
    first_min, first_max = 34.109, 41.691
    second_min, second_max = 110.938, 122.321

    first_ok = first_min < loc[0] < first_max
    if not first_ok:
        return first_ok
    return second_min < loc[1] < second_max


def prop_missing(data):
    """Return the fraction of NaN entries in *data* as a Python float.

    Args:
        data: NumPy array (anything exposing ``.size`` that ``np.isnan``
            accepts).

    Returns:
        Number of NaN entries divided by the total number of entries.

    Raises:
        ZeroDivisionError: If *data* has no elements.
    """
    nan_count = np.count_nonzero(np.isnan(data))
    total = data.size
    return float(nan_count) / float(total)


index = []
n = station_map.shape[0]
for i in range(n):
    # print(i, data[:,i].shape, prop_missing(data[:,i,0]))
    if in_huabei(station_loc[i]) and prop_missing(data[:, i, 0]) < 0.3: