Example #1
def run_dcrnn(args):

    #Pick GPU to use. Activate tensorflow_gpu conda env
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_instance

    with open(args.config_filename) as f:
        config = yaml.load(f)
    tf_config = tf.ConfigProto()
    if args.use_cpu_only:
        tf_config = tf.ConfigProto(device_count={'GPU': 0})
    tf_config.gpu_options.allow_growth = True

    ### Get the adjacency matrix filename from the YAML file and load the matrix
    graph_pkl_filename = config['data']['graph_pkl_filename']
    _, _, adj_mx = load_graph_data(graph_pkl_filename)
    with tf.Session(config=tf_config) as sess:
        supervisor = DCRNNSupervisor(adj_mx=adj_mx, **config)

        ### Load the trained model; the checkpoint filename comes from the YAML file
        supervisor.load(sess, config['train']['model_filename'])

        ### Evaluate or perform prediction
        outputs = supervisor.evaluate(sess)
        np.savez_compressed(args.output_filename, **outputs)
        print('Predictions saved as {}.'.format(args.output_filename))
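A minimal sketch of the command-line wiring these run_dcrnn/main snippets assume. The flag names below are the attributes read in Example #1 (config_filename, output_filename, gpu_instance, use_cpu_only); the default values are placeholders, not the original repository's defaults.

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_filename', default='data/dcrnn_config.yaml', type=str,
                        help='YAML file with the data/model/train sections.')
    parser.add_argument('--output_filename', default='data/dcrnn_predictions.npz', type=str,
                        help='Where to save the .npz produced by supervisor.evaluate().')
    parser.add_argument('--gpu_instance', default='0', type=str,
                        help='Value assigned to CUDA_VISIBLE_DEVICES.')
    parser.add_argument('--use_cpu_only', action='store_true',
                        help='Hide all GPUs from TensorFlow.')
    args = parser.parse_args()
    run_dcrnn(args)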
Example #2
def main(args):
    
    #Pick GPU to use. Activate tensorflow_gpu conda env
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_instance
    
    ### Open model parameters file
    with open(args.config_filename) as f:
        supervisor_config = yaml.load(f)
        
        ### Load adjacency matrix
        graph_pkl_filename = supervisor_config['data'].get('graph_pkl_filename')
        
        ### Load the graph; see lib/utils.py for this function
        sensor_ids, sensor_id_to_ind, adj_mx = load_graph_data(graph_pkl_filename)

        tf_config = tf.ConfigProto()
        if args.use_cpu_only:
            tf_config = tf.ConfigProto(device_count={'GPU': 0})
        tf_config.gpu_options.allow_growth = True
        
        ### Call the DCRNN supervisor class and start training
        with tf.Session(config=tf_config) as sess:
            supervisor = DCRNNSupervisor(adj_mx=adj_mx, **supervisor_config)

            supervisor.train(sess=sess)
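Every entry point above pulls its settings from a YAML file. A minimal sketch of the sections these snippets actually read is shown below; the key names (data.graph_pkl_filename, data.dataset_dir, model.num_nodes, train.model_filename) come from the examples themselves, the values are placeholders, and a real DCRNNSupervisor config carries many more model and training keys.

import yaml

example_config = yaml.safe_load("""
data:
  graph_pkl_filename: data/sensor_graph/adj_mx.pkl
  dataset_dir: data/METR-LA
model:
  num_nodes: 207
train:
  model_filename: data/model/dcrnn_best.ckpt
""")
print(example_config['data']['graph_pkl_filename'])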
Example #3
def main(args):
    with open(args.config_filename) as f:
        supervisor_config = yaml.load(f)

        SC_mx = load_graph_data(
            supervisor_config)  # Load structural connectivity matrix.

        tf_config = tf.ConfigProto()
        if args.use_cpu_only:
            tf_config = tf.ConfigProto(device_count={'GPU': 0})
        tf_config.gpu_options.allow_growth = True
        with tf.Session(config=tf_config) as sess:
            supervisor = DCRNNSupervisor(adj_mx=SC_mx, **supervisor_config)
            supervisor.train(sess=sess)

            if args.save_predictions:
                outputs, _ = supervisor.evaluate(sess=sess)

                print('Save outputs in: ', supervisor._log_dir)
                np.savez(supervisor._log_dir + '/outputs',
                         predictions=outputs['predictions'],
                         groundtruth=outputs['groundtruth'])

                plot_predictions(
                    log_dir=supervisor._log_dir,
                    dataset_dir=supervisor_config['data']['dataset_dir'])
Example #4
def main(args):
    with open(args.config_filename) as f:
        supervisor_config = yaml.load(f)

        SC_mx = load_graph_data(
            supervisor_config)  # Load structural connectivity matrix.

        if args.test_dataset:  # For evaluating the model on a different dataset.
            supervisor_config['data']['dataset_dir'] = args.test_dataset

        tf_config = tf.ConfigProto()
        if args.use_cpu_only:
            tf_config = tf.ConfigProto(device_count={'GPU': 0})
        tf_config.gpu_options.allow_growth = True
        with tf.Session(config=tf_config) as sess:
            supervisor = DCRNNSupervisor(adj_mx=SC_mx, **supervisor_config)
            supervisor.load(
                sess,
                supervisor_config['train']['model_filename'])  # Restore model.

            if args.save_predictions:
                outputs, _ = supervisor.evaluate(sess=sess)

                print('Save outputs in: ', supervisor._log_dir)
                np.savez(supervisor._log_dir + '/' + args.output_name,
                         predictions=outputs['predictions'],
                         groundtruth=outputs['groundtruth'])

                plot_predictions(
                    log_dir=supervisor._log_dir,
                    output_name=args.output_name,
                    dataset_dir=supervisor_config['data']['dataset_dir'])
Example #5
def main(config):
    logger = config.get_logger('train')

    graph_pkl_filename = 'data/sensor_graph/adj_mx_unix.pkl'
    _, _, adj_mat = utils.load_graph_data(graph_pkl_filename)
    data = utils.load_dataset(
        dataset_dir='data/METR-LA',
        batch_size=config["arch"]["args"]["batch_size"],
        test_batch_size=config["arch"]["args"]["batch_size"])
    for k, v in data.items():
        if hasattr(v, 'shape'):
            print((k, v.shape))

    train_data_loader = data['train_loader']
    val_data_loader = data['val_loader']

    num_train_sample = data['x_train'].shape[0]
    num_val_sample = data['x_val'].shape[0]

    # get number of iterations per epoch for progress bar
    num_train_iteration_per_epoch = math.ceil(
        num_train_sample / config["arch"]["args"]["batch_size"])
    num_val_iteration_per_epoch = math.ceil(
        num_val_sample / config["arch"]["args"]["batch_size"])

    # setup data_loader instances
    # data_loader = config.initialize('data_loader', module_data)
    # valid_data_loader = data_loader.split_validation()

    # build model architecture, then print to console
    adj_arg = {"adj_mat": adj_mat}
    model = config.initialize('arch', module_arch, **adj_arg)
    # model = getattr(module_arch, config['arch']['type'])(config['arch']['args'], adj_arg)
    logger.info(model)

    # get function handles of loss and metrics
    loss = config.initialize('loss', module_metric,
                             **{"scaler": data['scaler']})
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # build optimizer and learning rate scheduler; delete every line containing lr_scheduler to disable the scheduler
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.initialize('optimizer', torch.optim, trainable_params)

    lr_scheduler = config.initialize('lr_scheduler', torch.optim.lr_scheduler,
                                     optimizer)

    trainer = DCRNNTrainer(model,
                           loss,
                           metrics,
                           optimizer,
                           config=config,
                           data_loader=train_data_loader,
                           valid_data_loader=val_data_loader,
                           lr_scheduler=lr_scheduler,
                           len_epoch=num_train_iteration_per_epoch,
                           val_len_epoch=num_val_iteration_per_epoch)

    trainer.train()
Example #6
def main(args):
    with open(args.config_filename) as f:
        supervisor_config = yaml.load(f)

        graph_pkl_filename = supervisor_config['data'].get(
            'graph_pkl_filename')
        sensor_ids, sensor_id_to_ind, adj_mx = load_graph_data(
            graph_pkl_filename)

        split_into_subgraphs = bool(
            supervisor_config['data'].get('split_into_subgraphs'))
        if split_into_subgraphs:
            assert args.subgraph_id is not None, \
                'Enter a subgraph_id as a python argument'
            subgraph_id = str(args.subgraph_id)
            print('Splitting into Sub-graphs: True')
            print('Current Sub-graph ID: ' + subgraph_id)
            adj_mx = partition_into_n_subgraphs(
                graph_pkl_filename, subgraph_id,
                int(supervisor_config['data'].get('number_of_subgraphs')))

            #Choosing the correct dataset directory for current subgraph
            supervisor_config['data']['dataset_dir'] = supervisor_config[
                'data'].get('dataset_dir') + subgraph_id

            #Choosing the correct number of nodes for current subgraph
            listofnodesizes = (
                supervisor_config['model'].get('num_nodes')).split(',')
            supervisor_config['model']['num_nodes'] = int(
                listofnodesizes[int(subgraph_id)])
        else:
            subgraph_id = str(args.subgraph_id)

        currentCuda.init()
        currentCuda.dcrnn_cudadevice = torch.device(
            "cuda:" +
            str(args.current_cuda_id) if torch.cuda.is_available() else "cpu")

        #Moving import here since the global variable for cuda device is declared above
        import model.pytorch.dcrnn_supervisor as dcrnn_supervisor

        supervisor = dcrnn_supervisor.DCRNNSupervisor(adj_mx=adj_mx,
                                                      subgraph_id=subgraph_id,
                                                      **supervisor_config)

        #supervisor.train(subgraph_identifier=subgraph_id)
        #Loading the previously trained model
        supervisor.load_model(subgraph_id=subgraph_id)

        #Evaluating the model finally and storing the results
        mean_score, outputs = supervisor.evaluate('test')
        output_filename = supervisor_config.get(
            'predictions_dir'
        ) + '/' + 'final_predictions' + subgraph_id + '.npz'
        np.savez_compressed(output_filename, **outputs)
        print("MAE : {}".format(mean_score))
        print('Predictions saved as {}.'.format(output_filename))
Example #7
def read_adj(args):
    adj_mat_filename = args.paths['adj_mat_filename']
    if Path(adj_mat_filename).suffix in ['.pkl']:
        sensor_ids, sensor_id_to_ind, adj_mx = load_graph_data(
            adj_mat_filename)
    elif Path(adj_mat_filename).suffix in ['.csv']:
        adj_mx = np.loadtxt(adj_mat_filename, dtype=np.float32, delimiter=',')
    else:
        adj_mx = np.loadtxt(adj_mat_filename, dtype=np.float32, delimiter=' ')
    return adj_mx
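read_adj only needs an object exposing a paths mapping with an 'adj_mat_filename' entry, so a hypothetical usage sketch (the pickle path is the one hard-coded in Example #12) could look like:

from types import SimpleNamespace

args = SimpleNamespace(paths={'adj_mat_filename': 'data/sensor_graph/adj_mx.pkl'})
adj_mx = read_adj(args)   # .pkl goes through load_graph_data; .csv and other files through np.loadtxt
print(adj_mx.shape)       # (num_nodes, num_nodes)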
Example #8
def main(args):
    with open(args.config_filename) as f:
        supervisor_config = yaml.load(f)

        graph_pkl_filename = supervisor_config['data'].get(
            'graph_pkl_filename')
        sensor_ids, sensor_id_to_ind, adj_mx = load_graph_data(
            graph_pkl_filename)

        supervisor = DCRNNSupervisor(adj_mx=adj_mx, **supervisor_config)

        supervisor.train()
Example #9
def run_dcrnn(args):
    with open(args.config_filename) as f:
        supervisor_config = yaml.load(f)

        graph_pkl_filename = supervisor_config['data'].get(
            'graph_pkl_filename')
        sensor_ids, sensor_id_to_ind, adj_mx = load_graph_data(
            graph_pkl_filename)

        supervisor = DCRNNSupervisor(adj_mx=adj_mx, **supervisor_config)
        mean_score, outputs = supervisor.evaluate('test')
        np.savez_compressed(args.output_filename, **outputs)
        print("MAE : {}".format(mean_score))
        print('Predictions saved as {}.'.format(args.output_filename))
Example #10
def main(args):
    with open(args.config_filename) as f:

        supervisor_config = yaml.load(f)
        graph_pkl_filename = supervisor_config['data'].get('graph_pkl_filename')
        sensor_ids, sensor_id_to_ind, adj_mx = load_graph_data(graph_pkl_filename)

        tf_config = tf.ConfigProto()
        if args.use_cpu_only:
            tf_config = tf.ConfigProto(device_count={'GPU': 0})
        tf_config.gpu_options.allow_growth = True
        with tf.Session(config=tf_config) as sess:
            supervisor = DCRNNSupervisor(adj_mx=adj_mx, **supervisor_config)
            supervisor.train(sess=sess)
Example #11
def run_dcrnn(args):
    with open(args.config_filename) as f:
        config = yaml.safe_load(f)
    tf_config = tf.compat.v1.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    graph_pkl_filename = config['data']['graph_pkl_filename']
    _, _, adj_mx = load_graph_data(graph_pkl_filename)
    with tf.Session(config=tf_config) as sess:
        supervisor = DCRNNSupervisor(adj_mx=adj_mx, **config)
        supervisor.load(sess, config['train']['model_filename'])
        outputs = supervisor.print_datastream(sess)
        np.savez_compressed(args.output_filename + '.input.npz', **outputs)
        print('Evaluating...')
        supervisor.evaluate(sess)
Example #12
def run_dcrnn(args):
    graph_pkl_filename = 'data/sensor_graph/adj_mx.pkl'
    with open(args.config_filename) as f:
        config = yaml.load(f)
    tf_config = tf.ConfigProto()
    if args.use_cpu_only:
        tf_config = tf.ConfigProto(device_count={'GPU': 0})
    tf_config.gpu_options.allow_growth = True
    _, _, adj_mx = load_graph_data(graph_pkl_filename)
    with tf.Session(config=tf_config) as sess:
        supervisor = DCRNNSupervisor(adj_mx=adj_mx, **config)
        supervisor.load(sess, config['train']['model_filename'])
        outputs = supervisor.evaluate(sess)
        np.savez_compressed(args.output_filename, **outputs)
        print('Predictions saved as {}.'.format(args.output_filename))
Example #13
def main(args):
    with open(args.config_filename) as f:
        supervisor_config = yaml.load(f)

        graph_pkl_filename = supervisor_config['data'].get(
            'graph_pkl_filename', 'data/sensor_graph/adj_mx_bay.pkl')
        # graph_pkl_filename = supervisor_config['data'].get('graph_pkl_filename',
        #                                                    'C:/Users/Administrator/Desktop/DCRNN_PyTorch-memoryefficiency/data/sensor_graph/adj_mx.pkl')
        sensor_ids, sensor_id_to_ind, adj_mx = load_graph_data(
            graph_pkl_filename)

        # if args.use_cpu_only:
        #     tf_config = tf.ConfigProto(device_count={'GPU': 0})
        # with tf.Session(config=tf_config) as sess:
        supervisor = GARNNSupervisor(adj_mx=adj_mx, **supervisor_config)

        supervisor.train()
Example #14
def main(args):
    with open(args.config_filename) as f:
        supervisor_config = yaml.load(f)

        graph_pkl_filename = supervisor_config['data'].get(
            'graph_pkl_filename')
        sensor_ids, sensor_id_to_ind, adj_mx = load_graph_data(
            graph_pkl_filename)

        import os
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
        tf_config = tf.ConfigProto()
        tf_config.gpu_options.allow_growth = True
        with tf.Session(config=tf_config) as sess:
            supervisor = DCRNNSupervisor(adj_mx=adj_mx, **supervisor_config)

            supervisor.train(sess=sess)
Example #15
def main(args):
    print('main started with args: {}'.format(args))
    with open(args.config_filename) as f:
        supervisor_config = yaml.load(f)

        add_prefix(args.train_local,supervisor_config,'base_dir')
        add_prefix(args.data_local,supervisor_config['data'],'dataset_dir')
        add_prefix(args.data_local,supervisor_config['data'],'graph_pkl_filename')
        add_prefix(args.data_local,supervisor_config['train'],'load_model_dir')

        graph_pkl_filename = supervisor_config['data'].get('graph_pkl_filename')
        sensor_ids, sensor_id_to_ind, adj_mx = load_graph_data(graph_pkl_filename)

        supervisor = DCRNNSupervisor(adj_mx=adj_mx, **supervisor_config)
        mean_score, outputs = supervisor.evaluate('test')
        np.savez_compressed(args.output_filename, **outputs)
        print("MAE : {}".format(mean_score))
        print('Predictions saved as {}.'.format(args.output_filename))
Example #16
def main(args):
    print('main started with args: {}'.format(args))
    with open(args.config_filename) as f:
        supervisor_config = yaml.load(f)
        add_prefix(args.train_local, supervisor_config, 'base_dir')
        add_prefix(args.data_local, supervisor_config['data'], 'dataset_dir')
        add_prefix(args.data_local, supervisor_config['data'],
                   'graph_pkl_filename')

        print('using supervisor_config: {}'.format(supervisor_config))
        graph_pkl_filename = supervisor_config['data'].get(
            'graph_pkl_filename')

        sensor_ids, sensor_id_to_ind, adj_mx = load_graph_data(
            os.path.join(args.data_local, graph_pkl_filename))

        supervisor = DCRNNSupervisor(adj_mx=adj_mx, **supervisor_config)

        supervisor.train()
Example #17
def main(args):
    with open(args.config_filename) as f:
        supervisor_config = yaml.load(f)

        graph_pkl_filename = supervisor_config['data'].get(
            'graph_pkl_filename')
        sensor_ids, sensor_id_to_ind, adj_mx = load_graph_data(
            graph_pkl_filename)
        data_type = args.config_filename.split('/')[-1].split('.')[0].split(
            '_')[-1]  #'bay' or 'la'
        supervisor = DCRNNSupervisor(data_type=data_type,
                                     LOAD_INITIAL=args.LOAD_INITIAL,
                                     adj_mx=adj_mx,
                                     **supervisor_config)

        if args.TEST_ONLY:
            supervisor.evaluate_test()
        else:
            supervisor.train()
Example #18
def main(args):
    with open(args.config_filename) as f:
        supervisor_config = yaml.load(f)
        if args.rep:
            supervisor_config['param']['rep'] = args.rep
            print('overwrite rep parameter with argument')

        graph_pkl_filename = supervisor_config['data'].get('graph_pkl_filename')
        sensor_ids, sensor_id_to_ind, adj_mx = load_graph_data(graph_pkl_filename)

        id_str = search_id(supervisor_config['alg'], supervisor_config['param'])
        model_dir = supervisor_config['train']['model_dir']
        supervisor_config['train']['model_dir'] = os.path.join(model_dir, id_str)
        dset_dir = supervisor_config['data']['dataset_dir']
        supervisor_config['data']['dataset_dir'] = os.path.join(dset_dir, id_str)

        supervisor = DCRNNSupervisor(adj_mx=adj_mx, **supervisor_config)

        supervisor.train()
Example #19
def main(args):
    tf.reset_default_graph()
    with open(args.config_filename) as f:
        with tf.Graph().as_default() as g:
            supervisor_config = yaml.load(f)
            graph_pkl_filename = supervisor_config['data'].get(
                'graph_pkl_filename')
            if supervisor_config['data']['data_type'] == 'npz':
                sensor_ids, sensor_id_to_ind, adj_mx = load_graph_data(
                    graph_pkl_filename)
            elif supervisor_config['data']['data_type'] == 'csv':
                adj_mx = load_graph_data_from_csv(
                    supervisor_config['data'].get('dataset_dir'))
            tf_config = tf.ConfigProto()
            if args.use_cpu_only:
                tf_config = tf.ConfigProto(device_count={'GPU': 0})
            tf_config.gpu_options.allow_growth = True
            #tf_config.gpu_options.per_process_gpu_memory_fraction = 1
            with tf.Session(config=tf_config) as sess:
                supervisor = DCRNNSupervisor(args=args,
                                             adj_mx=adj_mx,
                                             **supervisor_config)

                supervisor.train(sess=sess)
Example #20
def predict(config_filename='data/model/dcrnn_highway_flask.yaml',
            current_cuda_id=0,
            use_cpu_only=False,
            subgraph_id=0):
    # get sensor data and save it into the test dataset dir
    data = request.get_json()
    sensor_data = np.array([data["sensor_data"]])

    with open(config_filename) as f:
        supervisor_config = yaml.load(f)

        graph_pkl_filename = supervisor_config['data'].get(
            'graph_pkl_filename')
        sensor_ids, sensor_id_to_ind, adj_mx = load_graph_data(
            graph_pkl_filename)

        split_into_subgraphs = bool(
            supervisor_config['data'].get('split_into_subgraphs'))
        if split_into_subgraphs:
            assert subgraph_id is not None, \
                'Enter a subgraph_id as a python argument'
            subgraph_id = str(subgraph_id)
            print('Splitting into Sub-graphs: True')
            print('Current Sub-graph ID: ' + subgraph_id)
            adj_mx = partition_into_n_subgraphs(
                graph_pkl_filename, subgraph_id,
                int(supervisor_config['data'].get('number_of_subgraphs')))

            #Choosing the correct dataset directory for current subgraph
            supervisor_config['data']['dataset_dir'] = supervisor_config[
                'data'].get('dataset_dir') + subgraph_id

            #Choosing the correct number of nodes for current subgraph
            listofnodesizes = (
                supervisor_config['model'].get('num_nodes')).split(',')
            supervisor_config['model']['num_nodes'] = int(
                listofnodesizes[int(subgraph_id)])
        else:
            subgraph_id = str(subgraph_id)

        currentCuda.init()
        currentCuda.dcrnn_cudadevice = torch.device(
            "cuda:" +
            str(current_cuda_id) if torch.cuda.is_available() else "cpu")

        # Save the JSON test data as .npz in the test dir; this bypasses the need for actual train and val datasets
        if not split_into_subgraphs:
            if not os.path.exists(
                    supervisor_config['data'].get('dataset_dir')):
                os.makedirs(supervisor_config['data'].get('dataset_dir'))
            np.savez_compressed(supervisor_config['data'].get('dataset_dir') +
                                '/' + 'test.npz',
                                x=sensor_data['x'],
                                y=sensor_data['y'],
                                x_offset=None,
                                y_offset=None)
            # These train and val files are not used anywhere except in an assert
            np.savez_compressed(supervisor_config['data'].get('dataset_dir') +
                                '/' + 'train.npz',
                                x=sensor_data['x'],
                                y=sensor_data['y'],
                                x_offset=None,
                                y_offset=None)
            np.savez_compressed(supervisor_config['data'].get('dataset_dir') +
                                '/' + 'val.npz',
                                x=sensor_data['x'],
                                y=sensor_data['y'],
                                x_offset=None,
                                y_offset=None)

        #Moving import here since the global variable for cuda device is declared above
        import model.pytorch.dcrnn_supervisor as dcrnn_supervisor

        supervisor = dcrnn_supervisor.DCRNNSupervisor(adj_mx=adj_mx,
                                                      subgraph_id=subgraph_id,
                                                      **supervisor_config)

        #supervisor.train(subgraph_identifier=subgraph_id)
        #Loading the previously trained model
        supervisor.load_model(subgraph_id=subgraph_id)

        #Evaluating the model finally and storing the results
        mean_score, outputs = supervisor.evaluate('test')
        output_filename = supervisor_config.get(
            'predictions_dir'
        ) + '/' + 'final_predictions' + subgraph_id + '.npz'
        np.savez_compressed(output_filename, **outputs)
        print("MAE : {}".format(mean_score))
        print('Predictions saved as {}.'.format(output_filename))

        predictions = outputs['prediction']
        return jsonify({"prediction": predictions})
Example #21
def main(args):
    cfg = read_cfg_file(args.config_filename)
    log_dir = _get_log_dir(cfg)
    log_level = cfg.get('log_level', 'INFO')

    logger = utils.get_logger(log_dir, __name__, 'info.log', level=log_level)

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    #  all edge_index values in the same dataset are the same
    # edge_index = adjacency_to_edge_index(adj_mx)  # already added self-loop
    logger.info(cfg)
    batch_size = cfg['data']['batch_size']
    test_batch_size = cfg['data']['test_batch_size']
    # edge_index = utils.load_pickle(cfg['data']['edge_index_pkl_filename'])
    hz = cfg['data'].get('name', 'nothz') == 'hz'

    adj_mx_list = []
    graph_pkl_filename = cfg['data']['graph_pkl_filename']

    if not isinstance(graph_pkl_filename, list):
        graph_pkl_filename = [graph_pkl_filename]

    src = []
    dst = []
    for g in graph_pkl_filename:
        if hz:
            adj_mx = utils.load_graph_data_hz(g)
        else:
            _, _, adj_mx = utils.load_graph_data(g)

        for i in range(len(adj_mx)):
            adj_mx[i, i] = 0
        adj_mx_list.append(adj_mx)

    adj_mx = np.stack(adj_mx_list, axis=-1)
    if cfg['model'].get('norm', False):
        print('row normalization')
        adj_mx = adj_mx / (adj_mx.sum(axis=0) + 1e-18)
    src, dst = adj_mx.sum(axis=-1).nonzero()
    edge_index = torch.tensor([src, dst], dtype=torch.long, device=device)
    edge_attr = torch.tensor(adj_mx[adj_mx.sum(axis=-1) != 0],
                             dtype=torch.float,
                             device=device)

    output_dim = cfg['model']['output_dim']
    for i in range(adj_mx.shape[-1]):
        logger.info(adj_mx[..., i])

    #  print(adj_mx.shape) (207, 207)

    if hz:
        dataset = utils.load_dataset_hz(**cfg['data'],
                                        scaler_axis=(0, 1, 2, 3))
    else:
        dataset = utils.load_dataset(**cfg['data'])
    for k, v in dataset.items():
        if hasattr(v, 'shape'):
            logger.info((k, v.shape))

    scaler = dataset['scaler']
    scaler_torch = utils.StandardScaler_Torch(scaler.mean,
                                              scaler.std,
                                              device=device)
    logger.info('scaler.mean:{}, scaler.std:{}'.format(scaler.mean,
                                                       scaler.std))

    model = Net(cfg).to(device)
    # model.apply(init_weights)
    criterion = nn.L1Loss(reduction='mean')
    optimizer = optim.Adam(model.parameters(),
                           lr=cfg['train']['base_lr'],
                           eps=cfg['train']['epsilon'])
    scheduler = StepLR2(optimizer=optimizer,
                        milestones=cfg['train']['steps'],
                        gamma=cfg['train']['lr_decay_ratio'],
                        min_lr=cfg['train']['min_learning_rate'])

    max_grad_norm = cfg['train']['max_grad_norm']
    train_patience = cfg['train']['patience']
    val_steady_count = 0
    last_val_mae = 1e6
    horizon = cfg['model']['horizon']

    for epoch in range(cfg['train']['epochs']):
        total_loss = 0
        i = 0
        begin_time = time.perf_counter()
        train_iterator = dataset['train_loader'].get_iterator()
        model.train()
        for _, (x, y, xtime, ytime) in enumerate(train_iterator):
            optimizer.zero_grad()
            y = y[:, :horizon, :, :output_dim]
            sequences, y = collate_wrapper(x=x,
                                           y=y,
                                           edge_index=edge_index,
                                           edge_attr=edge_attr,
                                           device=device)
            y_pred = model(sequences)
            y_pred = scaler_torch.inverse_transform(y_pred)
            y = scaler_torch.inverse_transform(y)
            loss = criterion(y_pred, y)
            loss.backward()
            clip_grad_norm_(model.parameters(), max_grad_norm)
            optimizer.step()
            total_loss += loss.item()
            i += 1

        val_result = evaluate(model=model,
                              dataset=dataset,
                              dataset_type='val',
                              edge_index=edge_index,
                              edge_attr=edge_attr,
                              device=device,
                              output_dim=output_dim,
                              logger=logger,
                              detail=False,
                              cfg=cfg)
        val_mae, _, _ = val_result
        time_elapsed = time.perf_counter() - begin_time

        logger.info(('Epoch:{}, train_mae:{:.2f}, val_mae:{},'
                     'r_loss={:.2f},lr={},  time_elapsed:{}').format(
                         epoch, total_loss / i, val_mae, 0,
                         str(scheduler.get_lr()), time_elapsed))
        if last_val_mae > val_mae:
            logger.info('val_mae decreased from {:.2f} to {:.2f}'.format(
                last_val_mae, val_mae))
            last_val_mae = val_mae
            val_steady_count = 0
        else:
            val_steady_count += 1

        #  every test_every_n_epochs epochs, run evaluation on the test dataset.
        if (epoch + 1) % cfg['train']['test_every_n_epochs'] == 0:
            evaluate(model=model,
                     dataset=dataset,
                     dataset_type='test',
                     edge_index=edge_index,
                     edge_attr=edge_attr,
                     device=device,
                     output_dim=output_dim,
                     logger=logger,
                     cfg=cfg)

        if (epoch + 1) % cfg['train']['save_every_n_epochs'] == 0:
            save_dir = log_dir
            if not os.path.exists(save_dir):
                os.mkdir(save_dir)
            config_path = os.path.join(save_dir,
                                       'config-{}.yaml'.format(epoch + 1))
            epoch_path = os.path.join(save_dir,
                                      'epoch-{}.pt'.format(epoch + 1))
            torch.save(model.state_dict(), epoch_path)
            with open(config_path, 'w') as f:
                from copy import deepcopy
                save_cfg = deepcopy(cfg)
                save_cfg['model']['save_path'] = epoch_path
                f.write(yaml.dump(save_cfg, Dumper=Dumper))

        if train_patience <= val_steady_count:
            logger.info('early stopping.')
            break
        scheduler.step()
Example #22
    def __init__(self):
        with open('data/dcrnn_la.yaml') as f_la, open(
                'data/dcrnn_bay.yaml') as f_bay:
            config_la = yaml.load(f_la, Loader=yaml.FullLoader)
            config_bay = yaml.load(f_bay, Loader=yaml.FullLoader)

        sensor_ids1, sensor_id_to_ind1, adj_mx_la = load_graph_data(
            config_la['data'].get('graph_pkl_filename'))
        sensor_ids2, sensor_id_to_ind2, adj_mx_bay = load_graph_data(
            config_bay['data'].get('graph_pkl_filename'))

        self._kwargs = config_la
        self._data_kwargs = config_la.get('data')
        self._model_kwargs = config_la.get('model')
        self._data_kwargs2 = config_bay.get('data')
        self._model_kwargs2 = config_bay.get('model')
        self._train_kwargs = config_la.get('train')

        self.max_grad_norm = self._train_kwargs.get('max_grad_norm', 1.)

        # logging.
        self._log_dir = self._get_log_dir(config_la)
        self._writer = SummaryWriter('runs/' + self._log_dir)

        log_level = self._kwargs.get('log_level', 'INFO')
        self._logger = utils.get_logger(self._log_dir,
                                        __name__,
                                        'info.log',
                                        level=log_level)

        # data set
        self._data = utils.load_dataset(**self._data_kwargs)
        self._data2 = utils.load_dataset(**self._data_kwargs2)
        self.standard_scaler = self._data['scaler']
        self.standard_scaler2 = self._data2['scaler']

        self._logger.info('Setting: {}'.format(args.setting))
        self._logger.info("Party A trn samples: {}".format(
            self._data['train_loader'].size))
        self._logger.info("Party A vld samples: {}".format(
            self._data['val_loader'].size))
        self._logger.info("Party A tst samples: {}".format(
            self._data['test_loader'].size))
        self._logger.info("Party B trn samples: {}".format(
            self._data2['train_loader'].size))
        self._logger.info("Party B vld samples: {}".format(
            self._data2['val_loader'].size))
        self._logger.info("Party B tst samples: {}".format(
            self._data2['test_loader'].size))

        self.num_nodes = int(self._model_kwargs.get('num_nodes', 1))
        self.num_nodes2 = int(self._model_kwargs2.get('num_nodes', 1))
        self._logger.info("num_nodes: {}".format(self.num_nodes))
        self._logger.info("num_nodes2: {}".format(self.num_nodes2))

        self.input_dim = int(self._model_kwargs.get('input_dim', 1))
        self.seq_len = int(
            self._model_kwargs.get('seq_len'))  # for the encoder
        self.output_dim = int(self._model_kwargs.get('output_dim', 1))
        self.use_curriculum_learning = bool(
            self._model_kwargs.get('use_curriculum_learning', False))
        self.horizon = int(self._model_kwargs.get('horizon',
                                                  1))  # for the decoder

        # setup model
        dcrnn_model = DCRNNModel(adj_mx_la, self._logger, **self._model_kwargs)
        dcrnn_model2 = DCRNNModel(adj_mx_bay, self._logger,
                                  **self._model_kwargs2)

        if torch.cuda.is_available():
            # dcrnn_model = nn.DataParallel(dcrnn_model)
            # dcrnn_model2 = nn.DataParallel(dcrnn_model2)
            self.dcrnn_model = dcrnn_model.cuda()
            self.dcrnn_model2 = dcrnn_model2.cuda()
        else:
            self.dcrnn_model = dcrnn_model
            self.dcrnn_model2 = dcrnn_model2
        self._logger.info("Models created")
        self._logger.info('Local epochs:' + str(args.local_epochs))

        self._epoch_num = self._train_kwargs.get('epoch', 0)
        if self._epoch_num > 0:
            self.load_model(self._epoch_num)

        # use PySyft for SPDZ
        if args.setting == 'fedavg' and args.spdz:
            import syft as sy
            self._logger.info('Using SPDZ for FedAvg')
            hook = sy.TorchHook(torch)
            self.party_workers = [
                sy.VirtualWorker(hook, id="party{:d}".format(i))
                for i in range(2)
            ]
            self.crypto = sy.VirtualWorker(hook, id="crypto")

        # DP
        if args.dp:

            class HiddenPrints:
                def __enter__(self):
                    self._original_stdout = sys.stdout
                    sys.stdout = open(os.devnull, 'w')

                def __exit__(self, exc_type, exc_val, exc_tb):
                    sys.stdout.close()
                    sys.stdout = self._original_stdout

            def find_sigma(eps, batches_per_lot, dataset_size):
                lotSize = batches_per_lot * args.batch_size  # L
                N = dataset_size
                delta = min(10**(-5), 1 / N)
                lotsPerEpoch = N / lotSize
                q = lotSize / N  # Sampling ratio
                T = args.epochs * lotsPerEpoch  # Total number of lots

                def compute_dp_sgd_wrapper(_sigma):
                    with HiddenPrints():
                        return compute_dp_sgd_privacy.compute_dp_sgd_privacy(
                            n=N,
                            batch_size=lotSize,
                            noise_multiplier=_sigma,
                            epochs=args.epochs,
                            delta=delta)[0] - args.epsilon

                sigma = newton(compute_dp_sgd_wrapper, x0=0.5,
                               tol=1e-4)  # adjust x0 to avoid error
                with HiddenPrints():
                    actual_eps = compute_dp_sgd_privacy.compute_dp_sgd_privacy(
                        n=N,
                        batch_size=lotSize,
                        noise_multiplier=sigma,
                        epochs=args.epochs,
                        delta=delta)[0]
        #         print('Batches_per_lot={}, q={}, T={}, sigma={}'.format(batches_per_lot, q, T, sigma))
        #         print('actual epslion = {}'.format(actual_eps))
                return sigma

            self._logger.info('Epsilon: ' + str(args.epsilon))
            self._logger.info('Lotsize_scaler: ' + str(args.lotsize_scaler))
            lotsizes = [
                N**.5 * args.lotsize_scaler for N in [
                    self._data['train_loader'].size,
                    self._data2['train_loader'].size
                ]
            ]
            batches_per_lot_list = list(
                map(lambda lotsize: max(round(lotsize / args.batch_size), 1),
                    lotsizes))
            batches_per_lot_list = [
                min(bpl, loader_len)
                for bpl, loader_len in zip(batches_per_lot_list, [
                    self._data['train_loader'].num_batch,
                    self._data2['train_loader'].num_batch
                ])
            ]
            self._logger.info('Batches per lot: ' + str(batches_per_lot_list))
            sigma_list = [
                find_sigma(args.epsilon, bpl, N)
                for bpl, N in zip(batches_per_lot_list, [
                    self._data['train_loader'].size,
                    self._data2['train_loader'].size
                ])
            ]
            self._logger.info('Sigma: ' + str(sigma_list))

            for mod, bpl, sig in zip([self.dcrnn_model, self.dcrnn_model2],
                                     batches_per_lot_list, sigma_list):
                mod.batch_per_lot = bpl
                mod.sigma = sig

            self.dcrnn_model.batch_per_lot = batches_per_lot_list[0]
            self.dcrnn_model.sigma = sigma_list[0]
            self.dcrnn_model2.batch_per_lot = batches_per_lot_list[1]
            self.dcrnn_model2.sigma = sigma_list[1]

            self._lastNoiseShape = None
            self._noiseToAdd = None
Example #23
def main(args):
    with open(args.config_filename) as f:
        supervisor_config = yaml.load(f)

        graph_pkl_filename = supervisor_config['data'].get(
            'graph_pkl_filename')
        sensor_ids, sensor_id_to_ind, adj_mx = load_graph_data(
            graph_pkl_filename)
        supervisor_config['model']['num_nodes'] = num_nodes = len(sensor_ids)

        # Data preprocessing
        traffic_df_filename = supervisor_config['data']['hdf_filename']
        df_data = pd.read_hdf(traffic_df_filename)
        #df_data = df_data.iloc[int(df_data.shape[0]/3):,:]
        validation_ratio = supervisor_config.get('data').get(
            'validation_ratio')
        test_ratio = supervisor_config.get('data').get('test_ratio')
        df_train, df_val, df_test = train_val_test_split(
            df_data, val_ratio=validation_ratio, test_ratio=test_ratio)

        batch_size = supervisor_config.get('data').get('batch_size')
        val_batch_size = supervisor_config.get('data').get('val_batch_size')
        test_batch_size = supervisor_config.get('data').get('test_batch_size')
        horizon = supervisor_config.get('model').get('horizon')
        seq_len = supervisor_config.get('model').get('seq_len')
        scaler = StandardScaler(mean=df_train.values.mean(),
                                std=df_train.values.std())

        data_train = generate_seq2seq_data(df_train, batch_size, seq_len,
                                           horizon, num_nodes, 'train', scaler)
        data_val = generate_seq2seq_data(df_val, val_batch_size, seq_len,
                                         horizon, num_nodes, 'val', scaler)
        data_train.update(data_val)
        #data_train['scaler'] = scaler

        data_test = generate_seq2seq_data(df_test, test_batch_size, seq_len,
                                          horizon, num_nodes, 'test', scaler)
        #data_test['scaler'] = scaler

        tf_config = tf.ConfigProto()
        if args.use_cpu_only:
            tf_config = tf.ConfigProto(device_count={'GPU': 0})
        tf_config.gpu_options.allow_growth = True
        with tf.Session(config=tf_config) as sess:
            supervisor = DCRNNSupervisor(adj_mx, data_train, supervisor_config)

            data_tag = supervisor_config.get('data').get('dataset_dir')
            folder = data_tag + '/model/'
            if not os.path.exists(folder):
                os.makedirs(folder)
            # Train
            supervisor.train(sess=sess)

            # Test
            yaml_files = glob.glob('%s/model/*/*.yaml' % data_tag,
                                   recursive=True)
            yaml_files.sort(key=os.path.getmtime)
            config_filename = yaml_files[-1]  #'config_%d.yaml' % config_id

            with open(config_filename) as f:
                config = yaml.load(f)
            # Load model and evaluate
            supervisor.load(sess, config['train']['model_filename'])
            y_preds = supervisor.evaluate(sess, data_test)

            n_test_samples = data_test['y_test'].shape[0]
            folder = data_tag + '/results/'
            if not os.path.exists(folder):
                os.makedirs(folder)
            for horizon_i in range(data_test['y_test'].shape[1]):
                y_pred = scaler.inverse_transform(y_preds[:, horizon_i, :, 0])
                eval_dfs = df_test[seq_len + horizon_i:seq_len + horizon_i +
                                   n_test_samples]
                df = pd.DataFrame(y_pred,
                                  index=eval_dfs.index,
                                  columns=eval_dfs.columns)
                #df = pd.DataFrame(y_pred, columns=df_test.columns)
                filename = os.path.join(
                    '%s/results/' % data_tag,
                    'dcrnn_speed_prediction_%s.h5' % str(horizon_i + 1))
                df.to_hdf(filename, 'results')

            print(
                'Predictions saved as %s/results/dcrnn_prediction_[1-12].h5...'
                % data_tag)
Example #24
    def __init__(self, is_training, batch_size, scaler, adj_matrix_file,
                 **model_kwargs):
        # Scaler for data normalization.
        self._scaler = scaler

        # Train and loss
        self._loss = None
        self._mae = None
        self._train_op = None

        max_diffusion_step = int(model_kwargs.get('max_diffusion_step', 0))
        cl_decay_steps = int(model_kwargs.get('cl_decay_steps', 1000))
        filter_type = model_kwargs.get('filter_type', 'laplacian')

        networkType = model_kwargs.get('network', 'gconv')  # fc/gconv
        matrixType = model_kwargs.get('weightMatrix')  # a/d
        attention = model_kwargs.get('attention')

        horizon = int(model_kwargs.get('horizon', 1))
        max_grad_norm = float(model_kwargs.get('max_grad_norm', 5.0))
        num_nodes = int(model_kwargs.get('num_nodes', 1))
        num_rnn_layers = int(model_kwargs.get('num_rnn_layers', 1))
        rnn_units = int(model_kwargs.get('rnn_units'))
        seq_len = int(model_kwargs.get('seq_len'))
        use_curriculum_learning = bool(
            model_kwargs.get('use_curriculum_learning', False))
        input_dim = int(model_kwargs.get('input_dim', 1))
        output_dim = int(model_kwargs.get('output_dim', 1))
        aux_dim = input_dim - output_dim

        _, _, adj_mx = load_graph_data(adj_matrix_file)

        graphEmbedFile = None
        if networkType == 'fc':
            graphEmbedFile = model_kwargs.get('graphEmbedFile')
        # input_dim = 2
        # output_dim = 1
        # Input (batch_size, timesteps, num_sensor, input_dim)
        # print(batch_size, seq_len, num_nodes, input_dim)
        # 64 12 207 2
        # Batch size is a term used in machine learning and refers to the number of training examples utilised in one iteration.
        self._inputs = tf.placeholder(tf.float32,
                                      shape=(batch_size, seq_len, num_nodes,
                                             input_dim),
                                      name='inputs')
        # Labels: (batch_size, timesteps, num_sensor, input_dim), same format with input except the temporal dimension.
        self._labels = tf.placeholder(tf.float32,
                                      shape=(batch_size, horizon, num_nodes,
                                             input_dim),
                                      name='labels')

        # GO_SYMBOL = tf.zeros(shape=(batch_size, num_nodes * input_dim))
        GO_SYMBOL = tf.zeros(shape=(batch_size, num_nodes * output_dim))

        cell = DCGRUCell(rnn_units,
                         adj_mx,
                         max_diffusion_step=max_diffusion_step,
                         num_nodes=num_nodes,
                         network_type=networkType,
                         graphEmbedFile=graphEmbedFile,
                         filter_type=filter_type)
        cell_with_projection = DCGRUCell(rnn_units,
                                         adj_mx,
                                         max_diffusion_step=max_diffusion_step,
                                         num_nodes=num_nodes,
                                         network_type=networkType,
                                         graphEmbedFile=graphEmbedFile,
                                         num_proj=output_dim,
                                         filter_type=filter_type)
        encoding_cells = [cell] * num_rnn_layers
        decoding_cells = [cell] * (num_rnn_layers - 1) + [cell_with_projection]
        # projection is for the last step of decoding
        encoding_cells = tf.contrib.rnn.MultiRNNCell(encoding_cells,
                                                     state_is_tuple=True)
        decoding_cells = tf.contrib.rnn.MultiRNNCell(decoding_cells,
                                                     state_is_tuple=True)
        # print('We have initiated the cells.')

        global_step = tf.train.get_or_create_global_step()
        # Outputs: (batch_size, timesteps, num_nodes, output_dim)
        with tf.variable_scope('DCRNN_SEQ'):
            # What are the inputs and labels??

            # labels are ground truth

            # What is input_dim and output_dim
            # input_dim = 2
            # output_dim = 1
            inputs = tf.unstack(tf.reshape(
                self._inputs, (batch_size, seq_len, num_nodes * input_dim)),
                                axis=1)
            labels = tf.unstack(
                tf.reshape(self._labels[..., :output_dim],
                           (batch_size, horizon, num_nodes * output_dim)),
                axis=1)
            if aux_dim > 0:
                aux_info = tf.unstack(self._labels[..., output_dim:], axis=1)
                aux_info.insert(0, None)
            labels.insert(0, GO_SYMBOL)

            # print('Did we arrive here? Yes we did.')

            def _loop_function(prev, i):
                if is_training:
                    # Return either the model's prediction or the previous ground truth in training.
                    if use_curriculum_learning:
                        c = tf.random_uniform((), minval=0, maxval=1.)
                        threshold = self._compute_sampling_threshold(
                            global_step, cl_decay_steps)
                        result = tf.cond(tf.less(c, threshold),
                                         lambda: labels[i], lambda: prev)
                    else:
                        result = labels[i]
                else:
                    # Return the prediction of the model in testing.
                    result = prev
                # print(result.shape)
                # exit()
                # (64, 207)
                if False and aux_dim > 0:
                    result = tf.reshape(result,
                                        (batch_size, num_nodes, output_dim))
                    # print(result.shape)
                    # (64, 207, 1)
                    result = tf.concat([result, aux_info[i]], axis=-1)
                    # print(result.shape)
                    # (64, 207, 2)
                    result = tf.reshape(result,
                                        (batch_size, num_nodes * input_dim))
                    # print(result.shape)
                    # print(result.shape)
                    # (64, 414)
                return result

            # tf.contrib.rnn.static_rnn: https://www.tensorflow.org/versions/r1.1/api_docs/python/tf/contrib/rnn/static_rnn
            # Creates a recurrent neural network specified by RNNCell: cell.
            # _gconv is called several times in this step
            _, enc_state = tf.contrib.rnn.static_rnn(encoding_cells,
                                                     inputs,
                                                     dtype=tf.float32)
            # exit()
            # ****** HaHa ****** appeared 24 times
            # exit()
            # outputs is a list
            # Inside the decoder function, there is a loop function that probably propagates through the rnn structure
            # there are many printouts for calling the cells as a function, in the _gconv

            # outputs is of 13 such rnn cells
            # <tf.Tensor 'Train/DCRNN/DCRNN_SEQ/rnn_decoder/rnn_decoder/multi_rnn_cell/cell_1_12/dcgru_cell/projection/Reshape_1:0' shape=(64, 207) dtype=float32>

            # final_state is of 2 such rnn cells
            # <tf.Tensor 'Train/DCRNN/DCRNN_SEQ/rnn_decoder/rnn_decoder/multi_rnn_cell/cell_0_12/dcgru_cell/add:0' shape=(64, 13248) dtype=float32>
            # print('We are now in decoding')
            # tf.contrib.legacy_seq2seq.rnn_decoder: https://www.tensorflow.org/api_docs/python/tf/contrib/legacy_seq2seq/rnn_decoder
            # RNN decoder for the sequence-to-sequence model.
            # _gconv is called several times in this step
            outputs, final_state = legacy_seq2seq.rnn_decoder(
                labels,
                enc_state,
                decoding_cells,
                loop_function=_loop_function)

        # print("Did we arrive here? No we didn't.")
        # Project the output to output_dim.
        # https://www.tensorflow.org/api_docs/python/tf/stack
        # Why remove the last element?
        outputs = tf.stack(outputs[:-1], axis=1)
        # outputs is not a list anymore, but a stacked tensor
        self._outputs = tf.reshape(
            outputs, (batch_size, horizon, num_nodes, output_dim),
            name='outputs')
        self._merged = tf.summary.merge_all()