Example #1
def test(adj):
    ''' test on GCN '''

    adj = normalize_adj_tensor(adj)
    gcn = GCN(nfeat=features.shape[1],
              nhid=args.hidden,
              nclass=labels.max().item() + 1,
              dropout=0.5)

    if device != 'cpu':
        gcn = gcn.to(device)

    optimizer = optim.Adam(gcn.parameters(), lr=args.lr, weight_decay=5e-4)

    gcn.train()

    # full-batch training: the whole graph is fed forward each epoch and the
    # loss is computed on the training indices only
    for epoch in range(args.epochs):
        optimizer.zero_grad()
        output = gcn(features, adj)
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])
        loss_train.backward()
        optimizer.step()

    gcn.eval()
    output = gcn(features, adj)

    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    # print("Test set results:",
    #       "loss= {:.4f}".format(loss_test.item()),
    #       "accuracy= {:.4f}".format(acc_test.item()))

    return acc_test.item()
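
This snippet assumes module-level features, labels, idx_train, idx_test, args, and device, together with normalize_adj_tensor and accuracy helpers (the pattern matches adversarial-attack evaluation code such as DeepRobust). A minimal, hypothetical driver, with perturbed_adj standing in for the output of a structure attack:

clean_acc = test(adj)               # accuracy on the clean graph
attacked_acc = test(perturbed_adj)  # hypothetical perturbed adjacency
print('clean: {:.4f}  attacked: {:.4f}'.format(clean_acc, attacked_acc))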
Example #2
def main(args):
    # convert boolean-typed string args to real booleans
    assert args.self_loop in ['True', 'False'], \
        f"Only True or False for self_loop, got {args.self_loop}"
    assert args.use_layernorm in ['True', 'False'], \
        f"Only True or False for use_layernorm, got {args.use_layernorm}"
    self_loop = (args.self_loop == 'True')
    use_layernorm = (args.use_layernorm == 'True')
    global t0
    if args.dataset in {'cora', 'citeseer', 'pubmed'}:
        data = load_data(args)
    else:
        raise NotImplementedError(f'{args.dataset} is not a valid dataset')
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.BoolTensor(data.train_mask)
    val_mask = torch.BoolTensor(data.val_mask)
    test_mask = torch.BoolTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" % (n_edges, n_classes, train_mask.sum().item(),
                             val_mask.sum().item(), test_mask.sum().item()))
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    features = features.to(device)
    labels = labels.to(device)
    train_mask = train_mask.to(device)
    val_mask = val_mask.to(device)
    test_mask = test_mask.to(device)

    # graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop
    if self_loop:
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    g = g.to(device)
    n_edges = g.number_of_edges()

    # normalization
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    norm = norm.to(device)
    g.ndata['norm'] = norm.unsqueeze(1)
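    # each node now stores deg^{-1/2}; a GCN layer that scales features by
    # this factor on both the source and destination side implements the
    # symmetric normalization D^{-1/2} A D^{-1/2}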

    # create GCN model
    model = GCN(g, in_feats, args.n_hidden, n_classes, args.n_layers, F.relu,
                args.dropout, use_layernorm)
    model = model.to(device)
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # training loop
    record = []
    dur = []
    for epoch in range(args.n_epochs):
        if args.lr_scheduler:
            # decay the learning rate by 10x at 50% and 75% of training
            if epoch in (int(0.5 * args.n_epochs), int(0.75 * args.n_epochs)):
                for pg in optimizer.param_groups:
                    pg['lr'] = pg['lr'] / 10
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        optimizer.zero_grad()
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        acc_val = evaluate(model, features, labels, val_mask)
        acc_test = evaluate(model, features, labels, test_mask)
        record.append([acc_val, acc_test])

    all_test_acc = [v[1] for v in record]
    all_val_acc = [v[0] for v in record]
    acc = evaluate(model, features, labels, test_mask)
    print(f"Final Test Accuracy: {acc:.4f}")
    print(f"Best Val Accuracy: {max(all_val_acc):.4f}")
    print(f"Best Test Accuracy: {max(all_test_acc):.4f}")
Example #3
    all_train_features = []
    for _, _, _, features, _, _ in gcn_train_data:
        all_train_features.append(features)

    all_train_features = torch.cat(all_train_features)
    train_mean = torch.mean(all_train_features, dim=0)
    train_std = torch.std(all_train_features, dim=0)
    # train_mean[-10:] = 0
    # train_std[-10:] = 1
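    # train_mean/train_std would typically be applied as
    # (features - train_mean) / (train_std + 1e-8); the actual
    # standardization step happens outside this excerpt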

    # training GCN
    # model initialization
    device = 'cuda'
    lr = 0.0005
    model = GCN(raw_feature_size=full_feature_size)
    model = model.to(device=device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # loss functions
    mse_loss = nn.MSELoss()
    mae_loss = nn.L1Loss()
    loss_fn = mse_loss

    f_train_result = open('results/gcn/train.csv', 'a')
    f_test_result = open('results/gcn/test.csv', 'a')

    # training
    for epoch in range(2001):
        train_r2_list, train_mae_list, train_mse_list = [], [], []
        test_r2_list, test_mae_list, test_mse_list = [], [], []
Example #4
def main(training_file,
         dev_file,
         test_file,
         epochs=None,
         patience=None,
         num_heads=None,
         num_out_heads=None,
         num_layers=None,
         num_hidden=None,
         residual=None,
         in_drop=None,
         attn_drop=None,
         lr=None,
         weight_decay=None,
         alpha=None,
         batch_size=None,
         graph_type=None,
         net=None,
         freeze=None,
         cuda=None,
         fw=None):

    # number of training epochs
    if epochs is None:
        epochs = 400
    print('EPOCHS', epochs)
    # used for early stop
    if patience is None:
        patience = 15
    print('PATIENCE', patience)

    # number of hidden attention heads
    if num_heads is None:
        num_heads_ch = [4, 5, 6, 7]
    else:
        num_heads_ch = flattenList(num_heads)
    print('NUM HEADS', num_heads_ch)

    # number of output attention heads
    if num_out_heads is None:
        num_out_heads_ch = [4, 5, 6, 7]
    else:
        num_out_heads_ch = flattenList(num_out_heads)
    print('NUM OUT HEADS', num_out_heads_ch)

    # number of hidden layers
    if num_layers is None:
        num_layers_ch = [2, 3, 4, 5, 6]
    else:
        num_layers_ch = flattenList(num_layers)
    print('NUM LAYERS', num_layers_ch)
    # number of hidden units
    if num_hidden is None:
        num_hidden_ch = [32, 64, 96, 128, 256, 350, 512]
    else:
        num_hidden_ch = flattenList(num_hidden)
    print('NUM HIDDEN', num_hidden_ch)
    # use residual connection
    if residual is None:
        residual_ch = [True, False]
    else:
        residual_ch = flattenList(residual)
    print('RESIDUAL', residual_ch)
    # input feature dropout
    if in_drop is None:
        in_drop_ch = [0., 0.001, 0.0001, 0.00001]
    else:
        in_drop_ch = flattenList(in_drop)
    print('IN DROP', in_drop_ch)
    # attention dropout
    if attn_drop is None:
        attn_drop_ch = [0., 0.001, 0.0001, 0.00001]
    else:
        attn_drop_ch = flattenList(attn_drop)
    print('ATTENTION DROP', attn_drop_ch)
    # learning rate
    if lr is None:
        lr_ch = [0.0000005, 0.0000015, 0.00001, 0.00005, 0.0001]
    else:
        lr_ch = flattenList(lr)
    print('LEARNING RATE', lr_ch)
    # weight decay
    if weight_decay is None:
        weight_decay_ch = [0.0001, 0.001, 0.005]
    else:
        weight_decay_ch = flattenList(weight_decay)
    print('WEIGHT DECAY', weight_decay_ch)
    # the negative slope of leaky relu
    if alpha is None:
        alpha_ch = [0.1, 0.15, 0.2]
    else:
        alpha_ch = flattenList(alpha)
    print('ALPHA', alpha_ch)
    # batch size used for training, validation and test
    if batch_size is None:
        batch_size_ch = [175, 256, 350, 450, 512, 800, 1600]
    else:
        batch_size_ch = flattenList(batch_size)
    print('BATCH SIZE', batch_size_ch)
    # net type
    if net is None:
        net_ch = [GCN, GAT, RGCN, PGCN, PRGCN, GGN, PGAT]
    else:
        net_ch_raw = flattenList(net)
        net_ch = []
        for ch in net_ch_raw:
            if ch.lower() == 'gcn':
                if fw == 'dgl':
                    net_ch.append(GCN)
                else:
                    net_ch.append(PGCN)
            elif ch.lower() == 'gat':
                if fw == 'dgl':
                    net_ch.append(GAT)
                else:
                    net_ch.append(PGAT)
            elif ch.lower() == 'rgcn':
                if fw == 'dgl':
                    net_ch.append(RGCN)
                else:
                    net_ch.append(PRGCN)
            elif ch.lower() == 'ggn':
                net_ch.append(GGN)
            elif ch.lower() == 'rgat':
                net_ch.append(PRGAT)
            else:
                print('Network type {} is not recognised.'.format(ch))
                sys.exit(1)
    print('NET TYPE', net_ch)
    # graph type
    # non-relational architectures can use any graph alternative; relational
    # ones are restricted to the 'relational' graph
    if set(net_ch) <= {GCN, GAT, PGCN, GGN, PGAT}:
        if graph_type is None:
            graph_type_ch = ['raw', '1', '2', '3', '4', 'relational']
        else:
            graph_type_ch = flattenList(graph_type)
    else:
        if graph_type is None:
            graph_type_ch = ['relational']
        else:
            graph_type_ch = flattenList(graph_type)

    print('GRAPH TYPE', graph_type_ch)
    # Freeze input neurons?
    if freeze is None:
        freeze_ch = [True, False]
    else:
        freeze_ch = flattenList(freeze)
    print('FREEZE', freeze_ch)
    # CUDA?
    if cuda is None:
        device = torch.device("cpu")
    elif cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    print('DEVICE', device)
    if fw is None:
        fw_ch = ['dgl', 'pg']
    else:
        fw_ch = flattenList(fw)

    # define loss function
    # loss_fcn = torch.nn.BCEWithLogitsLoss()
    loss_fcn = torch.nn.MSELoss()

    for trial in range(10):
        trial_s = str(trial).zfill(6)
        num_heads = random.choice(num_heads_ch)
        num_out_heads = random.choice(num_out_heads_ch)
        num_layers = random.choice(num_layers_ch)
        num_hidden = random.choice(num_hidden_ch)
        residual = random.choice(residual_ch)
        in_drop = random.choice(in_drop_ch)
        attn_drop = random.choice(attn_drop_ch)
        lr = random.choice(lr_ch)
        weight_decay = random.choice(weight_decay_ch)
        alpha = random.choice(alpha_ch)
        batch_size = random.choice(batch_size_ch)
        graph_type = random.choice(graph_type_ch)
        net_class = random.choice(net_ch)
        freeze = random.choice(freeze_ch)
        fw = random.choice(fw_ch)
        if not freeze:
            freeze = 0
        else:
            # the number of frozen input neurons depends on the graph
            # alternative, since the feature layout differs per graph type
            if graph_type in ('raw', '1', '2'):
                freeze = 4
            elif graph_type in ('3', '4'):
                freeze = 6
            elif graph_type == 'relational':
                freeze = 5
            else:
                sys.exit(1)

        print('=========================')
        print('TRIAL', trial_s)
        print('HEADS', num_heads)
        print('OUT_HEADS', num_out_heads)
        print('LAYERS', num_layers)
        print('HIDDEN', num_hidden)
        print('RESIDUAL', residual)
        print('inDROP', in_drop)
        print('atDROP', attn_drop)
        print('LR', lr)
        print('DECAY', weight_decay)
        print('ALPHA', alpha)
        print('BATCH', batch_size)
        print('GRAPH_ALT', graph_type)
        print('ARCHITECTURE', net_class)
        print('FREEZE', freeze)
        print('FRAMEWORK', fw)
        print('=========================')

        # create the dataset
        print('Loading training set...')
        train_dataset = SocNavDataset(training_file,
                                      mode='train',
                                      alt=graph_type)
        print('Loading dev set...')
        valid_dataset = SocNavDataset(dev_file, mode='valid', alt=graph_type)
        print('Loading test set...')
        test_dataset = SocNavDataset(test_file, mode='test', alt=graph_type)
        print('Done loading files')
        train_dataloader = DataLoader(train_dataset,
                                      batch_size=batch_size,
                                      collate_fn=collate)
        valid_dataloader = DataLoader(valid_dataset,
                                      batch_size=batch_size,
                                      collate_fn=collate)
        test_dataloader = DataLoader(test_dataset,
                                     batch_size=batch_size,
                                     collate_fn=collate)

        num_rels = train_dataset.data[0].num_rels
        cur_step = 0
        best_loss = -1
        n_classes = train_dataset.labels.shape[1]
        print('Number of classes:  {}'.format(n_classes))
        num_feats = train_dataset.features.shape[1]
        print('Number of features: {}'.format(num_feats))
        g = train_dataset.graph
        # one attention-head count per hidden layer, plus a separate count
        # for the output layer
        heads = ([num_heads] * num_layers) + [num_out_heads]
        # define the model

        if fw == 'dgl':
            if net_class in [GCN]:
                model = GCN(g, num_feats, num_hidden, n_classes, num_layers,
                            F.elu, in_drop)
            elif net_class in [GAT]:
                model = net_class(g,
                                  num_layers,
                                  num_feats,
                                  num_hidden,
                                  n_classes,
                                  heads,
                                  F.elu,
                                  in_drop,
                                  attn_drop,
                                  alpha,
                                  residual,
                                  freeze=freeze)
            else:
                # def __init__(self, g, in_dim, h_dim, out_dim, num_rels, num_hidden_layers=1):
                model = RGCN(g,
                             in_dim=num_feats,
                             h_dim=num_hidden,
                             out_dim=n_classes,
                             num_rels=num_rels,
                             feat_drop=in_drop,
                             num_hidden_layers=num_layers,
                             freeze=freeze)
        else:

            if net_class in [PGCN]:
                model = PGCN(
                    num_feats,
                    n_classes,
                    num_hidden,
                    num_layers,
                    in_drop,
                    F.relu,
                    improved=True,  #Compute A-hat as A + 2I
                    bias=True)

            elif net_class in [PRGCN]:
                model = PRGCN(
                    num_feats,
                    n_classes,
                    num_rels,
                    num_rels,  #num_rels?   # TODO: Add variable
                    num_hidden,
                    num_layers,
                    in_drop,
                    F.relu,
                    bias=True)
            elif net_class in [PGAT]:
                model = PGAT(num_feats,
                             n_classes,
                             num_heads,
                             in_drop,
                             num_hidden,
                             num_layers,
                             F.relu,
                             concat=True,
                             neg_slope=alpha,
                             bias=True)
            elif net_class in [PRGAT]:
                model = PRGAT(
                    num_feats,
                    n_classes,
                    num_heads,
                    num_rels,
                    num_rels,  #num_rels?   # TODO: Add variable
                    num_hidden,
                    num_layers,
                    num_layers,
                    in_drop,
                    F.relu,
                    alpha,
                    bias=True)
            else:
                model = GGN(num_feats, num_layers, aggr='mean', bias=True)
        #Describe the model
        #describe_model(model)

        # define the optimizer
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=lr,
                                     weight_decay=weight_decay)
        # for name, param in model.named_parameters():
        # if param.requires_grad:
        # print(name, param.data.shape)
        model = model.to(device)

        for epoch in range(epochs):
            model.train()
            loss_list = []
            for batch, data in enumerate(train_dataloader):
                subgraph, feats, labels = data
                subgraph.set_n_initializer(dgl.init.zero_initializer)
                subgraph.set_e_initializer(dgl.init.zero_initializer)
                feats = feats.to(device)
                labels = labels.to(device)
                if fw == 'dgl':
                    model.g = subgraph
                    for layer in model.layers:
                        layer.g = subgraph
                    logits = model(feats.float())
                else:
                    if net_class in [PGCN, PGAT, GGN]:
                        data = Data(x=feats.float(),
                                    edge_index=torch.stack(
                                        subgraph.edges()).to(device))
                    else:
                        data = Data(
                            x=feats.float(),
                            edge_index=torch.stack(
                                subgraph.edges()).to(device),
                            edge_type=subgraph.edata['rel_type'].squeeze().to(
                                device))
                    logits = model(data)
                loss = loss_fcn(logits[getMaskForBatch(subgraph)],
                                labels.float())
                optimizer.zero_grad()
                # sanity check: snapshot a parameter before and after the
                # optimizer step to verify that gradients actually flow
                a = list(model.parameters())[0].clone()
                loss.backward()
                optimizer.step()
                b = list(model.parameters())[0].clone()
                if torch.equal(a.data, b.data):
                    print('Not learning')
                # print(loss.item())
                loss_list.append(loss.item())
            loss_data = np.array(loss_list).mean()
            print('Loss: {}'.format(loss_data))
            # validate and check early stopping every 5 epochs
            if epoch % 5 == 0:
                print(
                    "Epoch {:05d} | Loss: {:.4f} | Patience: {} | ".format(
                        epoch, loss_data, cur_step),
                    end='')
                score_list = []
                val_loss_list = []
                for batch, valid_data in enumerate(valid_dataloader):
                    subgraph, feats, labels = valid_data
                    subgraph.set_n_initializer(dgl.init.zero_initializer)
                    subgraph.set_e_initializer(dgl.init.zero_initializer)
                    feats = feats.to(device)
                    labels = labels.to(device)
                    score, val_loss = evaluate(feats.float(), model, subgraph,
                                               labels.float(), loss_fcn, fw,
                                               net_class)
                    score_list.append(score)
                    val_loss_list.append(val_loss)
                mean_score = np.array(score_list).mean()
                mean_val_loss = np.array(val_loss_list).mean()
                print("Score: {:.4f} MEAN: {:.4f} BEST: {:.4f}".format(
                    mean_score, mean_val_loss, best_loss))
                # early stop
                if best_loss > mean_val_loss or best_loss < 0:
                    best_loss = mean_val_loss
                    # Save the model
                    # print('Writing to', trial_s)
                    torch.save(model.state_dict(), fw + str(net) + '.tch')
                    # persist everything needed to rebuild the model later
                    params = [
                        val_loss, graph_type, net_class.__name__, g,
                        num_layers, num_feats, num_hidden, n_classes, heads,
                        F.elu, in_drop, attn_drop, alpha, residual, num_rels,
                        freeze
                    ]
                    pickle.dump(params, open(fw + str(net) + '.prms', 'wb'))
                    cur_step = 0
                else:
                    cur_step += 1
                    if cur_step >= patience:
                        break
        torch.save(model, 'gattrial.pth')
        model.eval()
        test_score_list = []
        for batch, test_data in enumerate(test_dataloader):
            subgraph, feats, labels = test_data
            subgraph.set_n_initializer(dgl.init.zero_initializer)
            subgraph.set_e_initializer(dgl.init.zero_initializer)
            feats = feats.to(device)
            labels = labels.to(device)
            test_score_list.append(
                evaluate(feats, model, subgraph, labels.float(), loss_fcn, fw,
                         net_class)[0])
        print("F1-Score: {:.4f}".format(np.array(test_score_list).mean()))
        # NOTE: returning inside the trial loop ends the random search after
        # the first trial; dedent this line to run all 10 trials
        return best_loss
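
Example #4 leans on a flattenList helper to turn scalar or nested hyperparameter arguments into flat lists of choices for random.choice. A minimal sketch of what it presumably does:

def flattenList(x):
    # wrap scalars so random.choice always receives a flat list
    if not isinstance(x, (list, tuple)):
        return [x]
    flat = []
    for item in x:
        flat.extend(flattenList(item))
    return flat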
Example #5
    adj_mtx = data_generator.get_adj_mat()

    # create model name and save
    modelname = "GCN" + \
        "_bs_" + str(batch_size) + \
        "_nemb_" + str(emb_dim) + \
        "_layers_" + str(layers) + \
        "_nodedr_" + str(node_dropout) + \
        "_messdr_" + str(mess_dropout) + \
        "_reg_" + str(reg) + \
        "_lr_" + str(lr)

    # create GCN model
    model = GCN(data_generator.n_users, data_generator.n_items, emb_dim,
                layers, reg, node_dropout, mess_dropout, adj_mtx)
    model.to(device='cuda:0')
    model = torch.nn.DataParallel(model, device_ids=[0, 1])
    # current best metric
    cur_best_metric = 0

    # optimizer (Adam commented out in favor of plain SGD)
    # optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
    # Set values for early stopping
    cur_best_loss, stopping_step, should_stop = 1e3, 0, False
    today = datetime.now()

    print("Start at " + str(today))
    print("Using " + str(device) + " for computations")
    print("Params on CUDA: " + str(next(model.parameters()).is_cuda))