Exemple #1
0
def evaluate_downstream(model, dataloader, device):
    """Embed every batch of ``dataloader`` and score the embeddings.

    The model is switched to eval mode and each batch is encoded with
    ``model.encode_graph`` under ``torch.no_grad()``. The per-batch
    embeddings and the labels read from ``data.y`` are collected as NumPy
    arrays and handed to ``evaluate_embedding``.

    Args:
        model: Object exposing ``eval()`` and ``encode_graph(data)``.
        dataloader: Iterable of batches exposing ``x``, ``y``, ``batch``
            and ``to(device)``.
        device: Device the batches are moved to before encoding.

    Returns:
        Whatever ``evaluate_embedding`` returns for the stacked embeddings
        and concatenated labels. (Previously this value was discarded and
        the function always returned ``None``.)

    Raises:
        ValueError: If ``dataloader`` yields no batches (``np.vstack`` on an
            empty list).
    """
    model.eval()
    x_embeds = []
    ys = []
    with torch.no_grad():
        for data in dataloader:
            # Labels are captured before the batch is moved to ``device``.
            ys.append(data.y.cpu().detach().numpy())
            if data.x is None:
                # Batches without features get a placeholder of ones, one
                # entry per element of ``data.batch``.
                data.x = torch.ones(data.batch.shape[0])
            data = data.to(device)
            x_embed = model.encode_graph(data)
            x_embeds.append(x_embed.cpu().detach().numpy())
    return evaluate_embedding(np.vstack(x_embeds), np.concatenate(ys))
Exemple #2
0
            x_aug = model(data_aug.x, data_aug.edge_index, data_aug.batch,
                          data_aug.num_graphs)
            x_stro_aug = model(data_stro_aug.x, data_stro_aug.edge_index,
                               data_stro_aug.batch, data_stro_aug.num_graphs)

            # print(x)
            # print(x_aug)
            loss = model.loss_cal(x_stro_aug, x_aug)
            # print(loss)
            loss_all += loss.item()  #* data.num_graphs
            loss.backward()
            optimizer.step()
            # print('batch')
        print('Epoch {}, Loss {}'.format(epoch, loss_all / len(dataloader)))

        if epoch % log_interval == 0:
            model.eval()
            emb, y = model.encoder.get_embeddings(dataloader_eval)
            acc_val, acc = evaluate_embedding(emb, y)
            accuracies['val'].append(acc_val)
            accuracies['test'].append(acc)
            # print(accuracies['val'][-1], accuracies['test'][-1])

    tpe = ('local' if args.local else '') + ('prior' if args.prior else '')
    with open('logs/log_' + args.DS + '_' + args.aug, 'a+') as f:
        s = json.dumps(accuracies)
        f.write('{},{},{},{},{},{},{}\n'.format(args.DS, tpe,
                                                args.num_gc_layers, epochs,
                                                log_interval, lr, s))
        f.write('\n')
Exemple #3
0
    dataloader = DataLoader(dataset, batch_size=batch_size)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = GcnInfomax(args.hidden_dim, args.num_gc_layers).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    print('================')
    print('lr: {}'.format(lr))
    print('num_features: {}'.format(dataset_num_features))
    print('hidden_dim: {}'.format(args.hidden_dim))
    print('num_gc_layers: {}'.format(args.num_gc_layers))
    print('================')

    model.eval()
    emb, y = model.encoder.get_embeddings(dataloader)
    res = evaluate_embedding(emb, y)
    accuracies['logreg'].append(res[0])
    accuracies['svc'].append(res[1])
    accuracies['linearsvc'].append(res[2])
    accuracies['randomforest'].append(res[3])

    for epoch in range(1, epochs + 1):
        loss_all = 0
        model.train()
        for data in dataloader:
            data = data.to(device)
            optimizer.zero_grad()
            loss = model(data.x, data.edge_index, data.batch, data.num_graphs)
            loss_all += loss.item() * data.num_graphs
            loss.backward()
            optimizer.step()
                                 shuffle=True)

    in_dim = dataset[0][0].ndata['attr'].shape[1]

    # Step 2: Create model =================================================================== #
    model = InfoGraph(in_dim, args.hid_dim, args.n_layers)
    model = model.to(args.device)

    # Step 3: Create training components ===================================================== #
    optimizer = th.optim.Adam(model.parameters(), lr=args.lr)

    print('===== Before training ======')

    wholegraph = wholegraph.to(args.device)
    emb = model.get_embedding(wholegraph).cpu()
    res = evaluate_embedding(emb, labels)

    print('logreg {:4f}, svc {:4f}'.format(res[0], res[1]))

    best_logreg = 0
    best_svc = 0
    best_epoch = 0
    best_loss = 0

    # Step 4: training epoches =============================================================== #
    for epoch in range(1, args.epochs):
        loss_all = 0
        model.train()

        for graph, label in dataloader:
            graph = graph.to(args.device)
Exemple #5
0
# Augmentation names after which nodes that lost all their edges are removed
# from the batch and the remaining nodes are renumbered.
_NODE_DROPPING_WEAK_AUGS = ('dnodes', 'subgraph', 'random2', 'random3',
                            'random4')
_NODE_DROPPING_STRO_AUGS = ('stro_dnodes', 'stro_subgraph', 'random2',
                            'random3', 'random4')


def _compact_dropped_nodes(aug_data, batch, node_num):
    """Drop edge-less nodes from ``aug_data`` in place and renumber its edges.

    Args:
        aug_data: Augmented batch whose ``x``, ``batch`` and ``edge_index``
            attributes are rewritten. ``edge_index`` must still be on the CPU
            because it is converted with ``.numpy()``.
        batch: Node-to-graph assignment of the un-augmented batch; it is
            indexed with the surviving node ids.
        node_num: Number of nodes in the un-augmented batch.

    Raises:
        KeyError: If a non-self-loop edge references a node id outside
            ``range(node_num)``.
    """
    edge_idx = aug_data.edge_index.numpy()
    sources = edge_idx[0].tolist()
    targets = edge_idx[1].tolist()

    # Build the membership set once; testing ``n in ndarray`` for every node
    # is a linear scan each time and makes this step quadratic.
    connected = set(sources) | set(targets)
    idx_not_missing = [n for n in range(node_num) if n in connected]

    aug_data.x = aug_data.x[idx_not_missing]
    aug_data.batch = batch[idx_not_missing]

    # Map old node ids to their position among the surviving nodes.
    idx_dict = {old: new for new, old in enumerate(idx_not_missing)}
    relabelled = [[idx_dict[src], idx_dict[dst]]
                  for src, dst in zip(sources, targets)
                  if src != dst]  # self-loops are discarded
    # ``reshape(-1, 2)`` keeps the result 2-D when no edge survives, so the
    # transpose yields an empty (2, 0) index instead of raising.
    aug_data.edge_index = torch.tensor(
        relabelled, dtype=torch.long).reshape(-1, 2).t()


def train(args, DS, gpu, num_gc_layers=4, epoch=40, batch=64):
    """Train a ``BYOL`` model on a TU dataset and append accuracies to a log.

    A weakly and a strongly augmented view of every batch are fed to the
    model, whose forward pass returns the loss. Every ``log_interval`` (10)
    epochs the online encoder's embeddings of the un-augmented dataset are
    scored with ``evaluate_embedding``. After training, one line with the
    run's settings and the JSON-encoded accuracies is appended to
    ``logs/log_BYOL_<args.DS>_<args.aug>_<args.stro_aug>``.

    Args:
        args: Namespace providing ``lr``, ``aug``, ``stro_aug``,
            ``hidden_dim``, ``local``, ``prior`` and ``DS``.
        DS: Dataset name; also the sub-directory of ``data/`` next to this
            file where the dataset is stored.
        gpu: Only written to the log line.
        num_gc_layers: Passed to both ``Encoder`` and ``BYOL``.
        epoch: Number of training epochs.
        batch: Batch size of the training and evaluation loaders.

    Returns:
        None. Results are printed and appended to the log file.
    """
    import os  # local import: only needed to create the log directory

    accuracies = {'val': [], 'test': []}
    epochs = epoch
    log_interval = 10
    batch_size = batch
    lr = args.lr
    path = osp.join(osp.dirname(osp.realpath(__file__)), 'data', DS)
    dataset = TUDataset(path, name=DS, aug=args.aug,
                        stro_aug=args.stro_aug).shuffle()
    dataset_eval = TUDataset(path, name=DS, aug='none',
                             stro_aug='none').shuffle()
    print(len(dataset))
    # The feature count is queried inside the ``try`` only, so the fallback
    # below is actually reachable when the query fails.
    try:
        dataset_num_features = dataset.get_num_feature()
    except Exception:
        dataset_num_features = 1
    print(dataset_num_features)

    dataloader = DataLoader(dataset, batch_size=batch_size)
    dataloader_eval = DataLoader(dataset_eval, batch_size=batch_size)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    online_encoder = Encoder(dataset_num_features, args.hidden_dim,
                             num_gc_layers)
    model = BYOL(online_encoder,
                 args.hidden_dim,
                 num_gc_layers,
                 use_momentum=False).to(device)
    # Only the online encoder's parameters are optimised.
    optimizer = torch.optim.Adam(model.online_encoder.parameters(), lr=lr)

    print('================')
    print('lr: {}'.format(lr))
    print('num_features: {}'.format(dataset_num_features))
    print('hidden_dim: {}'.format(args.hidden_dim))
    print('num_gc_layers: {}'.format(num_gc_layers))
    print('================')

    model.eval()
    # NOTE(review): the pre-training evaluation is disabled, so this result
    # is unused; the pass is kept so the run is otherwise unchanged.
    emb, y = model.online_encoder.get_embeddings(dataloader_eval)

    # ``epoch_idx`` avoids shadowing the ``epoch`` parameter.
    for epoch_idx in range(1, epochs + 1):
        loss_all = 0
        model.train()
        for data in dataloader:
            data, data_weak_aug, data_stro_aug = data
            optimizer.zero_grad()

            node_num, _ = data.x.size()
            data = data.to(device)

            if args.aug in _NODE_DROPPING_WEAK_AUGS:
                _compact_dropped_nodes(data_weak_aug, data.batch, node_num)

            if args.stro_aug in _NODE_DROPPING_STRO_AUGS:
                _compact_dropped_nodes(data_stro_aug, data.batch, node_num)

            data_weak_aug = data_weak_aug.to(device)
            data_stro_aug = data_stro_aug.to(device)

            loss = model(data_weak_aug.x, data_weak_aug.edge_index,
                         data_weak_aug.batch, data_weak_aug.num_graphs,
                         data_stro_aug.x, data_stro_aug.edge_index,
                         data_stro_aug.batch, data_stro_aug.num_graphs)
            # Accumulate a plain float; summing the tensor itself would keep
            # every batch's autograd history alive until the epoch ends.
            loss_all += loss.item()

            loss.backward()
            optimizer.step()
            # The model is built with use_momentum=False, so no
            # moving-average update is performed here.

        print('Epoch {}, Loss {}'.format(epoch_idx,
                                         loss_all / len(dataloader)))

        if epoch_idx % log_interval == 0:
            model.eval()
            emb, y = model.online_encoder.get_embeddings(dataloader_eval)
            acc_val, acc = evaluate_embedding(emb, y)
            accuracies['val'].append(acc_val)
            accuracies['test'].append(acc)

    tpe = ('local' if args.local else '') + ('prior' if args.prior else '')
    # Make sure the log directory exists so a finished run is not lost.
    os.makedirs('logs', exist_ok=True)
    with open(
            'logs/log_BYOL_' + args.DS + '_' + args.aug + '_' + args.stro_aug,
            'a+') as f:
        s = json.dumps(accuracies)
        # NOTE(review): the ``layers:`` field used to read an undefined name
        # ``layers``; it is assumed to mean the GC layer count -- confirm.
        f.write(
            '{},bs:{},epoch:{},layers:{},{},gpu:{},{},{},{},{},{}\n'.format(
                args.DS, batch, epochs, num_gc_layers, tpe, gpu,
                num_gc_layers, epochs, log_interval, lr, s))
        f.write('\n')
Exemple #6
0
    in_dim = wholegraph.ndata['attr'].shape[1]

    # Step 2: Create model =================================================================== #
    model = InfoGraph(in_dim, args.hid_dim, args.n_layers)
    model = model.to(args.device)

    # Step 3: Create training components ===================================================== #
    optimizer = th.optim.Adam(model.parameters(), lr=args.lr)
 
    print('===== Before training ======')
    
    wholegraph = wholegraph.to(args.device)
    wholefeat = wholegraph.ndata['attr']
    
    emb = model.get_embedding(wholegraph, wholefeat).cpu()
    res = evaluate_embedding(emb, labels, args.device)

    ''' Evaluate the initialized embeddings '''
    ''' using logistic regression and SVM(non-linear) '''
    print('logreg {:4f}, svc {:4f}'.format(res[0], res[1]))
    
    best_logreg = 0
    best_logreg_epoch = 0
    best_svc = 0
    best_svc_epoch = 0

    # Step 4: training epochs =============================================================== #
    for epoch in range(args.epochs):
        loss_all = 0
        model.train()