Example #1
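The snippets below omit their imports. A plausible header, with aliases inferred from usage (data and model are project-local modules, so their exact APIs are assumptions):

import time

import numpy as np
import torch as T
import matplotlib.pyplot as pt

import data   # assumed project module: dataset loading and k-hop feature aggregation
import model  # assumed project module: defines CountingGrid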
def evaluate(args):
    start = time.time()

    device_name = 'cuda:{}'.format(args.gpu) if args.gpu >= 0 else 'cpu'
    device = T.device(device_name)
    print('Using device', device_name)

    _, features, labels, train_mask, val_mask, test_mask = data.get_node_classification_data(
        'cora', args.norm_constant, args.num_hops, large_split=False)
    features = T.from_numpy(features).float()
    print('Built {}-hop aggregated features with k={}'.format(
        args.num_hops, args.norm_constant))

    T.set_grad_enabled(False)

    saved_model = T.load(args.model)
    cg = model.CountingGrid(features.shape[1], saved_model['wcg'].shape[-1],
                            args.cg_window, 0).to(device)
    cg.wcg[:] = saved_model['wcg']
    print('Loaded saved model', args.model)

    # compute posterior
    logit_posterior = cg(features.to(device)).view(-1, cg.size**2)
    posterior_pred = (logit_posterior / args.alpha).softmax(1)
    posterior_emb = (logit_posterior / args.beta).softmax(1)

    # compute p(c|s)
    pcs = T.matmul(posterior_emb[train_mask].T,
                   T.from_numpy(labels[train_mask]).float().to(device))
    pcs /= pcs.sum(1).unsqueeze(1)

    # compute predictions
    probs = T.matmul(posterior_pred, pcs)
    preds = probs.argmax(1).cpu().numpy()

    # compute accuracy
    val_acc = ((preds[val_mask]
                == labels[val_mask].argmax(1)).sum()) / val_mask.sum()
    test_acc = ((preds[test_mask]
                 == labels[test_mask].argmax(1)).sum()) / test_mask.sum()

    print('Val accuracy', val_acc)
    print('Test accuracy', test_acc)

    print('Total execution time', time.time() - start)
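A minimal standalone sketch of the classification rule used above, with made-up tensors (Q is a row-stochastic posterior over grid positions, Y holds one-hot labels); it only illustrates the p(c|s) estimate and the argmax prediction, not the actual CountingGrid model:

import torch as T

S, C = 36, 7                                  # grid positions, classes
Q_train = T.rand(100, S).softmax(1)           # posterior q(s|x) for labeled nodes
Y_train = T.eye(C)[T.randint(0, C, (100,))]   # one-hot labels

pcs = Q_train.T @ Y_train                     # soft class counts per position
pcs = pcs / pcs.sum(1, keepdim=True)          # p(c|s)

Q_all = T.rand(500, S).softmax(1)
preds = (Q_all @ pcs).argmax(1)               # predicted class per node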
Example #2
def visualize(args):
    start = time.time()

    device_name = 'cuda:{}'.format(args.gpu) if args.gpu >= 0 else 'cpu'
    device = T.device(device_name)
    print('Using device', device_name)

    _, features, labels, _, _, _ = data.get_node_classification_data(
        'cora', args.norm_constant, args.num_hops, large_split=False)
    features = T.from_numpy(features).float()
    print('Built {}-hop aggregated features with k={}'.format(
        args.num_hops, args.norm_constant))

    T.set_grad_enabled(False)

    saved_model = T.load(args.model)
    cg = model.CountingGrid(features.shape[1], saved_model['wcg'].shape[-1],
                            args.cg_window, 0).to(device)
    cg.wcg[:] = saved_model['wcg']
    print('Loaded saved model', args.model)

    # compute posterior
    logit_posterior = cg(features.to(device)).view(-1, cg.size**2)
    posterior = logit_posterior.softmax(1)

    # compute p(c|s)
    pcs = T.matmul(posterior.T, T.from_numpy(labels).float().to(device))
    pcs /= pcs.sum(1).unsqueeze(1)

    cats = [
        'Case_Based', 'Genetic_Algorithms', 'Neural_Networks',
        'Probabilistic_Methods', 'Reinforcement_Learning', 'Rule_Learning',
        'Theory'
    ]
    pt.figure(figsize=(10, 6))
    for i in range(7):
        pt.subplot(2, 4, i + 1)
        pt.title(cats[i])
        pt.imshow(pcs[:, i].cpu().numpy().reshape(cg.size, cg.size))
    pt.savefig(args.out)

    print('Saved image', args.out)

    print('Total execution time', time.time() - start)
Example #3
def train(args):

    start = time.time()

    device_name = 'cuda:{}'.format(args.gpu) if args.gpu >= 0 else 'cpu'
    device = T.device(device_name)
    print('Using device', device_name)

    _, features, _, _, _, _ = data.get_node_classification_data(
        'cora', args.norm_constant, args.num_hops, large_split=False)
    features = T.from_numpy(features).float()
    print('Built {}-hop aggregated features with k={}'.format(
        args.num_hops, args.norm_constant))

    cg = model.CountingGrid(features.shape[1], args.cg_size, args.cg_window,
                            args.clamp_constant).to(device)
    optimizer = T.optim.Adam(lr=args.learning_rate, params=cg.parameters())

    print('Training {} / {} CG for {} batches of size {}'.format(
        args.cg_size, args.cg_window, args.num_batches, args.batch_size))

    cum_loss = 0.
    for i in range(args.num_batches):
        indices = np.random.randint(features.shape[0],
                                    size=(args.batch_size, ))

        optimizer.zero_grad()
        lp = cg(features[indices].to(device))
        loss = -lp.logsumexp(
            (1, 2)).mean()  # the log likelihood (up to +const.)
        loss.backward()
        cum_loss += loss.item()
        optimizer.step()
        with T.no_grad():
            cg.clamp()

        if i % args.print_interval == 0:
            # average log likelihood of a node
            print(
                'Batch {} of {}: logP = '.format(i, args.num_batches),
                -cum_loss / (1 if i == 0 else args.print_interval) -
                np.log(cg.size**2))
            cum_loss = 0.

    T.save(cg.state_dict(), args.out)
    print('Saved model', args.out)

    print('Total execution time', time.time() - start)
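For reference, the quantity printed every print_interval batches appears to be the per-node average log-likelihood (up to the additive constant noted in the loss comment) under a uniform prior over the S = size**2 grid positions, i.e. logsumexp over positions minus log S. A tiny self-contained check of that arithmetic, with fake per-position log-probabilities:

import torch as T

lp = T.randn(4, 5, 5)  # fake log p(x|s) for 4 nodes on a 5x5 grid, S = 25
avg_ll = lp.logsumexp((1, 2)).mean() - T.log(T.tensor(25.))
print(avg_ll.item())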
def link_prediction(args):

    start = time.time()

    device_name = 'cuda:{}'.format(args.gpu) if args.gpu >= 0 else 'cpu'
    device = T.device(device_name)
    print('Using device', device_name)

    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false, features = data.get_link_prediction_data(
        'cora', args.norm_constant, args.num_hops)
    features = T.from_numpy(features).float()
    print(
        'Built {}-hop aggregated features from damaged graph with k={}'.format(
            args.num_hops, args.norm_constant))

    cg = model.CountingGrid(features.shape[1], args.cg_size, args.cg_window,
                            args.clamp_constant).to(device)
    optimizer = T.optim.Adam(lr=args.learning_rate, params=cg.parameters())

    print('Training {} / {} CG for {} batches of size {}'.format(
        args.cg_size, args.cg_window, args.num_batches, args.batch_size))

    cum_loss = 0.
    for i in range(args.num_batches):
        indices = np.random.randint(features.shape[0],
                                    size=(args.batch_size, ))

        optimizer.zero_grad()
        lp = cg(features[indices].to(device))
        loss = -lp.logsumexp(
            (1, 2)).mean()  # the log likelihood (up to +const.)
        loss.backward()
        cum_loss += loss.item()
        optimizer.step()
        with T.no_grad():
            cg.clamp()

        if i % args.print_interval == 0:
            # average log likelihood of node
            print(
                'Batch {} of {}: logP = '.format(i,
                                                 args.num_batches), -cum_loss /
                (1 if i == 0 else args.print_interval) - np.log(cg.size**2))
            cum_loss = 0.

    T.set_grad_enabled(False)

    print('Evaluating trained model')

    best_val_mean = 0.
    best_test_auc, best_test_ap = 0., 0.
    for c in range(1, 11):

        logit_posterior = cg(features.to(device)).view(-1, cg.size**2)
        posterior = (logit_posterior * c).softmax(1)

        latent_link_prob = T.einsum('is,ij,jt->st', posterior,
                                    T.from_numpy(adj_train).float().to(device),
                                    posterior)
        latent_link_denom = T.einsum('is,jt->st', posterior, posterior)
        latent_link_prob /= latent_link_denom

        link_prob = T.einsum('is,st,jt->ij', posterior, latent_link_prob,
                             posterior).cpu().numpy()

        val_auc, val_ap = evaluate(link_prob, val_edges, val_edges_false)
        test_auc, test_ap = evaluate(link_prob, test_edges, test_edges_false)

        if (val_auc + val_ap) / 2 > best_val_mean:
            best_val_mean = (val_auc + val_ap) / 2
            best_test_auc, best_test_ap = test_auc, test_ap

        print('Hardening constant c={}: validation AUC={}, AP={}'.format(
            c, val_auc, val_ap))

    print('Test AUC', best_test_auc)
    print('Test AP', best_test_ap)

    print('Total execution time', time.time() - start)
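link_prediction calls an evaluate(link_prob, edges, edges_false) helper that is not shown here (and is distinct from the evaluate(args) entry point in Example #1). One plausible implementation, assuming edges and edges_false are arrays of (i, j) index pairs and using scikit-learn's metrics; the name evaluate_links is illustrative:

import numpy as np
from sklearn.metrics import average_precision_score, roc_auc_score

def evaluate_links(link_prob, edges_pos, edges_neg):
    # score every held-out positive and negative edge with the reconstructed probabilities
    pos = np.array([link_prob[i, j] for i, j in edges_pos])
    neg = np.array([link_prob[i, j] for i, j in edges_neg])
    scores = np.concatenate([pos, neg])
    targets = np.concatenate([np.ones(len(pos)), np.zeros(len(neg))])
    return roc_auc_score(targets, scores), average_precision_score(targets, scores)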
def finetune_evaluate(args):
    start = time.time()

    device_name = 'cuda:{}'.format(args.gpu) if args.gpu >= 0 else 'cpu'
    device = T.device(device_name)
    print('Using device', device_name)

    _, features, labels, train_mask, val_mask, test_mask = data.get_node_classification_data(
        'cora', args.norm_constant, args.num_hops, large_split=True)
    features = T.from_numpy(features).float()
    print('Built {}-hop aggregated features with k={}'.format(
        args.num_hops, args.norm_constant))

    T.set_grad_enabled(False)

    saved_model = T.load(args.model)
    cg = model.CountingGrid(features.shape[1], saved_model['wcg'].shape[-1],
                            args.cg_window, args.clamp_constant).to(device)
    cg.wcg[:] = saved_model['wcg']
    print('Loaded saved model', args.model)

    # compute posterior
    logit_posterior = cg(features.to(device)).view(-1, cg.size**2)
    posterior_pred = (logit_posterior / args.alpha).softmax(1)
    posterior_emb = (logit_posterior / args.beta).softmax(1)

    # compute p(c|s)
    pcs = T.matmul(posterior_emb[train_mask].T,
                   T.from_numpy(labels[train_mask]).float().to(device))
    pcs /= pcs.sum(1).unsqueeze(1)

    # compute predictions
    probs = T.matmul(posterior_pred, pcs)
    preds = probs.argmax(1).cpu().numpy()

    # compute accuracy
    val_acc = ((preds[val_mask]
                == labels[val_mask].argmax(1)).sum()) / val_mask.sum()
    test_acc = ((preds[test_mask]
                 == labels[test_mask].argmax(1)).sum()) / test_mask.sum()

    print('Initial val accuracy', val_acc)
    print('Initial test accuracy', test_acc)

    pcs.log_()  # use log-domain parametrization of p(c|s)

    # make CG and p(c|s) matrix trainable
    T.set_grad_enabled(True)
    cg.wcg.requires_grad = True
    pcs.requires_grad = True

    opt = T.optim.SGD([cg.wcg, pcs],
                      lr=args.learning_rate,
                      momentum=args.momentum)

    nll = T.nn.NLLLoss().to(device)

    best_val_acc, best_test_acc = 0., 0.

    print('Made p(c|s) and CG parameters trainable, finetuning for {} epochs'.
          format(args.finetune_steps))

    for i in range(args.finetune_steps):

        opt.zero_grad()

        logit_posterior = cg(features.to(device)).view(-1, cg.size**2)
        posterior_pred = (logit_posterior / args.alpha).softmax(1)

        # compute predictions
        probs = T.matmul(posterior_pred, pcs.softmax(1))
        preds = probs.argmax(1).cpu().numpy()

        # get the current val/test accuracy
        with T.no_grad():
            valacc = ((preds[val_mask]
                       == labels[val_mask].argmax(1)).sum()) / val_mask.sum()
            testacc = (
                (preds[test_mask]
                 == labels[test_mask].argmax(1)).sum()) / test_mask.sum()

            if valacc > best_val_acc:
                best_val_acc, best_test_acc = valacc, testacc

            print('Epoch {} of {}: validation accuracy {}'.format(
                i, args.finetune_steps, valacc))

        loss = nll(T.log(probs[train_mask]),
                   T.from_numpy(labels[train_mask].argmax(1)).to(device))
        loss.backward()
        opt.step()
        with T.no_grad():
            cg.clamp()

    print('Best val accuracy', best_val_acc)
    print('Test accuracy at best epoch', best_test_acc)

    print('Total execution time', time.time() - start)
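All of these entry points read their hyperparameters from an args namespace. A hypothetical argparse wiring (flag names mirror the attributes accessed above; defaults are illustrative, not the project's values), with the other subcommands following the same pattern:

import argparse

def main():
    parser = argparse.ArgumentParser()
    sub = parser.add_subparsers(dest='command', required=True)

    t = sub.add_parser('train')
    t.add_argument('--gpu', type=int, default=-1)
    t.add_argument('--norm_constant', type=float, default=1.)
    t.add_argument('--num_hops', type=int, default=2)
    t.add_argument('--cg_size', type=int, default=24)
    t.add_argument('--cg_window', type=int, default=5)
    t.add_argument('--clamp_constant', type=float, default=1e-8)
    t.add_argument('--learning_rate', type=float, default=1e-3)
    t.add_argument('--num_batches', type=int, default=1000)
    t.add_argument('--batch_size', type=int, default=128)
    t.add_argument('--print_interval', type=int, default=100)
    t.add_argument('--out', default='cg_model.pt')
    t.set_defaults(func=train)

    # evaluate / visualize / link_prediction / finetune_evaluate parsers would add
    # --model, --alpha, --beta, --momentum and --finetune_steps in the same way.

    args = parser.parse_args()
    args.func(args)

if __name__ == '__main__':
    main()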