def evaluate(args):
    """Evaluate a saved Counting Grid model on Cora node classification.

    Loads the checkpoint named by ``args.model``, builds the p(c|s)
    classifier from the training labels, and prints validation/test accuracy.
    """
    t0 = time.time()
    dev_str = 'cuda:{}'.format(args.gpu) if args.gpu >= 0 else 'cpu'
    dev = T.device(dev_str)
    print('Using device', dev_str)
    _, feats, labels, train_mask, val_mask, test_mask = data.get_node_classification_data(
        'cora', args.norm_constant, args.num_hops, large_split=False)
    feats = T.from_numpy(feats).float()
    print('Built {}-hop aggregated features with k={}'.format(
        args.num_hops, args.norm_constant))
    T.set_grad_enabled(False)
    checkpoint = T.load(args.model)
    cg = model.CountingGrid(feats.shape[1], checkpoint['wcg'].shape[-1],
                            args.cg_window, 0).to(dev)
    cg.wcg[:] = checkpoint['wcg']
    print('Loaded saved model', args.model)
    # Posterior over grid positions for every node; two temperatures, one
    # for prediction (alpha) and one for the embedding used to build p(c|s)
    # (beta).
    logit_post = cg(feats.to(dev)).view(-1, cg.size**2)
    post_pred = (logit_post / args.alpha).softmax(1)
    post_emb = (logit_post / args.beta).softmax(1)
    # p(c|s): per-grid-position class distribution, estimated from the
    # training labels only, then row-normalized.
    pcs = T.matmul(post_emb[train_mask].T,
                   T.from_numpy(labels[train_mask]).float().to(dev))
    pcs /= pcs.sum(1).unsqueeze(1)
    # Class probabilities per node and hard predictions.
    probs = T.matmul(post_pred, pcs)
    preds = probs.argmax(1).cpu().numpy()
    # Accuracy over the validation / test masks (labels are one-hot).
    val_acc = ((preds[val_mask] == labels[val_mask].argmax(1)).sum()) / val_mask.sum()
    test_acc = ((preds[test_mask] == labels[test_mask].argmax(1)).sum()) / test_mask.sum()
    print('Val accuracy', val_acc)
    print('Test accuracy', test_acc)
    print('Total execution time', time.time() - t0)
def visualize(args):
    """Plot the p(c|s) map of a saved Counting Grid for each Cora class.

    Loads the checkpoint named by ``args.model``, estimates p(c|s) from all
    labels, and saves one heatmap per class to ``args.out``.
    """
    t0 = time.time()
    dev_str = 'cuda:{}'.format(args.gpu) if args.gpu >= 0 else 'cpu'
    dev = T.device(dev_str)
    print('Using device', dev_str)
    _, feats, labels, _, _, _ = data.get_node_classification_data(
        'cora', args.norm_constant, args.num_hops, large_split=False)
    feats = T.from_numpy(feats).float()
    print('Built {}-hop aggregated features with k={}'.format(
        args.num_hops, args.norm_constant))
    T.set_grad_enabled(False)
    checkpoint = T.load(args.model)
    cg = model.CountingGrid(feats.shape[1], checkpoint['wcg'].shape[-1],
                            args.cg_window, 0).to(dev)
    cg.wcg[:] = checkpoint['wcg']
    print('Loaded saved model', args.model)
    # Posterior over grid positions for every node.
    logit_post = cg(feats.to(dev)).view(-1, cg.size**2)
    post = logit_post.softmax(1)
    # p(c|s): per-grid-position class distribution, from all labels,
    # row-normalized.
    pcs = T.matmul(post.T, T.from_numpy(labels).float().to(dev))
    pcs /= pcs.sum(1).unsqueeze(1)
    cats = [
        'Case_Based', 'Genetic_Algorithms', 'Neural_Networks',
        'Probabilistic_Methods', 'Reinforcement_Learning', 'Rule_Learning',
        'Theory'
    ]
    # One subplot per class, laid out on a 2x4 grid (last cell unused).
    pt.figure(figsize=(10, 6))
    for idx, cat in enumerate(cats):
        pt.subplot(2, 4, idx + 1)
        pt.title(cat)
        pt.imshow(pcs[:, idx].cpu().numpy().reshape(cg.size, cg.size))
    pt.savefig(args.out)
    print('Saved image', args.out)
    print('Total execution time', time.time() - t0)
def train(args):
    """Train a Counting Grid on k-hop aggregated Cora features.

    Unsupervised maximum-likelihood training with Adam; the resulting model
    state dict is saved to ``args.out``.
    """
    start = time.time()
    device_name = 'cuda:{}'.format(args.gpu) if args.gpu >= 0 else 'cpu'
    device = T.device(device_name)
    print('Using device', device_name)
    _, features, _, _, _, _ = data.get_node_classification_data(
        'cora', args.norm_constant, args.num_hops, large_split=False)
    features = T.from_numpy(features).float()
    print('Built {}-hop aggregated features with k={}'.format(
        args.num_hops, args.norm_constant))
    cg = model.CountingGrid(features.shape[1], args.cg_size, args.cg_window,
                            args.clamp_constant).to(device)
    # Fixed: original had a duplicated `optimizer = optimizer = ...` typo.
    optimizer = T.optim.Adam(lr=args.learning_rate, params=cg.parameters())
    print('Training {} / {} CG for {} batches of size {}'.format(
        args.cg_size, args.cg_window, args.num_batches, args.batch_size))
    cum_loss = 0.
    for i in range(args.num_batches):
        # Sample a random minibatch of node indices (with replacement).
        indices = np.random.randint(features.shape[0],
                                    size=(args.batch_size,))
        optimizer.zero_grad()
        lp = cg(features[indices].to(device))
        loss = -lp.logsumexp((1, 2)).mean()  # the log likelihood (up to +const.)
        loss.backward()
        cum_loss += loss.item()
        optimizer.step()
        with T.no_grad():
            # Project parameters back onto the feasible set after the step.
            cg.clamp()
        if i % args.print_interval == 0:
            # average log likelihood of node
            print('Batch {} of {}: logP = '.format(i, args.num_batches),
                  -cum_loss / (1 if i == 0 else args.print_interval)
                  - np.log(cg.size**2))
            cum_loss = 0.
    T.save(cg.state_dict(), args.out)
    print('Saved model', args.out)
    print('Total execution time', time.time() - start)
def link_prediction(args):
    """Train a Counting Grid on a damaged Cora graph and score link prediction.

    Trains unsupervised on features aggregated over the train-only adjacency,
    then sweeps a posterior "hardening" constant c in 1..10, keeping the test
    AUC/AP at the c with the best validation mean of AUC and AP.
    """
    start = time.time()
    device_name = 'cuda:{}'.format(args.gpu) if args.gpu >= 0 else 'cpu'
    device = T.device(device_name)
    print('Using device', device_name)
    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false, features = data.get_link_prediction_data(
        'cora', args.norm_constant, args.num_hops)
    features = T.from_numpy(features).float()
    print(
        'Built {}-hop aggregated features from damaged graph with k={}'.format(
            args.num_hops, args.norm_constant))
    cg = model.CountingGrid(features.shape[1], args.cg_size, args.cg_window,
                            args.clamp_constant).to(device)
    # Fixed: original had a duplicated `optimizer = optimizer = ...` typo.
    optimizer = T.optim.Adam(lr=args.learning_rate, params=cg.parameters())
    print('Training {} / {} CG for {} batches of size {}'.format(
        args.cg_size, args.cg_window, args.num_batches, args.batch_size))
    cum_loss = 0.
    for i in range(args.num_batches):
        indices = np.random.randint(features.shape[0],
                                    size=(args.batch_size, ))
        optimizer.zero_grad()
        lp = cg(features[indices].to(device))
        loss = -lp.logsumexp((1, 2)).mean()  # the log likelihood (up to +const.)
        loss.backward()
        cum_loss += loss.item()
        optimizer.step()
        with T.no_grad():
            cg.clamp()
        if i % args.print_interval == 0:
            # average log likelihood of node
            print('Batch {} of {}: logP = '.format(i, args.num_batches),
                  -cum_loss / (1 if i == 0 else args.print_interval)
                  - np.log(cg.size**2))
            cum_loss = 0.
    T.set_grad_enabled(False)
    print('Evaluating trained model')
    best_val_mean = 0.
    best_test_auc, best_test_ap = 0., 0.
    # Hoisted out of the c-sweep: the posterior logits and the adjacency
    # tensor do not depend on c.
    logit_posterior = cg(features.to(device)).view(-1, cg.size**2)
    adj = T.from_numpy(adj_train).float().to(device)
    for c in range(1, 11):
        # Harden the posterior by scaling logits by c before the softmax.
        posterior = (logit_posterior * c).softmax(1)
        # Empirical link rate between latent grid positions s and t.
        latent_link_prob = T.einsum('is,ij,jt->st', posterior, adj, posterior)
        latent_link_denom = T.einsum('is,jt->st', posterior, posterior)
        latent_link_prob /= latent_link_denom
        # Project back to node-pair link probabilities.
        link_prob = T.einsum('is,st,jt->ij', posterior, latent_link_prob,
                             posterior).cpu().numpy()
        # NOTE(review): `evaluate` here is called as an AUC/AP metric helper
        # taking (scores, pos_edges, neg_edges); this file also defines
        # `evaluate(args)` with a different signature, which would shadow it
        # if both live in the same module — confirm the intended import.
        val_auc, val_ap = evaluate(link_prob, val_edges, val_edges_false)
        test_auc, test_ap = evaluate(link_prob, test_edges, test_edges_false)
        if (val_auc + val_ap) / 2 > best_val_mean:
            best_val_mean = (val_auc + val_ap) / 2
            best_test_auc, best_test_ap = test_auc, test_ap
        print('Hardening constant c={}: validation AUC={}, AP={}'.format(
            c, val_auc, val_ap))
    print('Test AUC', best_test_auc)
    print('Test AP', best_test_ap)
    print('Total execution time', time.time() - start)
def finetune_evaluate(args):
    """Evaluate a saved CG on Cora (large split), then finetune it supervised.

    Loads a pretrained Counting Grid, reports the zero-shot accuracy of the
    p(c|s) classifier, then makes the CG parameters and a log-domain p(c|s)
    table trainable and optimizes them with SGD + NLL on the training labels,
    tracking the test accuracy at the best-validation epoch.
    """
    start = time.time()
    device_name = 'cuda:{}'.format(args.gpu) if args.gpu >= 0 else 'cpu'
    device = T.device(device_name)
    print('Using device', device_name)
    _, features, labels, train_mask, val_mask, test_mask = data.get_node_classification_data(
        'cora', args.norm_constant, args.num_hops, large_split=True)
    features = T.from_numpy(features).float()
    print('Built {}-hop aggregated features with k={}'.format(
        args.num_hops, args.norm_constant))
    T.set_grad_enabled(False)
    saved_model = T.load(args.model)
    cg = model.CountingGrid(features.shape[1], saved_model['wcg'].shape[-1],
                            args.cg_window, args.clamp_constant).to(device)
    cg.wcg[:] = saved_model['wcg']
    print('Loaded saved model', args.model)
    # Posterior over grid positions; alpha tempers the prediction posterior,
    # beta the embedding posterior used to estimate p(c|s).
    logit_posterior = cg(features.to(device)).view(-1, cg.size**2)
    posterior_pred = (logit_posterior / args.alpha).softmax(1)
    posterior_emb = (logit_posterior / args.beta).softmax(1)
    # p(c|s) from training labels only, row-normalized.
    pcs = T.matmul(posterior_emb[train_mask].T,
                   T.from_numpy(labels[train_mask]).float().to(device))
    pcs /= pcs.sum(1).unsqueeze(1)
    # Zero-shot predictions and accuracy before finetuning.
    probs = T.matmul(posterior_pred, pcs)
    preds = probs.argmax(1).cpu().numpy()
    val_acc = ((preds[val_mask] == labels[val_mask].argmax(1)).sum()) / val_mask.sum()
    test_acc = ((preds[test_mask] == labels[test_mask].argmax(1)).sum()) / test_mask.sum()
    print('Initial val accuracy', val_acc)
    print('Initial test accuracy', test_acc)
    pcs.log_()  # use log-domain parametrization of p(c|s)
    # make CG and p(c|s) matrix trainable
    T.set_grad_enabled(True)
    cg.wcg.requires_grad = True
    pcs.requires_grad = True
    opt = T.optim.SGD([cg.wcg, pcs], lr=args.learning_rate,
                      momentum=args.momentum)
    nll = T.nn.NLLLoss().to(device)
    best_val_acc, best_test_acc = 0., 0.
    print('Made p(c|s) and CG parameters trainable, finetuning for {} epochs'.
          format(args.finetune_steps))
    for i in range(args.finetune_steps):
        opt.zero_grad()
        logit_posterior = cg(features.to(device)).view(-1, cg.size**2)
        posterior_pred = (logit_posterior / args.alpha).softmax(1)
        # compute predictions
        probs = T.matmul(posterior_pred, pcs.softmax(1))
        preds = probs.argmax(1).cpu().numpy()
        # get the current val/test accuracy
        with T.no_grad():
            valacc = ((preds[val_mask] == labels[val_mask].argmax(1)).sum()) / val_mask.sum()
            testacc = ((preds[test_mask] == labels[test_mask].argmax(1)).sum()) / test_mask.sum()
            if valacc > best_val_acc:
                best_val_acc, best_test_acc = valacc, testacc
        print('Epoch {} of {}: validation accuracy {}'.format(
            i, args.finetune_steps, valacc))
        # Fixed: clamp probabilities away from zero before the log so an
        # underflowed probability cannot feed -inf into NLLLoss and poison
        # the gradients with NaN.
        loss = nll(T.log(probs[train_mask].clamp_min(1e-30)),
                   T.from_numpy(labels[train_mask].argmax(1)).to(device))
        loss.backward()
        opt.step()
        with T.no_grad():
            # Project CG parameters back onto the feasible set.
            cg.clamp()
    print('Best val accuracy', best_val_acc)
    print('Test accuracy at best epoch', best_test_acc)
    print('Total execution time', time.time() - start)