def main(args): args['device'] = "cuda" if torch.cuda.is_available() else "cpu" set_random_seed() # Interchangeable with other datasets if args['dataset'] == 'Tox21': from dgl.data.chem import Tox21 dataset = Tox21() trainset, valset, testset = split_dataset(dataset, args['train_val_test_split']) train_loader = DataLoader(trainset, batch_size=args['batch_size'], collate_fn=collate_molgraphs_for_classification) val_loader = DataLoader(valset, batch_size=args['batch_size'], collate_fn=collate_molgraphs_for_classification) test_loader = DataLoader(testset, batch_size=args['batch_size'], collate_fn=collate_molgraphs_for_classification) if args['pre_trained']: args['num_epochs'] = 0 model = model_zoo.chem.load_pretrained(args['exp']) else: # Interchangeable with other models if args['model'] == 'GCN': model = model_zoo.chem.GCNClassifier(in_feats=args['in_feats'], gcn_hidden_feats=args['gcn_hidden_feats'], classifier_hidden_feats=args['classifier_hidden_feats'], n_tasks=dataset.n_tasks) elif args['model'] == 'GAT': model = model_zoo.chem.GATClassifier(in_feats=args['in_feats'], gat_hidden_feats=args['gat_hidden_feats'], num_heads=args['num_heads'], classifier_hidden_feats=args['classifier_hidden_feats'], n_tasks=dataset.n_tasks) loss_criterion = BCEWithLogitsLoss(pos_weight=dataset.task_pos_weights.to(args['device']), reduction='none') optimizer = Adam(model.parameters(), lr=args['lr']) stopper = EarlyStopping(patience=args['patience']) model.to(args['device']) for epoch in range(args['num_epochs']): # Train run_a_train_epoch(args, epoch, model, train_loader, loss_criterion, optimizer) # Validation and early stop val_roc_auc = run_an_eval_epoch(args, model, val_loader) early_stop = stopper.step(val_roc_auc, model) print('epoch {:d}/{:d}, validation roc-auc score {:.4f}, best validation roc-auc score {:.4f}'.format( epoch + 1, args['num_epochs'], val_roc_auc, stopper.best_score)) if early_stop: break if not args['pre_trained']: stopper.load_checkpoint(model) test_roc_auc = run_an_eval_epoch(args, model, test_loader) print('test roc-auc score {:.4f}'.format(test_roc_auc))
def main(args):
    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
        val_mask, test_mask = load_data(args['dataset'])

    dev = torch.device("cuda:0" if args['gpu'] >= 0 else "cpu")
    features = features.to(dev)
    labels = labels.to(dev)
    train_mask = train_mask.to(dev)
    val_mask = val_mask.to(dev)
    test_mask = test_mask.to(dev)

    model = HAN(meta_paths=[['pa', 'ap'], ['pf', 'fp']],
                in_size=features.shape[1],
                hidden_size=args['hidden_units'],
                out_size=num_classes,
                num_heads=args['num_heads'],
                dropout=args['dropout']).to(dev)
    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(
            logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
                  epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                  val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(
        model, g, features, labels, test_mask, loss_fcn)
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1))
def main(args): args['device'] = "cuda" if torch.cuda.is_available() else "cpu" set_random_seed() # Interchangeable with other datasets train_set, val_set, test_set = load_dataset_for_regression(args) train_loader = DataLoader(dataset=train_set, batch_size=args['batch_size'], collate_fn=collate_molgraphs) val_loader = DataLoader(dataset=val_set, batch_size=args['batch_size'], collate_fn=collate_molgraphs) if test_set is not None: test_loader = DataLoader(dataset=test_set, batch_size=args['batch_size'], collate_fn=collate_molgraphs) if args['model'] == 'MPNN': model = model_zoo.chem.MPNNModel(node_input_dim=args['node_in_feats'], edge_input_dim=args['edge_in_feats'], output_dim=args['output_dim']) elif args['model'] == 'SCHNET': model = model_zoo.chem.SchNet(norm=args['norm'], output_dim=args['output_dim']) model.set_mean_std(train_set.mean, train_set.std, args['device']) elif args['model'] == 'MGCN': model = model_zoo.chem.MGCNModel(norm=args['norm'], output_dim=args['output_dim']) model.set_mean_std(train_set.mean, train_set.std, args['device']) model.to(args['device']) loss_fn = nn.MSELoss(reduction='none') optimizer = torch.optim.Adam(model.parameters(), lr=args['lr']) stopper = EarlyStopping(mode='lower', patience=args['patience']) for epoch in range(args['num_epochs']): # Train run_a_train_epoch(args, epoch, model, train_loader, loss_fn, optimizer) # Validation and early stop val_score = run_an_eval_epoch(args, model, val_loader) early_stop = stopper.step(val_score, model) print( 'epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'. format(epoch + 1, args['num_epochs'], args['metric_name'], val_score, args['metric_name'], stopper.best_score)) if early_stop: break if test_set is not None: stopper.load_checkpoint(model) test_score = run_an_eval_epoch(args, model, test_loader) print('test {} {:.4f}'.format(args['metric_name'], test_score))
def main(args):
    args['device'] = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    set_random_seed(args['random_seed'])

    train_set, val_set, test_set = load_dataset_for_regression(args)
    train_loader = DataLoader(dataset=train_set, batch_size=args['batch_size'],
                              shuffle=True, collate_fn=collate_molgraphs)
    val_loader = DataLoader(dataset=val_set, batch_size=args['batch_size'],
                            shuffle=True, collate_fn=collate_molgraphs)
    if test_set is not None:
        test_loader = DataLoader(dataset=test_set, batch_size=args['batch_size'],
                                 collate_fn=collate_molgraphs)

    if args['pre_trained']:
        args['num_epochs'] = 0
        model = model_zoo.chem.load_pretrained(args['exp'])
    else:
        model = load_model(args)
        if args['model'] in ['SCHNET', 'MGCN']:
            model.set_mean_std(train_set.mean, train_set.std, args['device'])

    loss_fn = nn.MSELoss(reduction='none')
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    stopper = EarlyStopping(mode='lower', patience=args['patience'])
    model.to(args['device'])

    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_fn, optimizer)

        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_score, model)
        print('epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'.format(
            epoch + 1, args['num_epochs'], args['metric_name'], val_score,
            args['metric_name'], stopper.best_score))
        if early_stop:
            break

    if test_set is not None:
        if not args['pre_trained']:
            stopper.load_checkpoint(model)
        test_score = run_an_eval_epoch(args, model, test_loader)
        print('test {} {:.4f}'.format(args['metric_name'], test_score))
def main(args):
    args['device'] = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    set_random_seed(args['random_seed'])

    # Interchangeable with other datasets
    dataset, train_set, val_set, test_set = load_dataset_for_classification(args)
    train_loader = DataLoader(train_set, batch_size=args['batch_size'],
                              collate_fn=collate_molgraphs)
    val_loader = DataLoader(val_set, batch_size=args['batch_size'],
                            collate_fn=collate_molgraphs)
    test_loader = DataLoader(test_set, batch_size=args['batch_size'],
                             collate_fn=collate_molgraphs)

    if args['pre_trained']:
        args['num_epochs'] = 0
        model = model_zoo.chem.load_pretrained(args['exp'])
    else:
        args['n_tasks'] = dataset.n_tasks
        model = load_model(args)

    loss_criterion = BCEWithLogitsLoss(pos_weight=dataset.task_pos_weights.to(args['device']),
                                       reduction='none')
    optimizer = Adam(model.parameters(), lr=args['lr'])
    stopper = EarlyStopping(patience=args['patience'])
    model.to(args['device'])

    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_criterion, optimizer)

        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_score, model)
        print('epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'.format(
            epoch + 1, args['num_epochs'], args['metric_name'], val_score,
            args['metric_name'], stopper.best_score))
        if early_stop:
            break

    if not args['pre_trained']:
        stopper.load_checkpoint(model)
    test_score = run_an_eval_epoch(args, model, test_loader)
    print('test {} {:.4f}'.format(args['metric_name'], test_score))
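# The `load_model(args)` helper called in the two scripts above is not shown.
# Judging from the inline construction in the first classification script, it
# presumably dispatches on args['model'] and reads the task count from
# args['n_tasks'], which is set just before the call. A minimal sketch under
# that assumption (not the scripts' actual helper):
def load_model_sketch(args):
    # Dispatch on the model name, mirroring the inline construction above.
    if args['model'] == 'GCN':
        return model_zoo.chem.GCNClassifier(
            in_feats=args['in_feats'],
            gcn_hidden_feats=args['gcn_hidden_feats'],
            classifier_hidden_feats=args['classifier_hidden_feats'],
            n_tasks=args['n_tasks'])
    elif args['model'] == 'GAT':
        return model_zoo.chem.GATClassifier(
            in_feats=args['in_feats'],
            gat_hidden_feats=args['gat_hidden_feats'],
            num_heads=args['num_heads'],
            classifier_hidden_feats=args['classifier_hidden_feats'],
            n_tasks=args['n_tasks'])
    raise ValueError('Unexpected model: {}'.format(args['model']))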
def train(gpu, args):
    rank = args.nr * args.gpus + gpu
    print(rank)
    dist.init_process_group(backend='nccl', init_method='env://',
                            world_size=args.world_size, rank=rank)
    device = torch.device('cuda:' + str(gpu)) if torch.cuda.is_available() else torch.device("cpu")

    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
        val_mask, test_mask = load_data(args.dataset)

    if hasattr(torch, 'BoolTensor'):
        train_mask = train_mask.bool()
        val_mask = val_mask.bool()
        test_mask = test_mask.bool()
    print(train_mask.size())

    # Partition the node data across workers; the graph itself is not split.
    train_mask = torch.split(train_mask, math.ceil(len(train_mask) / args.gpus))[rank]
    labels = torch.split(labels, math.ceil(len(labels) / args.gpus))[rank]
    features = torch.split(features, math.ceil(len(features) / args.gpus))[rank]
    # g = torch.split(g, math.ceil(len(g) / args.gpus))[rank]
    print(train_mask.size(), labels.size(), features.size(), len(g))
    print(type(g))

    features = features.to(device)
    labels = labels.to(device)
    train_mask = train_mask.to(device)
    val_mask = val_mask.to(device)
    test_mask = test_mask.to(device)

    if args.hetero:
        from model_hetero import HAN
        model = HAN(meta_paths=[['pa', 'ap'], ['pf', 'fp']],
                    in_size=features.shape[1],
                    hidden_size=args.hidden_units,
                    out_size=num_classes,
                    num_heads=args.num_heads,
                    dropout=args.dropout).to(device)
        model = nn.parallel.DistributedDataParallel(model, device_ids=[gpu])
        g = g.to(device)
    else:
        from model import HAN
        model = HAN(num_meta_paths=len(g),
                    in_size=features.shape[1],
                    hidden_size=args.hidden_units,
                    out_size=num_classes,
                    num_heads=args.num_heads,
                    dropout=args.dropout).to(device)
        model = nn.parallel.DistributedDataParallel(model, device_ids=[gpu])
        g = [graph.to(device) for graph in g]

    stopper = EarlyStopping(patience=args.patience)
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    for epoch in range(args.num_epochs):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(
            logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
                  epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                  val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(
        model, g, features, labels, test_mask, loss_fcn)
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1))
def main(args):
    # If args['hetero'] is True, g is a heterogeneous graph.
    # Otherwise, it is a list of homogeneous graphs, one per meta-path.
    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
        val_mask, test_mask = load_data(args['dataset'])

    if hasattr(torch, 'BoolTensor'):
        train_mask = train_mask.bool()
        val_mask = val_mask.bool()
        test_mask = test_mask.bool()

    features = features.to(args['device'])
    labels = labels.to(args['device'])
    train_mask = train_mask.to(args['device'])
    val_mask = val_mask.to(args['device'])
    test_mask = test_mask.to(args['device'])

    if args['hetero']:
        from model_hetero import HAN
        model = HAN(meta_paths=[['pa', 'ap'], ['pf', 'fp']],
                    in_size=features.shape[1],
                    hidden_size=args['hidden_units'],
                    out_size=num_classes,
                    num_heads=args['num_heads'],
                    dropout=args['dropout']).to(args['device'])
        g = g.to(args['device'])
    else:
        from model import HAN
        model = HAN(num_meta_paths=len(g),
                    in_size=features.shape[1],
                    hidden_size=args['hidden_units'],
                    out_size=num_classes,
                    num_heads=args['num_heads'],
                    dropout=args['dropout']).to(args['device'])
        g = [graph.to(args['device']) for graph in g]

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(logits[train_mask],
                                                          labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
                  epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                  val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(
        model, g, features, labels, test_mask, loss_fcn)
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1))
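# Every HAN-style script here drives training through an `EarlyStopping` helper
# with a step(loss, acc, model) / load_checkpoint(model) interface (the chem
# scripts use a score-based variant taking a `mode` argument instead). The class
# below is a minimal sketch consistent with that interface; the checkpoint
# filename and the loss-only stopping policy are assumptions, not the scripts'
# actual helper.
import torch

class EarlyStoppingSketch(object):
    """Stop when the validation loss has not improved for `patience` epochs."""

    def __init__(self, patience=10, filename='early_stop.pth'):
        self.patience = patience
        self.filename = filename
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def step(self, loss, acc, model):
        # `acc` is accepted to match the call sites; only the loss drives the stop here.
        if self.best_loss is None or loss < self.best_loss:
            self.best_loss = loss
            self.counter = 0
            # Checkpoint the best model seen so far.
            torch.save(model.state_dict(), self.filename)
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        return self.early_stop

    def load_checkpoint(self, model):
        # Restore the best checkpoint before final evaluation.
        model.load_state_dict(torch.load(self.filename))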
def main(args):
    # g is a list of homogeneous graphs, one per meta-path (APA, APVPA, APPA).
    args_academic = read_args()
    data = dataprocess_han.input_data_han(args_academic)

    features = torch.tensor(data.a_text_embed, dtype=torch.float32)
    labels = torch.tensor(data.a_class)

    APA_g = dgl.graph(data.APA_matrix, ntype='author', etype='coauthor')
    APVPA_g = dgl.graph(data.APVPA_matrix, ntype='author', etype='attendance')
    APPA_g = dgl.graph(data.APPA_matrix, ntype='author', etype='reference')
    # g = [APA_g, APPA_g]
    g = [APA_g, APVPA_g, APPA_g]
    num_classes = 4

    features = features.to(args['device'])
    labels = labels.to(args['device'])

    model = HAN(num_meta_paths=len(g),
                in_size=features.shape[1],
                hidden_size=args['hidden_units'],
                out_size=num_classes,
                num_heads=args['num_heads'],
                dropout=args['dropout']).to(args['device'])

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    # Warm-start from a previously saved checkpoint.
    model.load_state_dict(torch.load("./model_para.pt"))

    for epoch in range(args['num_epochs']):
        # Resample a 20/80 train/test split of the authors each epoch.
        X = [[i] for i in range(args_academic.A_n)]
        train_X, test_X, _, _ = train_test_split(X, X, test_size=0.8)
        # train_X, test_X, _, _ = train_test_split(train_X, train_X, test_size=0.2)
        train_mask = get_binary_mask(args_academic.A_n, train_X)
        test_mask = get_binary_mask(args_academic.A_n, test_X)
        # train_mask = torch.tensor(data.train_mask)
        # test_mask = torch.tensor(data.test_mask)
        val_mask = test_mask

        train_mask = train_mask.to(args['device'])
        val_mask = val_mask.to(args['device'])
        test_mask = test_mask.to(args['device'])

        model.train()
        logits, _ = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(
            logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
                  epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                  val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)

    # Test evaluation is disabled in this variant; the learned author embeddings
    # are exported instead.
    model.eval()
    _, embedding = model(g, features)
    with open("./node_embedding.txt", "w") as embed_file:
        for k in range(embedding.shape[0]):
            embed_file.write('a' + str(k) + " ")
            for l in range(embedding.shape[1] - 1):
                embed_file.write(str(embedding[k][l].item()) + " ")
            embed_file.write(str(embedding[k][-1].item()) + "\n")

    torch.save(model.state_dict(), "./model_para.pt")
def main(args):
    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
        val_mask, test_mask = load_imdb_raw()

    if hasattr(torch, 'BoolTensor'):
        train_mask = train_mask.bool()
        val_mask = val_mask.bool()
        test_mask = test_mask.bool()

    features_m, features_a, features_d = features
    # Replace actor/director features with 10-dimensional zero placeholders.
    features_a = torch.zeros(features_a.shape[0], 10)
    features_d = torch.zeros(features_d.shape[0], 10)
    features_m = features_m.to(args['device'])
    features_a = features_a.to(args['device'])
    features_d = features_d.to(args['device'])
    features = {'movie': features_m, 'actor': features_a, 'director': features_d}
    in_size = {'actor': features_a.shape[1],
               'movie': features_m.shape[1],
               'director': features_d.shape[1]}

    labels = labels.to(args['device'])
    train_mask = train_mask.to(args['device'])
    val_mask = val_mask.to(args['device'])
    test_mask = test_mask.to(args['device'])

    model = HMSG(meta_paths=[['ma', 'am'], ['md', 'dm'], ['am'], ['dm']],
                 in_size=in_size,
                 hidden_size=args['hidden_units'],
                 out_size=num_classes,
                 aggre_type='attention',
                 num_heads=args['num_heads'],
                 dropout=args['dropout']).to(args['device'])
    g = g.to(args['device'])

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        z, logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(logits[train_mask],
                                                          labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1, z = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
                  epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                  val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1, z = evaluate(
        model, g, features, labels, test_mask, loss_fcn)

    # Save test embeddings and labels for downstream visualization.
    emd_imdb, label_imdb = z[test_mask], labels[test_mask]
    np.savetxt('./out/emd_imdb.txt', emd_imdb.cpu())
    np.savetxt('./out/label_imdb.txt', np.array(label_imdb.cpu(), dtype=np.int32))

    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1))
def main(args):
    g, features, train_val_test_pos_user_item, train_val_test_neg_user_item = load_amazon_raw()

    user_feats, item_feats = features
    # user_feats = torch.zeros(user_feats.shape[0], 100)
    # item_feats = torch.zeros(item_feats.shape[0], 100)
    user_feats = user_feats.to(args['device'])
    item_feats = item_feats.to(args['device'])
    features = {'user': user_feats, 'item': item_feats}
    in_size = {'user': user_feats.shape[1], 'item': item_feats.shape[1]}

    train_pos_user_item = train_val_test_pos_user_item['train_pos_user_item']
    val_pos_user_item = train_val_test_pos_user_item['val_pos_user_item']
    test_pos_user_item = train_val_test_pos_user_item['test_pos_user_item']
    train_neg_user_item = train_val_test_neg_user_item['train_neg_user_item']
    val_neg_user_item = train_val_test_neg_user_item['val_neg_user_item']
    test_neg_user_item = train_val_test_neg_user_item['test_neg_user_item']

    # Subsample negatives so positives and negatives are balanced in each split.
    np.random.shuffle(train_neg_user_item)
    train_neg_user_item = train_neg_user_item[:train_pos_user_item.shape[0]]
    np.random.shuffle(val_neg_user_item)
    val_neg_user_item = val_neg_user_item[:val_pos_user_item.shape[0]]
    np.random.shuffle(test_neg_user_item)
    test_neg_user_item = test_neg_user_item[:test_pos_user_item.shape[0]]

    y_true_test = np.array([1] * len(test_pos_user_item) + [0] * len(test_neg_user_item))

    model = HMSG(meta_paths=[['ui', 'iu'], ['iu', 'ui'], ['ui'], ['iu']],
                 in_size=in_size,
                 hidden_size=args['hidden_units'],
                 aggre_type='mean',
                 num_heads=args['num_heads'],
                 dropout=args['dropout']).to(args['device'])
    g = g.to(args['device'])

    stopper = EarlyStopping(patience=args['patience'])
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        embeddings = model(g, features)
        user_embed, item_embed = embeddings['user'], embeddings['item']

        # Score a pair by the inner product of its user and item embeddings.
        pos_embedding_user = user_embed[train_pos_user_item[:, 0]].view(-1, 1, user_embed.shape[1])
        pos_embedding_item = item_embed[train_pos_user_item[:, 1]].view(-1, item_embed.shape[1], 1)
        neg_embedding_user = user_embed[train_neg_user_item[:, 0]].view(-1, 1, user_embed.shape[1])
        neg_embedding_item = item_embed[train_neg_user_item[:, 1]].view(-1, item_embed.shape[1], 1)

        pos_out = torch.bmm(pos_embedding_user, pos_embedding_item)  # .view(-1, 5)
        # pos_out = torch.mean(pos_out, dim=1)
        neg_out = -torch.bmm(neg_embedding_user, neg_embedding_item)  # .view(-1, 5)
        # neg_out = torch.sum(neg_out, dim=1)
        train_loss = -torch.mean(F.logsigmoid(pos_out) + F.logsigmoid(neg_out))
        # print(train_loss.item())

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            embeddings = model(g, features)
            user_embed, item_embed = embeddings['user'], embeddings['item']
            pos_embedding_user = user_embed[val_pos_user_item[:, 0]].view(-1, 1, user_embed.shape[1])
            pos_embedding_item = item_embed[val_pos_user_item[:, 1]].view(-1, item_embed.shape[1], 1)
            neg_embedding_user = user_embed[val_neg_user_item[:, 0]].view(-1, 1, user_embed.shape[1])
            neg_embedding_item = item_embed[val_neg_user_item[:, 1]].view(-1, item_embed.shape[1], 1)

            pos_out = torch.bmm(pos_embedding_user, pos_embedding_item)
            neg_out = -torch.bmm(neg_embedding_user, neg_embedding_item)
            val_loss = -torch.mean(F.logsigmoid(pos_out) + F.logsigmoid(neg_out))

        early_stop = stopper.step(val_loss.data.item(), model)
        if early_stop:
            break

    stopper.load_checkpoint(model)
    model.eval()
    with torch.no_grad():
        embeddings = model(g, features)
        user_embed, item_embed = embeddings['user'], embeddings['item']

        pos_embedding_user = user_embed[test_pos_user_item[:, 0]].view(-1, 1, user_embed.shape[1])
        pos_embedding_item = item_embed[test_pos_user_item[:, 1]].view(-1, item_embed.shape[1], 1)
        neg_embedding_user = user_embed[test_neg_user_item[:, 0]].view(-1, 1, user_embed.shape[1])
        neg_embedding_item = item_embed[test_neg_user_item[:, 1]].view(-1, item_embed.shape[1], 1)

        pos_out = torch.bmm(pos_embedding_user, pos_embedding_item).flatten()
        neg_out = torch.bmm(neg_embedding_user, neg_embedding_item).flatten()
        pos_proba = torch.sigmoid(pos_out)
        neg_proba = torch.sigmoid(neg_out)

    y_proba_test = list(pos_proba.cpu().numpy()) + list(neg_proba.cpu().numpy())
    auc = roc_auc_score(y_true_test, y_proba_test)
    ap = average_precision_score(y_true_test, y_proba_test)
    print('--------------Link Prediction Test--------------')
    print('AUC = {:.4f}'.format(auc))
    print('AP = {:.4f}'.format(ap))
    np.save('./out/res.npy', [auc, ap])
def main(args):
    # ACM data
    if args['dataset'] == 'ACMRaw':
        from utils import load_data
        g, features, labels, n_classes, train_nid, val_nid, test_nid, train_mask, \
            val_mask, test_mask = load_data('ACMRaw')
        metapath_list = [['pa', 'ap'], ['pf', 'fp']]
    else:
        raise NotImplementedError('Unsupported dataset {}'.format(args['dataset']))

    # TODO: do we need a different number of sampled neighbors per meta-path-based graph?
    num_neighbors = args['num_neighbors']
    han_sampler = HANSampler(g, metapath_list, num_neighbors)
    # Create a PyTorch DataLoader that constructs blocks of sampled neighbors.
    dataloader = DataLoader(dataset=train_nid,
                            batch_size=args['batch_size'],
                            collate_fn=han_sampler.sample_blocks,
                            shuffle=True,
                            drop_last=False,
                            num_workers=4)

    model = HAN(num_metapath=len(metapath_list),
                in_size=features.shape[1],
                hidden_size=args['hidden_units'],
                out_size=n_classes,
                num_heads=args['num_heads'],
                dropout=args['dropout']).to(args['device'])

    total_params = sum(p.numel() for p in model.parameters())
    print("total params: {:d}".format(total_params))
    total_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("total trainable params: {:d}".format(total_trainable_params))

    stopper = EarlyStopping(patience=args['patience'])
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        for step, (seeds, blocks) in enumerate(dataloader):
            h_list = load_subtensors(blocks, features)
            blocks = [block.to(args['device']) for block in blocks]
            hs = [h.to(args['device']) for h in h_list]

            logits = model(blocks, hs)
            loss = loss_fn(logits, labels[numpy.asarray(seeds)].to(args['device']))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Print info in each batch.
            train_acc, train_micro_f1, train_macro_f1 = score(
                logits, labels[numpy.asarray(seeds)])
            print("Epoch {:d} | loss: {:.4f} | train_acc: {:.4f} | "
                  "train_micro_f1: {:.4f} | train_macro_f1: {:.4f}".format(
                      epoch + 1, loss.item(), train_acc, train_micro_f1, train_macro_f1))

        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(
            model, g, metapath_list, num_neighbors, features, labels,
            val_nid, loss_fn, args['batch_size'])
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)
        print('Epoch {:d} | Val loss {:.4f} | Val Accuracy {:.4f} | '
              'Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
                  epoch + 1, val_loss.item(), val_acc, val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(
        model, g, metapath_list, num_neighbors, features, labels,
        test_nid, loss_fn, args['batch_size'])
    print('Test loss {:.4f} | Test Accuracy {:.4f} | '
          'Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
              test_loss.item(), test_acc, test_micro_f1, test_macro_f1))
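# None of these scripts shows its entry point. DGL-style examples are typically
# driven by an argparse block that packs the parsed flags into the args dict the
# main functions index above. The sketch below is a hypothetical invocation for
# the mini-batch HAN script; the flag names and defaults are illustrative
# assumptions, not the scripts' actual arguments.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser('HAN mini-batch training')
    parser.add_argument('--dataset', type=str, default='ACMRaw')
    parser.add_argument('--lr', type=float, default=0.005)
    parser.add_argument('--num_heads', type=int, default=8)
    parser.add_argument('--hidden_units', type=int, default=8)
    parser.add_argument('--dropout', type=float, default=0.6)
    parser.add_argument('--weight_decay', type=float, default=0.001)
    parser.add_argument('--num_epochs', type=int, default=200)
    parser.add_argument('--patience', type=int, default=100)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--num_neighbors', type=int, default=20)

    # Expose the flags as the dict-style args the main functions expect.
    args = parser.parse_args().__dict__
    args['device'] = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    main(args)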