def main(args):
    args['device'] = "cuda" if torch.cuda.is_available() else "cpu"
    set_random_seed()

    # Interchangeable with other datasets
    if args['dataset'] == 'Tox21':
        from dgl.data.chem import Tox21
        dataset = Tox21()

    trainset, valset, testset = split_dataset(dataset, args['train_val_test_split'])
    train_loader = DataLoader(trainset, batch_size=args['batch_size'],
                              collate_fn=collate_molgraphs_for_classification)
    val_loader = DataLoader(valset, batch_size=args['batch_size'],
                            collate_fn=collate_molgraphs_for_classification)
    test_loader = DataLoader(testset, batch_size=args['batch_size'],
                             collate_fn=collate_molgraphs_for_classification)

    if args['pre_trained']:
        args['num_epochs'] = 0
        model = model_zoo.chem.load_pretrained(args['exp'])
    else:
        # Interchangeable with other models
        if args['model'] == 'GCN':
            model = model_zoo.chem.GCNClassifier(
                in_feats=args['in_feats'],
                gcn_hidden_feats=args['gcn_hidden_feats'],
                classifier_hidden_feats=args['classifier_hidden_feats'],
                n_tasks=dataset.n_tasks)
        elif args['model'] == 'GAT':
            model = model_zoo.chem.GATClassifier(
                in_feats=args['in_feats'],
                gat_hidden_feats=args['gat_hidden_feats'],
                num_heads=args['num_heads'],
                classifier_hidden_feats=args['classifier_hidden_feats'],
                n_tasks=dataset.n_tasks)

    loss_criterion = BCEWithLogitsLoss(
        pos_weight=dataset.task_pos_weights.to(args['device']),
        reduction='none')
    optimizer = Adam(model.parameters(), lr=args['lr'])
    stopper = EarlyStopping(patience=args['patience'])
    model.to(args['device'])

    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_criterion, optimizer)

        # Validation and early stop
        val_roc_auc = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_roc_auc, model)
        print('epoch {:d}/{:d}, validation roc-auc score {:.4f}, '
              'best validation roc-auc score {:.4f}'.format(
                  epoch + 1, args['num_epochs'], val_roc_auc, stopper.best_score))
        if early_stop:
            break

    if not args['pre_trained']:
        stopper.load_checkpoint(model)
    test_roc_auc = run_an_eval_epoch(args, model, test_loader)
    print('test roc-auc score {:.4f}'.format(test_roc_auc))
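# Hedged sketch (an assumption, not part of the original scripts): a minimal
# EarlyStopping matching the most common calling convention above, where
# step(score, model) returns True once the validation score has not improved
# for `patience` epochs, best_score tracks the best value seen so far, and
# load_checkpoint(model) restores the best weights. Several scripts below use
# other variants (mode='lower'/'min', a `path`/`threshold` argument, or
# step(loss, acc, model)); adapt the signature accordingly.
import torch

class EarlyStopping:
    def __init__(self, patience=10, filename='es_checkpoint.pt', mode='higher'):
        assert mode in ['higher', 'lower']
        self.patience = patience
        self.filename = filename
        self.mode = mode
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def _improved(self, score):
        if self.mode == 'higher':
            return score > self.best_score
        return score < self.best_score

    def step(self, score, model):
        if self.best_score is None or self._improved(score):
            # New best score: reset patience and checkpoint the weights.
            self.best_score = score
            self.counter = 0
            torch.save(model.state_dict(), self.filename)
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        return self.early_stop

    def load_checkpoint(self, model):
        model.load_state_dict(torch.load(self.filename))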
def main(args):
    args['device'] = "cuda" if torch.cuda.is_available() else "cpu"
    set_random_seed()

    # Interchangeable with other datasets
    train_set, val_set, test_set = load_dataset_for_regression(args)
    train_loader = DataLoader(dataset=train_set,
                              batch_size=args['batch_size'],
                              collate_fn=collate_molgraphs)
    val_loader = DataLoader(dataset=val_set,
                            batch_size=args['batch_size'],
                            collate_fn=collate_molgraphs)
    if test_set is not None:
        test_loader = DataLoader(dataset=test_set,
                                 batch_size=args['batch_size'],
                                 collate_fn=collate_molgraphs)

    if args['model'] == 'MPNN':
        model = model_zoo.chem.MPNNModel(node_input_dim=args['node_in_feats'],
                                         edge_input_dim=args['edge_in_feats'],
                                         output_dim=args['output_dim'])
    elif args['model'] == 'SCHNET':
        model = model_zoo.chem.SchNet(norm=args['norm'],
                                      output_dim=args['output_dim'])
        model.set_mean_std(train_set.mean, train_set.std, args['device'])
    elif args['model'] == 'MGCN':
        model = model_zoo.chem.MGCNModel(norm=args['norm'],
                                         output_dim=args['output_dim'])
        model.set_mean_std(train_set.mean, train_set.std, args['device'])
    model.to(args['device'])

    loss_fn = nn.MSELoss(reduction='none')
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
    stopper = EarlyStopping(mode='lower', patience=args['patience'])

    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_fn, optimizer)

        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_score, model)
        print('epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'.format(
            epoch + 1, args['num_epochs'], args['metric_name'], val_score,
            args['metric_name'], stopper.best_score))
        if early_stop:
            break

    if test_set is not None:
        stopper.load_checkpoint(model)
        test_score = run_an_eval_epoch(args, model, test_loader)
        print('test {} {:.4f}'.format(args['metric_name'], test_score))
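# Hedged sketch (an assumption): a minimal collate_molgraphs consistent with
# how the loaders above unpack a batch as (smiles, bg, labels, mask). Each
# dataset item is assumed to be (smiles, DGLGraph, label_tensor) or
# (smiles, DGLGraph, label_tensor, mask_tensor); graphs are merged with
# dgl.batch and labels/masks are stacked along a new batch dimension.
import dgl
import torch

def collate_molgraphs(data):
    if len(data[0]) == 3:
        smiles, graphs, labels = map(list, zip(*data))
        # No mask provided: treat every label as valid.
        masks = [torch.ones(l.shape) for l in labels]
    else:
        smiles, graphs, labels, masks = map(list, zip(*data))
    bg = dgl.batch(graphs)
    return smiles, bg, torch.stack(labels, dim=0), torch.stack(masks, dim=0)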
def main(args):
    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
        val_mask, test_mask = load_data(args['dataset'])

    dev = torch.device("cuda:0" if args['gpu'] >= 0 else "cpu")
    features = features.to(dev)
    labels = labels.to(dev)
    train_mask = train_mask.to(dev)
    val_mask = val_mask.to(dev)
    test_mask = test_mask.to(dev)

    model = HAN(meta_paths=[['pa', 'ap'], ['pf', 'fp']],
                in_size=features.shape[1],
                hidden_size=args['hidden_units'],
                out_size=num_classes,
                num_heads=args['num_heads'],
                dropout=args['dropout']).to(dev)

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(
            logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
                  epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                  val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(
        model, g, features, labels, test_mask, loss_fcn)
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1))
def main(args):
    args['device'] = torch.device("cuda") if torch.cuda.is_available() \
        else torch.device("cpu")
    set_random_seed(args['random_seed'])

    train_set, val_set, test_set = load_dataset_for_regression(args)
    train_loader = DataLoader(dataset=train_set,
                              batch_size=args['batch_size'],
                              shuffle=True,
                              collate_fn=collate_molgraphs)
    val_loader = DataLoader(dataset=val_set,
                            batch_size=args['batch_size'],
                            shuffle=True,
                            collate_fn=collate_molgraphs)
    if test_set is not None:
        test_loader = DataLoader(dataset=test_set,
                                 batch_size=args['batch_size'],
                                 collate_fn=collate_molgraphs)

    if args['pre_trained']:
        args['num_epochs'] = 0
        model = model_zoo.chem.load_pretrained(args['exp'])
    else:
        model = load_model(args)
        if args['model'] in ['SCHNET', 'MGCN']:
            model.set_mean_std(train_set.mean, train_set.std, args['device'])
    loss_fn = nn.MSELoss(reduction='none')
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    stopper = EarlyStopping(mode='lower', patience=args['patience'])
    model.to(args['device'])

    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_fn, optimizer)

        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_score, model)
        print('epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'.format(
            epoch + 1, args['num_epochs'], args['metric_name'], val_score,
            args['metric_name'], stopper.best_score))
        if early_stop:
            break

    if test_set is not None:
        if not args['pre_trained']:
            stopper.load_checkpoint(model)
        test_score = run_an_eval_epoch(args, model, test_loader)
        print('test {} {:.4f}'.format(args['metric_name'], test_score))
def main(args):
    args['device'] = torch.device("cuda") if torch.cuda.is_available() \
        else torch.device("cpu")
    set_random_seed(args['random_seed'])

    # Interchangeable with other datasets
    dataset, train_set, val_set, test_set = load_dataset_for_classification(args)
    train_loader = DataLoader(train_set, batch_size=args['batch_size'],
                              collate_fn=collate_molgraphs)
    val_loader = DataLoader(val_set, batch_size=args['batch_size'],
                            collate_fn=collate_molgraphs)
    test_loader = DataLoader(test_set, batch_size=args['batch_size'],
                             collate_fn=collate_molgraphs)

    if args['pre_trained']:
        args['num_epochs'] = 0
        model = model_zoo.chem.load_pretrained(args['exp'])
    else:
        args['n_tasks'] = dataset.n_tasks
        model = load_model(args)
    loss_criterion = BCEWithLogitsLoss(
        pos_weight=dataset.task_pos_weights.to(args['device']),
        reduction='none')
    optimizer = Adam(model.parameters(), lr=args['lr'])
    stopper = EarlyStopping(patience=args['patience'])
    model.to(args['device'])

    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_criterion, optimizer)

        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_score, model)
        print('epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'.format(
            epoch + 1, args['num_epochs'], args['metric_name'], val_score,
            args['metric_name'], stopper.best_score))
        if early_stop:
            break

    if not args['pre_trained']:
        stopper.load_checkpoint(model)
    test_score = run_an_eval_epoch(args, model, test_loader)
    print('test {} {:.4f}'.format(args['metric_name'], test_score))
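# Hedged sketch (an assumption): a load_model consistent with the model
# construction spelled out inline in the first script above; the real helper
# likely covers more architectures and hyperparameters than shown here.
from dgl import model_zoo

def load_model(args):
    if args['model'] == 'GCN':
        return model_zoo.chem.GCNClassifier(
            in_feats=args['in_feats'],
            gcn_hidden_feats=args['gcn_hidden_feats'],
            classifier_hidden_feats=args['classifier_hidden_feats'],
            n_tasks=args['n_tasks'])
    if args['model'] == 'GAT':
        return model_zoo.chem.GATClassifier(
            in_feats=args['in_feats'],
            gat_hidden_feats=args['gat_hidden_feats'],
            num_heads=args['num_heads'],
            classifier_hidden_feats=args['classifier_hidden_feats'],
            n_tasks=args['n_tasks'])
    raise ValueError('Unknown model: {}'.format(args['model']))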
def main(args):
    args['device'] = "cuda" if torch.cuda.is_available() else "cpu"
    set_random_seed()

    # Interchangeable with other datasets
    if args['dataset'] == 'Alchemy':
        from dgl.data.chem import TencentAlchemyDataset
        train_set = TencentAlchemyDataset(mode='dev')
        val_set = TencentAlchemyDataset(mode='valid')

    train_loader = DataLoader(dataset=train_set,
                              batch_size=args['batch_size'],
                              collate_fn=collate_molgraphs_for_regression)
    val_loader = DataLoader(dataset=val_set,
                            batch_size=args['batch_size'],
                            collate_fn=collate_molgraphs_for_regression)

    if args['model'] == 'MPNN':
        model = model_zoo.chem.MPNNModel(output_dim=args['output_dim'])
    elif args['model'] == 'SCHNET':
        model = model_zoo.chem.SchNet(norm=args['norm'],
                                      output_dim=args['output_dim'])
        model.set_mean_std(train_set.mean, train_set.std, args['device'])
    elif args['model'] == 'MGCN':
        model = model_zoo.chem.MGCNModel(norm=args['norm'],
                                         output_dim=args['output_dim'])
        model.set_mean_std(train_set.mean, train_set.std, args['device'])
    model.to(args['device'])

    loss_fn = nn.MSELoss()
    score_fn = nn.L1Loss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
    stopper = EarlyStopping(mode='lower', patience=args['patience'])

    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_fn, score_fn, optimizer)

        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader, score_fn)
        early_stop = stopper.step(val_score, model)
        print('epoch {:d}/{:d}, validation score {:.4f}, best validation score {:.4f}'.format(
            epoch + 1, args['num_epochs'], val_score, stopper.best_score))
        if early_stop:
            break
def train(gpu, args):
    rank = args.nr * args.gpus + gpu
    print(rank)
    dist.init_process_group(backend='nccl', init_method='env://',
                            world_size=args.world_size, rank=rank)
    cuda_string = 'cuda' + ':' + str(gpu)
    device = torch.device(cuda_string) if torch.cuda.is_available() \
        else torch.device("cpu")

    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
        val_mask, test_mask = load_data(args.dataset)

    if hasattr(torch, 'BoolTensor'):
        train_mask = train_mask.bool()
        val_mask = val_mask.bool()
        test_mask = test_mask.bool()

    print(train_mask.size())
    train_mask = th.split(train_mask, math.ceil(len(train_mask) / args.gpus))[rank]
    labels = th.split(labels, math.ceil(len(labels) / args.gpus))[rank]
    features = th.split(features, math.ceil(len(features) / args.gpus))[rank]
    # g = th.split(g, math.ceil(len(g) / args.gpus))[rank]
    print(train_mask.size(), labels.size(), features.size(), len(g))
    print(type(g))

    features = features.to(device)
    labels = labels.to(device)
    train_mask = train_mask.to(device)
    val_mask = val_mask.to(device)
    test_mask = test_mask.to(device)

    if args.hetero:
        from model_hetero import HAN
        model = HAN(meta_paths=[['pa', 'ap'], ['pf', 'fp']],
                    in_size=features.shape[1],
                    hidden_size=args.hidden_units,
                    out_size=num_classes,
                    num_heads=args.num_heads,
                    dropout=args.dropout).to(device)
        model = nn.parallel.DistributedDataParallel(model, device_ids=[gpu])
        g = g.to(device)
    else:
        from model import HAN
        model = HAN(num_meta_paths=len(g),
                    in_size=features.shape[1],
                    hidden_size=args.hidden_units,
                    out_size=num_classes,
                    num_heads=args.num_heads,
                    dropout=args.dropout).to(device)
        model = nn.parallel.DistributedDataParallel(model, device_ids=[gpu])
        g = [graph.to(device) for graph in g]

    stopper = EarlyStopping(patience=args.patience)
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    for epoch in range(args.num_epochs):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(
            logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
                  epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                  val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(
        model, g, features, labels, test_mask, loss_fcn)
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1))
def main(args):
    # If args['hetero'] is True, g would be a heterogeneous graph.
    # Otherwise, it will be a list of homogeneous graphs.
    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
        val_mask, test_mask = load_data(args['dataset'])

    if hasattr(torch, 'BoolTensor'):
        train_mask = train_mask.bool()
        val_mask = val_mask.bool()
        test_mask = test_mask.bool()

    features = features.to(args['device'])
    labels = labels.to(args['device'])
    train_mask = train_mask.to(args['device'])
    val_mask = val_mask.to(args['device'])
    test_mask = test_mask.to(args['device'])

    if args['hetero']:
        from model_hetero import HAN
        model = HAN(meta_paths=[['pa', 'ap'], ['pf', 'fp']],
                    in_size=features.shape[1],
                    hidden_size=args['hidden_units'],
                    out_size=num_classes,
                    num_heads=args['num_heads'],
                    dropout=args['dropout']).to(args['device'])
        g = g.to(args['device'])
    else:
        from model import HAN
        model = HAN(num_meta_paths=len(g),
                    in_size=features.shape[1],
                    hidden_size=args['hidden_units'],
                    out_size=num_classes,
                    num_heads=args['num_heads'],
                    dropout=args['dropout']).to(args['device'])
        g = [graph.to(args['device']) for graph in g]

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(logits[train_mask],
                                                          labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
                  epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                  val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(
        model, g, features, labels, test_mask, loss_fcn)
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1))
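# Hedged sketch (an assumption): score/evaluate helpers matching how the HAN
# scripts above call them. score returns (accuracy, micro-F1, macro-F1) over
# the masked nodes, and evaluate additionally returns the loss.
import torch
from sklearn.metrics import f1_score

def score(logits, labels):
    _, indices = torch.max(logits, dim=1)
    prediction = indices.long().cpu().numpy()
    labels = labels.cpu().numpy()
    accuracy = (prediction == labels).sum() / len(prediction)
    micro_f1 = f1_score(labels, prediction, average='micro')
    macro_f1 = f1_score(labels, prediction, average='macro')
    return accuracy, micro_f1, macro_f1

def evaluate(model, g, features, labels, mask, loss_func):
    model.eval()
    with torch.no_grad():
        logits = model(g, features)
    loss = loss_func(logits[mask], labels[mask])
    accuracy, micro_f1, macro_f1 = score(logits[mask], labels[mask])
    return loss, accuracy, micro_f1, macro_f1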
def main(args):
    # load and preprocess dataset
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = data[0]
    if args.gpu < 0:
        cuda = False
        ctx = mx.cpu(0)
    else:
        cuda = True
        ctx = mx.gpu(args.gpu)
        g = g.to(ctx)

    features = g.ndata['feat']
    labels = mx.nd.array(g.ndata['label'], dtype="float32", ctx=ctx)
    mask = g.ndata['train_mask']
    mask = mx.nd.array(np.nonzero(mask.asnumpy())[0], ctx=ctx)
    val_mask = g.ndata['val_mask']
    val_mask = mx.nd.array(np.nonzero(val_mask.asnumpy())[0], ctx=ctx)
    test_mask = g.ndata['test_mask']
    test_mask = mx.nd.array(np.nonzero(test_mask.asnumpy())[0], ctx=ctx)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)

    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g,
                args.num_layers,
                in_feats,
                args.num_hidden,
                n_classes,
                heads,
                elu,
                args.in_drop,
                args.attn_drop,
                args.alpha,
                args.residual)

    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    model.initialize(ctx=ctx)

    # use optimizer
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': args.lr})

    dur = []
    for epoch in range(args.epochs):
        if epoch >= 3:
            t0 = time.time()
        # forward
        with mx.autograd.record():
            logits = model(features)
            loss = mx.nd.softmax_cross_entropy(logits[mask].squeeze(),
                                               labels[mask].squeeze())
            loss.backward()
        trainer.step(mask.shape[0])

        if epoch >= 3:
            dur.append(time.time() - t0)
        print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}".format(
            epoch, loss.asnumpy()[0], np.mean(dur), n_edges / np.mean(dur) / 1000))

        val_accuracy = evaluate(model, features, labels, val_mask)
        print("Validation Accuracy {:.4f}".format(val_accuracy))
        if args.early_stop:
            if stopper.step(val_accuracy, model):
                break
    print()

    if args.early_stop:
        model.load_parameters('model.param')
    test_accuracy = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(test_accuracy))
def main(opt):
    data = get_dataset(opt)
    g = data[0]
    if opt['gpu'] < 0:
        cuda = False
    else:
        cuda = True
        g = g.int().to(opt['gpu'])

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    # add self loop
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()

    # create model
    heads = ([opt['num_heads']] * opt['num_layers']) + [opt['num_out_heads']]
    if opt['model'] == 'GAT':
        model = GAT(g, opt['num_layers'], num_feats, opt['num_hidden'],
                    n_classes, heads, F.elu, opt['in_drop'], opt['attn_drop'],
                    opt['negative_slope'], opt['residual'], opt)
    elif opt['model'] == 'AGNN':
        model = AGNN(g, opt['num_layers'], num_feats, opt['num_hidden'],
                     n_classes, opt['in_drop'], opt)
    print(model)
    if opt['early_stop']:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()

    # use optimizer
    optimizer = get_optimizer(opt['optimizer'], parameters=model.parameters(),
                              lr=opt['lr'], weight_decay=opt['weight_decay'])

    # initialize graph
    dur = []
    for epoch in range(opt['epochs']):
        # model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        # logits = model(features)
        # loss = loss_fcn(logits[train_mask], labels[train_mask])
        # optimizer.zero_grad()
        # loss.backward()
        # optimizer.step()
        loss, logits = train(model, optimizer, features, train_mask, labels)
        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if opt['fastmode']:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if opt['early_stop']:
                if stopper.step(val_acc, model):
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  n_edges / np.mean(dur) / 1000))

    print()
    if opt['early_stop']:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
def main(args):
    # load and preprocess dataset
    g, graph_labels = load_graphs('/yushi/dataset/Amazon2M/Amazon2M_dglgraph.bin')
    assert len(g) == 1
    g = g[0]
    data = g.ndata
    features = torch.FloatTensor(data['feat'])
    labels = torch.LongTensor(data['label'])
    if hasattr(torch, 'BoolTensor'):
        train_mask = data['train_mask'].bool()
        val_mask = data['val_mask'].bool()
        test_mask = data['test_mask'].bool()
    num_feats = features.shape[1]
    n_classes = 47
    n_edges = g.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # add self loop
    g = add_self_loop(g)
    # g.remove_edges_from(nx.selfloop_edges(g))
    # g = DGLGraph(g)
    # g.add_edges(g.nodes(), g.nodes())
    n_edges = g.number_of_edges()

    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop,
                args.negative_slope, args.residual)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    start = time.time()
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if args.fastmode:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                if stopper.step(val_acc, model):
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  n_edges / np.mean(dur) / 1000))

    print()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
    print(f"Time Consuming {np.sum(dur)}, Overall time {time.time() - start}")
def main(args):
    # load and preprocess dataset
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))
    if args.num_layers <= 0:
        raise ValueError("num layer must be positive int")

    g = data[0]
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        g = g.to(args.gpu)

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = g.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    # add self loop
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()

    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = HardGAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                    heads, F.elu, args.in_drop, args.attn_drop,
                    args.negative_slope, args.residual, args.k)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if args.fastmode:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                if stopper.step(val_acc, model):
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  n_edges / np.mean(dur) / 1000))

    print()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
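# Hedged sketch (an assumption): the accuracy/evaluate helpers used by the
# GAT-style training loops above. evaluate runs a full forward pass in eval
# mode and scores only the masked nodes.
import torch

def accuracy(logits, labels):
    _, indices = torch.max(logits, dim=1)
    correct = torch.sum(indices == labels)
    return correct.item() * 1.0 / len(labels)

def evaluate(model, features, labels, mask):
    model.eval()
    with torch.no_grad():
        logits = model(features)
        return accuracy(logits[mask], labels[mask])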
def train_idgl(args):
    data = load_data(args)
    seed_init(seed=args.seed)
    dev = torch.device("cuda:0" if args.gpu >= 0 else "cpu")

    features = torch.FloatTensor(data.features)
    features = F.normalize(features, p=1, dim=1)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))
    # print(torch.where(test_mask))  # Same train/test split with different init_seed

    features = features.to(dev)
    labels = labels.to(dev)
    train_mask = train_mask.to(dev)
    val_mask = val_mask.to(dev)
    test_mask = test_mask.to(dev)

    g = data.graph
    # add self loop
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    n_edges = g.number_of_edges()

    # create model
    model = IDGL(args, num_feats, n_classes, dev)
    print(model)
    es_checkpoint = 'temp/' + time.strftime('%m-%d %H-%M-%S', time.localtime()) + '.pt'
    stopper = EarlyStopping(patience=100, path=es_checkpoint)
    model.to(dev)

    adj = g.adjacency_matrix()
    # adj = normalize_adj_torch(adj.to_dense())
    adj = F.normalize(adj.to_dense(), dim=1, p=1)
    adj = adj.to(dev)

    # cla_loss = torch.nn.CrossEntropyLoss()
    cla_loss = torch.nn.NLLLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    h = None

    # ! Pretrain
    res_dict = {'parameters': args.__dict__}
    for epoch in range(args.pretrain_epochs):
        logits, _ = model.GCN(features, adj)
        loss = cla_loss(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        # Stops if it gets an anomaly
        with torch.autograd.detect_anomaly():
            loss.backward()
        optimizer.step()

        train_acc = accuracy(logits[train_mask], labels[train_mask])
        val_acc = evaluate(model, features, labels, val_mask, adj)
        test_acc = evaluate(model, features, labels, test_mask, adj)
        print(f"Pretrain-Epoch {epoch:05d} | Time(s) {np.mean(dur):.4f} | "
              f"Loss {loss.item():.4f} | TrainAcc {train_acc:.4f} | "
              f"ValAcc {val_acc:.4f} | TestAcc {test_acc:.4f}")
        if args.early_stop > 0:
            if stopper.step(val_acc, model):
                break

    print(f"Pretrain Test Accuracy: {test_acc:.4f}")
    print(f"{'=' * 10}Pretrain finished!{'=' * 10}\n\n")
    if args.early_stop > 0:
        model.load_state_dict(torch.load(es_checkpoint))
        test_acc = evaluate(model, features, labels, test_mask, adj)
    res_dict['res'] = {'pretrain_acc': f'{test_acc:.4f}'}

    # ! Train
    stopper = EarlyStopping(patience=100, path=es_checkpoint)
    for epoch in range(args.max_epoch):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        t, adj_sim_prev = 0, None
        logits, h, adj_sim, adj_feat = model(features, h=None, adj_ori=adj,
                                             adj_feat=None, mode='feat',
                                             norm_graph_reg_loss=args.ngrl)
        loss_adj_feat = cal_loss(args, cla_loss, logits, train_mask, labels,
                                 adj_sim, features)
        loss_list = [loss_adj_feat]
        ori_adj_norm = torch.norm(adj_sim.detach(), p=2)

        while iter_condition(args, adj_sim_prev, adj_sim, ori_adj_norm, t):
            t += 1
            adj_sim_prev = adj_sim.detach()
            logits, h, adj_sim, adj_agg = model(features, h, adj, adj_feat,
                                                mode='emb',
                                                norm_graph_reg_loss=args.ngrl)
            # exists_zero_lines(h)
            loss_adj_emb = cal_loss(args, cla_loss, logits, train_mask, labels,
                                    adj_sim, features)
            loss_list.append(loss_adj_emb)

        loss = torch.mean(torch.stack(loss_list))
        optimizer.zero_grad()
        # Stops if it gets an anomaly
        with torch.autograd.detect_anomaly():
            loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])
        val_acc = evaluate(model, features, labels, val_mask, adj)
        test_acc = evaluate(model, features, labels, test_mask, adj)
        # print(f"Epoch {epoch:05d} | Time(s) {np.mean(dur):.4f} | Loss {loss.item():.4f} | TrainAcc {train_acc:.4f} | ValAcc {val_acc:.4f}")
        print(f"IDGL-Epoch {epoch:05d} | Time(s) {np.mean(dur):.4f} | "
              f"Loss {loss.item():.4f} | TrainAcc {train_acc:.4f} | "
              f"ValAcc {val_acc:.4f} | TestAcc {test_acc:.4f}")
        if args.early_stop > 0:
            if stopper.step(val_acc, model):
                break

    if args.early_stop > 0:
        model.load_state_dict(torch.load(es_checkpoint))
    test_acc = evaluate(model, features, labels, test_mask, adj)
    print(f"Test Accuracy {test_acc:.4f}")
    res_dict['res']['IDGL_acc'] = f'{test_acc:.4f}'
    print(res_dict['res'])
    print(res_dict['parameters'])
    return res_dict
def main(args):
    # load and preprocess dataset
    g, features, labels, n_classes, train_mask, val_mask, test_mask, \
        lp_dict, ind_features, ind_labels = load_reg_data(args)
    num_feats = features.shape[1]
    n_edges = g.number_of_edges()
    print("""----Data statistics------'
      #use cuda: %d
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (args.gpu, n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        ind_features = ind_features.cuda()
        labels = labels.cuda()
        ind_labels = ind_labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop,
                args.negative_slope, args.residual, args.bias)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    # NOTE: loss_fcn was used below but never defined in the original;
    # MSE is an assumed choice for this regression setup.
    loss_fcn = torch.nn.MSELoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        pred = model(features)
        loss = loss_fcn(pred[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_r2 = compute_r2(pred[train_mask], labels[train_mask])

        if args.fastmode:
            val_r2 = compute_r2(pred[val_mask], labels[val_mask])
        else:
            val_r2 = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                if stopper.step(val_r2, model):
                    break

        if epoch > 3:
            print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainR2 {:.4f} |"
                  " Val R2 {:.4f} | ETputs(KTEPS) {:.2f}".format(
                      epoch, np.mean(dur), loss.item(), train_r2, val_r2,
                      n_edges / np.mean(dur) / 1000))

    print()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    evaluate_test(model, features, labels, test_mask, lp_dict, meta="2012")
    evaluate_test(model, ind_features, ind_labels, test_mask, lp_dict, meta="2016")
def main(args):
    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
        val_mask, test_mask = load_imdb_raw()

    if hasattr(torch, 'BoolTensor'):
        train_mask = train_mask.bool()
        val_mask = val_mask.bool()
        test_mask = test_mask.bool()

    features_m, features_a, features_d = features
    features_a = torch.zeros(features_a.shape[0], 10)
    features_d = torch.zeros(features_d.shape[0], 10)
    features_m = features_m.to(args['device'])
    features_a = features_a.to(args['device'])
    features_d = features_d.to(args['device'])
    features = {'movie': features_m, 'actor': features_a, 'director': features_d}
    in_size = {'actor': features_a.shape[1],
               'movie': features_m.shape[1],
               'director': features_d.shape[1]}

    labels = labels.to(args['device'])
    train_mask = train_mask.to(args['device'])
    val_mask = val_mask.to(args['device'])
    test_mask = test_mask.to(args['device'])

    model = HMSG(meta_paths=[['ma', 'am'], ['md', 'dm'], ['am'], ['dm']],
                 in_size=in_size,
                 hidden_size=args['hidden_units'],
                 out_size=num_classes,
                 aggre_type='attention',
                 num_heads=args['num_heads'],
                 dropout=args['dropout']).to(args['device'])
    g = g.to(args['device'])

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        z, logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(
            logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1, z = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
                  epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                  val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1, z = evaluate(
        model, g, features, labels, test_mask, loss_fcn)
    emd_imdb, label_imdb = z[test_mask], labels[test_mask]
    np.savetxt('./out/emd_imdb.txt', emd_imdb.cpu())
    np.savetxt('./out/label_imdb.txt', np.array(label_imdb.cpu(), dtype=np.int32))
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1))
def train_model(model, loss, optimizer, dataloader, train_size, valid_size,
                model_name='weights', num_epochs=50):
    writer = SummaryWriter(comment='--{}'.format(model_name))
    es = EarlyStopping(patience=5)
    since = time.time()
    # best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        start = time.time()
        print("Epoch {}/{}".format(epoch, num_epochs))
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0.

            for inputs, labels in dataloader[phase]:
                inputs = inputs.to('cuda:0')
                labels = labels.to('cuda:0')

                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss_ = loss(outputs, labels)
                    if phase == 'train':
                        loss_.backward()
                        optimizer.step()

                running_loss += loss_.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                del inputs, labels, outputs, preds
                torch.cuda.empty_cache()

            data_size = train_size if phase == 'train' else valid_size
            epoch_loss = running_loss / data_size
            epoch_acc = running_corrects / data_size

            if phase == 'train':
                writer.add_scalar('Loss/train', epoch_loss, epoch)
                writer.add_scalar('Accuracy/train', epoch_acc, epoch)
            else:
                writer.add_scalar('Loss/test', epoch_loss, epoch)
                writer.add_scalar('Accuracy/test', epoch_acc, epoch)

            print('{} -> Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            print('\ttime', time.time() - start)

            if phase == 'val':
                if es.step(epoch_acc):
                    time_elapsed = time.time() - since
                    print('Early Stopping')
                    print('Training complete in {:.0f}m {:.0f}s'.format(
                        time_elapsed // 60, time_elapsed % 60))
                    print('Best val Acc: {:4f}'.format(best_acc))
                    return
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    print('Update best acc: {:4f}'.format(best_acc))
                    torch.save(model.state_dict(), '{}.pt'.format(model_name))
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
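# Hedged usage example (an assumption, kept as comments): wiring train_model
# up to a standard torchvision classifier. `dataloader` is a dict with
# 'train' and 'val' loaders; train_ds/val_ds are illustrative names only.
#
# model = torchvision.models.resnet18(num_classes=10).to('cuda:0')
# dataloader = {'train': DataLoader(train_ds, batch_size=64, shuffle=True),
#               'val': DataLoader(val_ds, batch_size=64)}
# train_model(model, torch.nn.CrossEntropyLoss(),
#             torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9),
#             dataloader, train_size=len(train_ds), valid_size=len(val_ds),
#             model_name='resnet18_run1', num_epochs=50)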
def main(args):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    batch_size = 128
    learning_rate = 0.001
    num_epochs = 100
    set_random_seed()

    # Interchangeable with other Dataset
    dataset = Tox21()
    atom_data_field = 'h'
    trainset, valset, testset = split_dataset(dataset, [0.8, 0.1, 0.1])
    train_loader = DataLoader(trainset, batch_size=batch_size,
                              collate_fn=collate_molgraphs)
    val_loader = DataLoader(valset, batch_size=batch_size,
                            collate_fn=collate_molgraphs)
    test_loader = DataLoader(testset, batch_size=batch_size,
                             collate_fn=collate_molgraphs)

    if args.pre_trained:
        num_epochs = 0
        model = model_zoo.chem.load_pretrained('GCN_Tox21')
    else:
        # Interchangeable with other models
        model = model_zoo.chem.GCNClassifier(in_feats=74,
                                             gcn_hidden_feats=[64, 64],
                                             n_tasks=dataset.n_tasks)
    loss_criterion = BCEWithLogitsLoss(
        pos_weight=torch.tensor(dataset.task_pos_weights).to(device),
        reduction='none')
    optimizer = Adam(model.parameters(), lr=learning_rate)
    stopper = EarlyStopping(patience=10)
    model.to(device)

    for epoch in range(num_epochs):
        model.train()
        print('Start training')
        train_meter = Meter()
        for batch_id, batch_data in enumerate(train_loader):
            smiles, bg, labels, mask = batch_data
            atom_feats = bg.ndata.pop(atom_data_field)
            atom_feats, labels, mask = atom_feats.to(device), labels.to(device), mask.to(device)
            logits = model(atom_feats, bg)
            # Mask non-existing labels
            loss = (loss_criterion(logits, labels) * (mask != 0).float()).mean()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print('epoch {:d}/{:d}, batch {:d}/{:d}, loss {:.4f}'.format(
                epoch + 1, num_epochs, batch_id + 1, len(train_loader), loss.item()))
            train_meter.update(logits, labels, mask)
        train_roc_auc = train_meter.roc_auc_averaged_over_tasks()
        print('epoch {:d}/{:d}, training roc-auc score {:.4f}'.format(
            epoch + 1, num_epochs, train_roc_auc))

        val_meter = Meter()
        model.eval()
        with torch.no_grad():
            for batch_id, batch_data in enumerate(val_loader):
                smiles, bg, labels, mask = batch_data
                atom_feats = bg.ndata.pop(atom_data_field)
                atom_feats, labels = atom_feats.to(device), labels.to(device)
                logits = model(atom_feats, bg)
                val_meter.update(logits, labels, mask)

        val_roc_auc = val_meter.roc_auc_averaged_over_tasks()
        if stopper.step(val_roc_auc, model):
            break
        print('epoch {:d}/{:d}, validation roc-auc score {:.4f}, '
              'best validation roc-auc score {:.4f}'.format(
                  epoch + 1, num_epochs, val_roc_auc, stopper.best_score))

    test_meter = Meter()
    model.eval()
    for batch_id, batch_data in enumerate(test_loader):
        smiles, bg, labels, mask = batch_data
        atom_feats = bg.ndata.pop(atom_data_field)
        atom_feats, labels = atom_feats.to(device), labels.to(device)
        logits = model(atom_feats, bg)
        test_meter.update(logits, labels, mask)
    print('test roc-auc score {:.4f}'.format(test_meter.roc_auc_averaged_over_tasks()))
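# Hedged sketch (an assumption): a minimal Meter consistent with its use
# above. It accumulates logits/labels/masks per batch and averages per-task
# ROC-AUC over tasks, only scoring entries whose mask is non-zero; the real
# utility may expose a different interface.
import torch
from sklearn.metrics import roc_auc_score

class Meter:
    def __init__(self):
        self.y_pred, self.y_true, self.mask = [], [], []

    def update(self, y_pred, y_true, mask):
        self.y_pred.append(y_pred.detach().cpu())
        self.y_true.append(y_true.detach().cpu())
        self.mask.append(mask.detach().cpu())

    def roc_auc_averaged_over_tasks(self):
        y_pred = torch.sigmoid(torch.cat(self.y_pred, dim=0))
        y_true = torch.cat(self.y_true, dim=0)
        mask = torch.cat(self.mask, dim=0)
        scores = []
        for task in range(y_true.shape[1]):
            valid = mask[:, task] != 0  # skip non-existing labels
            scores.append(roc_auc_score(y_true[valid, task].numpy(),
                                        y_pred[valid, task].numpy()))
        return sum(scores) / len(scores)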
def main(args):
    # load and preprocess dataset
    data = load_data(args)

    features = mx.nd.array(data.features)
    labels = mx.nd.array(data.labels)
    mask = mx.nd.array(np.where(data.train_mask == 1))
    test_mask = mx.nd.array(np.where(data.test_mask == 1))
    val_mask = mx.nd.array(np.where(data.val_mask == 1))
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    if args.gpu < 0:
        ctx = mx.cpu()
    else:
        ctx = mx.gpu(args.gpu)

    features = features.as_in_context(ctx)
    labels = labels.as_in_context(ctx)
    mask = mask.as_in_context(ctx)
    test_mask = test_mask.as_in_context(ctx)
    val_mask = val_mask.as_in_context(ctx)

    # create graph
    g = data.graph
    # add self-loop
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())

    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g,
                args.num_layers,
                in_feats,
                args.num_hidden,
                n_classes,
                heads,
                elu,
                args.in_drop,
                args.attn_drop,
                args.alpha,
                args.residual)
    stopper = EarlyStopping(patience=100)
    model.initialize(ctx=ctx)

    # use optimizer
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': args.lr})

    dur = []
    for epoch in range(args.epochs):
        if epoch >= 3:
            t0 = time.time()
        # forward
        with mx.autograd.record():
            logits = model(features)
            loss = mx.nd.softmax_cross_entropy(logits[mask].squeeze(),
                                               labels[mask].squeeze())
            loss.backward()
        trainer.step(mask.shape[0])

        if epoch >= 3:
            dur.append(time.time() - t0)
        print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}"
              .format(epoch, loss.asnumpy()[0], np.mean(dur),
                      n_edges / np.mean(dur) / 1000))

        val_accuracy = evaluate(model, features, labels, val_mask)
        print("Validation Accuracy {:.4f}".format(val_accuracy))
        if stopper.step(val_accuracy, model):
            break

    model.load_parameters('model.param')
    test_accuracy = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(test_accuracy))
def train(model, data, current_model_dir):
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3,
                                 weight_decay=1e-5)  # Check lr and adam
    es = EarlyStopping(mode="min", patience=30, threshold=0.005,
                       threshold_mode="rel")  # Check threshold
    loss_meters = []
    model.train()

    dc_folder = '{}/dc_img'.format(current_model_dir)
    if not os.path.exists(dc_folder):
        os.mkdir(dc_folder)

    start_time = time.time()
    for e in range(EPOCHS):
        loss_meter = AverageMeter()
        for d in data:
            message, img = d
            output, loss = model(message, img)
            # zero_grad was missing in the original; without it, gradients
            # accumulate across batches.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_meter.update(loss.item())

        print('Epoch {}, loss {}'.format(e, loss_meter.avg))

        # Save only the best model
        if e == 0 or loss_meter.avg < np.min([m.avg for m in loss_meters]):
            # First delete the old model
            if e > 0:
                old_model_files = [
                    '{}/{}'.format(current_model_dir, f)
                    for f in os.listdir(current_model_dir)
                    if f.endswith('_model')
                ]
                if len(old_model_files) > 0:
                    os.remove(old_model_files[0])
            torch.save(model.state_dict(),
                       '{}/{}_model'.format(current_model_dir, e))

        loss_meters.append(loss_meter)
        es.step(loss_meter.avg)

        if e % 10 == 0:
            pic = to_img(output.cpu().data if use_gpu else output.data)
            save_image(pic, '{}/image_{}.png'.format(dc_folder, e))

        if es.is_converged:
            print("Converged in epoch {}".format(e))
            break

    print('Training took {} seconds'.format(time.time() - start_time))
    pickle.dump(loss_meters,
                open('{}/{}_loss_meters.p'.format(current_model_dir, e), 'wb'))
    return loss_meters
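# Hedged sketch (an assumption): the AverageMeter used by the two training
# functions above and below; it tracks a running average of batch losses.
class AverageMeter:
    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count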
def main(args):
    # load graph data
    data = load_data(args.dataset)
    num_nodes = data.num_nodes
    train_data = data.train
    valid_data = data.valid
    test_data = data.test
    num_rels = data.num_rels

    stopper = EarlyStopping(patience=args.patience)

    # check cuda
    if args.gpu >= 0:
        device = torch.device('cuda:%d' % args.gpu)
    else:
        device = torch.device('cpu')

    # create model
    model = SACN(num_entities=num_nodes, num_relations=num_rels * 2 + 1, args=args)

    # build graph
    g = dgl.graph([])
    g.add_nodes(num_nodes)
    src, rel, dst = train_data.transpose()
    # add reverse edges, reverse relation id is between [num_rels, 2*num_rels)
    src, dst = np.concatenate((src, dst)), np.concatenate((dst, src))
    rel = np.concatenate((rel, rel + num_rels))
    # get new train_data with reverse relation
    train_data_new = np.stack((src, rel, dst)).transpose()
    # unique train data by (h, r)
    train_data_new_pandas = pandas.DataFrame(train_data_new)
    train_data_new_pandas = train_data_new_pandas.drop_duplicates([0, 1])
    train_data_unique = np.asarray(train_data_new_pandas)

    g.add_edges(src, dst)
    # add graph self loop
    g.add_edges(g.nodes(), g.nodes())
    # add self loop relation type, self loop relation's id is 2*num_rels.
    rel = np.concatenate((rel, np.ones([num_nodes]) * num_rels * 2))
    print(g)

    entity_id = torch.LongTensor([i for i in range(num_nodes)])
    model = model.to(device)
    g = g.to(device)
    all_rel = torch.LongTensor(rel).to(device)
    entity_id = entity_id.to(device)

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # process the triples and get all tails corresponding to (h, r)
    # here valid_dict and test_dict are not used.
    train_dict, valid_dict, test_dict, all_dict = preprocess_data(
        train_data, valid_data, test_data, num_rels)
    train_batch_prepare = TrainBatchPrepare(train_dict, num_nodes)
    # eval needs to use all the data in train_data, valid_data and test_data
    eval_batch_prepare = EvalBatchPrepare(all_dict, num_rels)

    train_dataloader = DataLoader(dataset=train_data_unique,
                                  batch_size=args.batch_size,
                                  collate_fn=train_batch_prepare.get_batch,
                                  shuffle=True,
                                  drop_last=False,
                                  num_workers=args.num_workers)
    valid_dataloader = DataLoader(dataset=valid_data,
                                  batch_size=args.batch_size,
                                  collate_fn=eval_batch_prepare.get_batch,
                                  shuffle=False,
                                  drop_last=False,
                                  num_workers=args.num_workers)
    test_dataloader = DataLoader(dataset=test_data,
                                 batch_size=args.batch_size,
                                 collate_fn=eval_batch_prepare.get_batch,
                                 shuffle=False,
                                 drop_last=False,
                                 num_workers=args.num_workers)

    # training loop
    print("start training...")
    for epoch in range(args.n_epochs):
        model.train()
        epoch_start_time = time.time()
        for step, batch_tuple in enumerate(train_dataloader):
            e1_batch, rel_batch, labels_one_hot = batch_tuple
            e1_batch = e1_batch.to(device)
            rel_batch = rel_batch.to(device)
            labels_one_hot = labels_one_hot.to(device)
            labels_one_hot = ((1.0 - 0.1) * labels_one_hot) + (1.0 / labels_one_hot.size(1))
            pred = model.forward(g, all_rel, e1_batch, rel_batch, entity_id)
            optimizer.zero_grad()
            loss = model.loss(pred, labels_one_hot)
            loss.backward()
            optimizer.step()
        print("epoch : {}".format(epoch))
        print("epoch time: {:.4f}".format(time.time() - epoch_start_time))
        print("loss: {}".format(loss.data))

        model.eval()
        if epoch % args.eval_every == 0:
            with torch.no_grad():
                val_mrr = ranking_and_hits(g, all_rel, model, valid_dataloader,
                                           'dev_evaluation', entity_id, device)
                if stopper.step(val_mrr, model):
                    break

    print("training done")
    model.load_state_dict(torch.load('es_checkpoint.pt'))
    ranking_and_hits(g, all_rel, model, test_dataloader, 'test_evaluation',
                     entity_id, device)
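# The labels_one_hot transform in the training loop above is ConvE-style label
# smoothing: with epsilon = 0.1 and N candidate entities, a 1 becomes
# 0.9 + 1/N and a 0 becomes 1/N, keeping the binary targets away from the
# saturating ends of the sigmoid (with realistic N, 1/N is tiny). Minimal
# demonstration with illustrative values:
import torch

labels = torch.tensor([[1.0, 0.0, 0.0, 0.0]])           # N = 4 candidates
smoothed = (1.0 - 0.1) * labels + 1.0 / labels.size(1)
print(smoothed)  # tensor([[1.1500, 0.2500, 0.2500, 0.2500]])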
def train(model, data, property, current_model_dir):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # Check lr and adam
    es = EarlyStopping(mode="min", patience=30, threshold=0.005,
                       threshold_mode="rel")  # Check threshold
    model_prop = str(property).split('.')[-1].lower()
    loss_meters = []
    model.train()

    print('Training model {}'.format(property))
    start_time = time.time()
    for e in range(EPOCHS):
        loss_meter = AverageMeter()
        for d in data:
            message, metadata = d
            if property == Property.COLOR:
                one_hot_prop = metadata[:, 0:3]
            elif property == Property.SHAPE:
                one_hot_prop = metadata[:, 3:6]
            elif property == Property.SIZE:
                one_hot_prop = metadata[:, 6:8]
            elif property == Property.ROW:
                one_hot_prop = metadata[:, 9:12]
            elif property == Property.COLUMN:
                one_hot_prop = metadata[:, 12:15]

            loss = model(message, one_hot_prop)
            # zero_grad was missing in the original; without it, gradients
            # accumulate across batches.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_meter.update(loss.item())

        print('Epoch {}, loss {}'.format(e, loss_meter.avg))

        # Save only the best model
        if e == 0 or loss_meter.avg < np.min([m.avg for m in loss_meters]):
            # First delete the old model
            if e > 0:
                old_model_files = [
                    '{}/{}'.format(current_model_dir, f)
                    for f in os.listdir(current_model_dir)
                    if f.endswith('_model')
                ]
                if len(old_model_files) > 0:
                    os.remove(old_model_files[0])
            torch.save(model.state_dict(),
                       '{}/{}_{}_model'.format(current_model_dir, model_prop, e))

        loss_meters.append(loss_meter)
        es.step(loss_meter.avg)
        if es.is_converged:
            print("Converged in epoch {}".format(e))
            break

    print('Training took {} seconds'.format(time.time() - start_time))
    pickle.dump(
        loss_meters,
        open('{}/{}_{}_loss_meters.p'.format(current_model_dir, model_prop, e), 'wb'))
    return loss_meters
def main(args):
    # load and preprocess dataset
    if args.dataset == 'reddit':
        data = RedditDataset()
    elif args.dataset in ['photo', "computer"]:
        data = MsDataset(args)
    else:
        data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    current_time = time.strftime('%d_%H:%M:%S', localtime())
    writer = SummaryWriter(log_dir='runs/' + current_time + '_' + args.sess,
                           flush_secs=30)

    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.bool().cuda()
        val_mask = val_mask.bool().cuda()
        test_mask = test_mask.bool().cuda()

    g = data.graph
    # add self loop
    if args.dataset != 'reddit':
        g.remove_edges_from(nx.selfloop_edges(g))
        g = DGLGraph(g)
        g.add_edges(g.nodes(), g.nodes())
    n_edges = g.number_of_edges()
    print('edge number %d' % (n_edges))

    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.idrop, args.adrop, args.alpha, args.bias,
                args.residual, args.l0)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=150)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    dur = []
    time_used = 0
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        loss_l0 = args.loss_l0 * (model.gat_layers[0].loss)

        optimizer.zero_grad()
        (loss + loss_l0).backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])
        writer.add_scalar('edge_num/0', model.gat_layers[0].num, epoch)

        if args.fastmode:
            val_acc, loss = accuracy(logits[val_mask], labels[val_mask], loss_fcn)
        else:
            val_acc, _ = evaluate(model, features, labels, val_mask, loss_fcn)
            if args.early_stop:
                if stopper.step(val_acc, model):
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  n_edges / np.mean(dur) / 1000))

        writer.add_scalar('loss', loss.item(), epoch)
        writer.add_scalar('f1/train_f1_mic', train_acc, epoch)
        writer.add_scalar('f1/test_f1_mic', val_acc, epoch)
        writer.add_scalar('time/time', time_used, epoch)

    writer.close()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc, _ = evaluate(model, features, labels, test_mask, loss_fcn)
    print("Test Accuracy {:.4f}".format(acc))
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    g = data.graph
    # add self loop
    g.remove_edges_from(g.selfloop_edges())
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    n_edges = g.number_of_edges()

    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop,
                args.negative_slope, args.residual)
    print(model)
    stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if args.fastmode:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if stopper.step(val_acc, model):
                break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  n_edges / np.mean(dur) / 1000))

    print()
    model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
def main(args):
    # load and preprocess dataset
    data = load_data(args)

    if args.gpu < 0:
        device = "/cpu:0"
    else:
        device = "/gpu:{}".format(args.gpu)

    with tf.device(device):
        features = tf.convert_to_tensor(data.features, dtype=tf.float32)
        labels = tf.convert_to_tensor(data.labels, dtype=tf.int64)
        train_mask = tf.convert_to_tensor(data.train_mask, dtype=tf.bool)
        val_mask = tf.convert_to_tensor(data.val_mask, dtype=tf.bool)
        test_mask = tf.convert_to_tensor(data.test_mask, dtype=tf.bool)
        num_feats = features.shape[1]
        n_classes = data.num_labels
        n_edges = data.graph.number_of_edges()
        print("""----Data statistics------'
          #Edges %d
          #Classes %d
          #Train samples %d
          #Val samples %d
          #Test samples %d""" %
              (n_edges, n_classes,
               train_mask.numpy().sum(),
               val_mask.numpy().sum(),
               test_mask.numpy().sum()))

        g = data.graph
        # add self loop
        g.remove_edges_from(nx.selfloop_edges(g))
        g = DGLGraph(g)
        g.add_edges(g.nodes(), g.nodes())
        n_edges = g.number_of_edges()

        # create model
        heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
        model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                    heads, tf.nn.elu, args.in_drop, args.attn_drop,
                    args.negative_slope, args.residual)
        print(model)
        if args.early_stop:
            stopper = EarlyStopping(patience=100)
        # loss_fcn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
        loss_fcn = tf.nn.sparse_softmax_cross_entropy_with_logits

        # use optimizer
        optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr, epsilon=1e-8)

        # initialize graph
        dur = []
        for epoch in range(args.epochs):
            if epoch >= 3:
                t0 = time.time()
            # forward
            with tf.GradientTape() as tape:
                tape.watch(model.trainable_weights)
                logits = model(features, training=True)
                loss_value = tf.reduce_mean(
                    loss_fcn(labels=labels[train_mask], logits=logits[train_mask]))
                # Manual weight decay.
                # We found TensorFlow's weight decay in the Adam(W) optimizer is
                # implemented differently from PyTorch's, which leads to worse
                # results. Manually adding the weights' L2 norm to the loss to do
                # weight decay solves this problem.
                for weight in model.trainable_weights:
                    loss_value = loss_value + args.weight_decay * tf.nn.l2_loss(weight)

            grads = tape.gradient(loss_value, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))

            if epoch >= 3:
                dur.append(time.time() - t0)

            train_acc = accuracy(logits[train_mask], labels[train_mask])

            if args.fastmode:
                val_acc = accuracy(logits[val_mask], labels[val_mask])
            else:
                val_acc = evaluate(model, features, labels, val_mask)
                if args.early_stop:
                    if stopper.step(val_acc, model):
                        break

            print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
                  " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                      epoch, np.mean(dur), loss_value.numpy().item(), train_acc,
                      val_acc, n_edges / np.mean(dur) / 1000))

        print()
        if args.early_stop:
            model.load_weights('es_checkpoint.pb')
        acc = evaluate(model, features, labels, test_mask)
        print("Test Accuracy {:.4f}".format(acc))
def main(args):
    # If args['hetero'] is True, g is a heterogeneous graph;
    # otherwise it is a list of homogeneous graphs.
    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
        val_mask, test_mask = load_data(args['dataset'])

    if hasattr(torch, 'BoolTensor'):
        train_mask = train_mask.bool()
        val_mask = val_mask.bool()
        test_mask = test_mask.bool()

    # features = features.to(args['device'])
    features = [f.to(args['device']) for f in features]
    labels = labels.to(args['device'])
    train_mask = train_mask.to(args['device'])
    val_mask = val_mask.to(args['device'])
    test_mask = test_mask.to(args['device'])

    if args['hetero']:
        from model_hetero import SS_HAN
        model = SS_HAN(muti_meta_paths=[[['pa', 'ap'], ['pf', 'fp']],
                                        [['ap', 'pa']],
                                        [['fp', 'pf']]],
                       in_size=features[0].shape[1],
                       hidden_size=args['hidden_units'],
                       out_size=num_classes,
                       num_heads=args['num_heads'],
                       dropout=args['dropout']).to(args['device'])
        g = g.to(args['device'])
    else:
        from model import HAN
        model = HAN(num_meta_paths=len(g),
                    in_size=features.shape[1],
                    hidden_size=args['hidden_units'],
                    out_size=num_classes,
                    num_heads=args['num_heads'],
                    dropout=args['dropout']).to(args['device'])
        g = [graph.to(args['device']) for graph in g]

    stopper = EarlyStopping(patience=args['patience'])
    # loss_fcn = F.binary_cross_entropy_with_logits
    loss_fcn = torch.nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=args['lr'],
                           weight_decay=args['weight_decay'])
    # lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.95)

    print('*****************************Pre-training Starting*************************************')
    for epoch in range(args['pretrain_epochs']):
        model.train()
        for idx in range(args['batch_size']):
            embeddings = model(g, features)
            pos_edge_index, neg_edge_index = sample(g, 1)
            link_logits = model.calculate_loss(embeddings, pos_edge_index, neg_edge_index)
            link_labels = get_link_labels(pos_edge_index, neg_edge_index).to(link_logits.device)
            loss = loss_fcn(link_logits, link_labels)
            # move to CPU before converting to numpy for roc_auc_score
            link_probs = link_logits.sigmoid().detach().cpu().numpy()
            acc = roc_auc_score(link_labels.cpu(), link_probs)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # print('link_labels : {}'.format(link_labels))
            # print('link_probs : {}'.format(link_probs))
            print('epoch: {} || batch: {} || loss: {} || roc-auc: {}'.format(
                epoch, idx, loss.item(), acc))
        # lr_scheduler.step()

        early_stop = stopper.step(model, epoch, loss.item(), acc)
        if early_stop:
            break
        filename = './model/ss-han_{}_{:02f}_{:02f}'.format(epoch, loss.item(), acc)
        torch.save(model.state_dict(), filename)
    print('*****************************Pre-training Ending*************************************')
    print('\n')

    print('*****************************Fine-tuning Starting*************************************')
    # freeze the pretrained parameters
    for param in model.parameters():
        param.requires_grad = False

    from model_hetero import Classifier
    # the classifier must live on the same device as the frozen encoder
    classifier = Classifier(in_size=args['hidden_units'] * args['num_heads'][-1],
                            hidden_size=128,
                            out_size=num_classes).to(args['device'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(classifier.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['fine-tuning_epochs']):
        model.train()
        embeddings = model(g, features)
        output = classifier(embeddings[0])
        loss = loss_fcn(output[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(output[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 \
            = evaluate(model, classifier, g, features, labels, val_mask, loss_fcn)
        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
                  epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                  val_loss.item(), val_micro_f1, val_macro_f1))
    print('*****************************Fine-tuning Ending*************************************')

    # final evaluation on the held-out test split
    test_loss, test_acc, test_micro_f1, test_macro_f1 \
        = evaluate(model, classifier, g, features, labels, test_mask, loss_fcn)
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1))
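# `get_link_labels` is not shown above. A minimal sketch consistent with its use
# alongside BCEWithLogitsLoss, assuming each *_edge_index is a 2 x E index tensor
# with positive edges listed before negative ones in the logits:

def get_link_labels(pos_edge_index, neg_edge_index):
    # 1.0 for observed (positive) edges, 0.0 for sampled negative edges
    num_links = pos_edge_index.size(1) + neg_edge_index.size(1)
    link_labels = torch.zeros(num_links, dtype=torch.float)
    link_labels[:pos_edge_index.size(1)] = 1.0
    return link_labels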
def main(args):
    # Build the list of homogeneous meta-path graphs for the academic dataset.
    args_academic = read_args()
    data = dataprocess_han.input_data_han(args_academic)
    # g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
    #     val_mask, test_mask = load_data(args['dataset'])
    features = torch.tensor(data.a_text_embed, dtype=torch.float32)
    labels = torch.tensor(data.a_class)

    APA_g = dgl.graph(data.APA_matrix, ntype='author', etype='coauthor')
    APVPA_g = dgl.graph(data.APVPA_matrix, ntype='author', etype='attendance')
    APPA_g = dgl.graph(data.APPA_matrix, ntype='author', etype='reference')
    # g = [APA_g, APPA_g]
    g = [APA_g, APVPA_g, APPA_g]
    num_classes = 4

    features = features.to(args['device'])
    labels = labels.to(args['device'])

    # if args['hetero']:
    #     from model_hetero import HAN
    #     model = HAN(meta_paths=[['pa', 'ap'], ['pf', 'fp']],
    #                 in_size=features.shape[1],
    #                 hidden_size=args['hidden_units'],
    #                 out_size=num_classes,
    #                 num_heads=args['num_heads'],
    #                 dropout=args['dropout']).to(args['device'])
    # else:
    model = HAN(num_meta_paths=len(g),
                in_size=features.shape[1],
                hidden_size=args['hidden_units'],
                out_size=num_classes,
                num_heads=args['num_heads'],
                dropout=args['dropout']).to(args['device'])

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    # warm-start from the previously saved parameters
    model.load_state_dict(torch.load("./model_para.pt"))

    for epoch in range(args['num_epochs']):
        # resample a random 20/80 train/test split of the authors each epoch
        X = [[i] for i in range(args_academic.A_n)]
        train_X, test_X, _, _ = train_test_split(X, X, test_size=0.8)
        # train_X, test_X, _, _ = train_test_split(train_X, train_X, test_size=0.2)
        train_mask = get_binary_mask(args_academic.A_n, train_X)
        test_mask = get_binary_mask(args_academic.A_n, test_X)
        # train_mask = torch.tensor(data.train_mask)
        # test_mask = torch.tensor(data.test_mask)
        val_mask = test_mask

        train_mask = train_mask.to(args['device'])
        val_mask = val_mask.to(args['device'])
        test_mask = test_mask.to(args['device'])

        model.train()
        logits, _ = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(
            logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
                  epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                  val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    model.eval()
    _, embedding = model(g, features)
    # dump the learned author embeddings to a text file
    with open("./node_embedding.txt", "w") as embed_file:
        for k in range(embedding.shape[0]):
            embed_file.write('a' + str(k) + " ")
            for l in range(embedding.shape[1] - 1):
                embed_file.write(str(embedding[k][l].item()) + " ")
            embed_file.write(str(embedding[k][-1].item()) + "\n")

    # test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(model, g, features, labels, test_mask, loss_fcn)
    # print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
    #     test_loss.item(), test_micro_f1, test_macro_f1))
    torch.save(model.state_dict(), "./model_para.pt")
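# `get_binary_mask` is assumed to turn a list of node indices into a boolean
# mask over all nodes; a minimal sketch under that assumption:

def get_binary_mask(total_size, indices):
    # indices may be a nested list such as [[0], [3], ...], so flatten first
    mask = torch.zeros(total_size, dtype=torch.bool)
    mask[torch.tensor(indices).flatten()] = True
    return mask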
def main(args):
    # Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
    # Load dataset
    dataset = dgl.data.FraudDataset(args.dataset, train_size=0.4)
    graph = dataset[0]
    num_classes = dataset.num_classes

    # check cuda
    if args.gpu >= 0 and th.cuda.is_available():
        device = 'cuda:{}'.format(args.gpu)
        args.num_workers = 0
    else:
        device = 'cpu'

    # retrieve the ground-truth labels
    labels = graph.ndata['label'].to(device)

    # extract node features
    feat = graph.ndata['feature'].to(device)
    layers_feat = feat.expand(args.num_layers, -1, -1)

    # retrieve masks for train/validation/test
    train_mask = graph.ndata['train_mask']
    val_mask = graph.ndata['val_mask']
    test_mask = graph.ndata['test_mask']

    train_idx = th.nonzero(train_mask, as_tuple=False).squeeze(1).to(device)
    val_idx = th.nonzero(val_mask, as_tuple=False).squeeze(1).to(device)
    test_idx = th.nonzero(test_mask, as_tuple=False).squeeze(1).to(device)

    # the reinforcement learning module only uses positive training nodes
    rl_idx = th.nonzero(train_mask.to(device) & labels.bool(), as_tuple=False).squeeze(1)

    graph = graph.to(device)

    # Step 2: Create model =================================================================== #
    model = CAREGNN(in_dim=feat.shape[-1],
                    num_classes=num_classes,
                    hid_dim=args.hid_dim,
                    num_layers=args.num_layers,
                    activation=th.tanh,
                    step_size=args.step_size,
                    edges=graph.canonical_etypes)
    model = model.to(device)

    # Step 3: Create training components ===================================================== #
    _, cnt = th.unique(labels, return_counts=True)
    loss_fn = th.nn.CrossEntropyLoss(weight=1 / cnt)
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    if args.early_stop:
        stopper = EarlyStopping(patience=100)

    # Step 4: Training epochs =============================================================== #
    for epoch in range(args.max_epoch):
        # compute the distance of each edge and sample neighbors based on it
        dists = []
        p = []
        for i in range(args.num_layers):
            dist = {}
            graph.ndata['nd'] = th.tanh(model.layers[i].MLP(layers_feat[i]))
            for etype in graph.canonical_etypes:
                graph.apply_edges(_l1_dist, etype=etype)
                dist[etype] = graph.edges[etype].data['ed']
            dists.append(dist)
            p.append(model.layers[i].p)
        sampler = CARESampler(p, dists, args.num_layers)

        # train
        model.train()
        tr_loss = 0
        tr_recall = 0
        tr_auc = 0
        tr_blk = 0
        train_dataloader = dgl.dataloading.NodeDataLoader(graph,
                                                          train_idx,
                                                          sampler,
                                                          batch_size=args.batch_size,
                                                          shuffle=True,
                                                          drop_last=False,
                                                          num_workers=args.num_workers)
        for input_nodes, output_nodes, blocks in train_dataloader:
            blocks = [b.to(device) for b in blocks]
            train_feature = blocks[0].srcdata['feature']
            train_label = blocks[-1].dstdata['label']
            logits_gnn, logits_sim = model(blocks, train_feature)

            # compute loss
            blk_loss = loss_fn(logits_gnn, train_label) + \
                args.sim_weight * loss_fn(logits_sim, train_label)
            tr_loss += blk_loss.item()
            tr_recall += recall_score(train_label.cpu(), logits_gnn.argmax(dim=1).detach().cpu())
            tr_auc += roc_auc_score(train_label.cpu(), logits_gnn[:, 1].detach().cpu())
            tr_blk += 1

            # backward
            optimizer.zero_grad()
            blk_loss.backward()
            optimizer.step()

        # reinforcement learning module
        model.RLModule(graph, epoch, rl_idx, dists)

        # validation
        model.eval()
        val_dataloader = dgl.dataloading.NodeDataLoader(graph,
                                                        val_idx,
                                                        sampler,
                                                        batch_size=args.batch_size,
                                                        shuffle=True,
                                                        drop_last=False,
                                                        num_workers=args.num_workers)
        val_recall, val_auc, val_loss = evaluate(model, loss_fn, val_dataloader, device)

        # print out performance
        print("In epoch {}, Train Recall: {:.4f} | Train AUC: {:.4f} | Train Loss: {:.4f}; "
              "Valid Recall: {:.4f} | Valid AUC: {:.4f} | Valid loss: {:.4f}".format(
                  epoch, tr_recall / tr_blk, tr_auc / tr_blk, tr_loss / tr_blk,
                  val_recall, val_auc, val_loss))

        if args.early_stop:
            if stopper.step(val_auc, model):
                break

    # Test with mini-batches after all epochs
    model.eval()
    if args.early_stop:
        model.load_state_dict(th.load('es_checkpoint.pt'))
    test_dataloader = dgl.dataloading.NodeDataLoader(graph,
                                                     test_idx,
                                                     sampler,
                                                     batch_size=args.batch_size,
                                                     shuffle=True,
                                                     drop_last=False,
                                                     num_workers=args.num_workers)
    test_recall, test_auc, test_loss = evaluate(model, loss_fn, test_dataloader, device)
    print("Test Recall: {:.4f} | Test AUC: {:.4f} | Test loss: {:.4f}".format(
        test_recall, test_auc, test_loss))
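# `_l1_dist` is the DGL edge UDF passed to `graph.apply_edges` above; its exact
# definition is not shown. A minimal sketch, assuming it stores the L1 distance
# between the projected endpoint features (node field 'nd') in edge field 'ed':

def _l1_dist(edges):
    # L1 distance between the embeddings of each edge's endpoints
    return {'ed': th.norm(edges.src['nd'] - edges.dst['nd'], p=1, dim=-1)}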
def main(args):
    # Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
    # Load dataset
    dataset = dgl.data.FraudDataset(args.dataset, train_size=0.4)
    graph = dataset[0]
    num_classes = dataset.num_classes

    # check cuda
    if args.gpu >= 0 and th.cuda.is_available():
        device = 'cuda:{}'.format(args.gpu)
    else:
        device = 'cpu'

    # retrieve the ground-truth labels
    labels = graph.ndata['label'].to(device)

    # extract node features
    feat = graph.ndata['feature'].to(device)

    # retrieve masks for train/validation/test
    train_mask = graph.ndata['train_mask']
    val_mask = graph.ndata['val_mask']
    test_mask = graph.ndata['test_mask']

    train_idx = th.nonzero(train_mask, as_tuple=False).squeeze(1).to(device)
    val_idx = th.nonzero(val_mask, as_tuple=False).squeeze(1).to(device)
    test_idx = th.nonzero(test_mask, as_tuple=False).squeeze(1).to(device)

    # the reinforcement learning module only uses positive training nodes
    rl_idx = th.nonzero(train_mask.to(device) & labels.bool(), as_tuple=False).squeeze(1)

    graph = graph.to(device)

    # Step 2: Create model =================================================================== #
    model = CAREGNN(in_dim=feat.shape[-1],
                    num_classes=num_classes,
                    hid_dim=args.hid_dim,
                    num_layers=args.num_layers,
                    activation=th.tanh,
                    step_size=args.step_size,
                    edges=graph.canonical_etypes)
    model = model.to(device)

    # Step 3: Create training components ===================================================== #
    _, cnt = th.unique(labels, return_counts=True)
    loss_fn = th.nn.CrossEntropyLoss(weight=1 / cnt)
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    if args.early_stop:
        stopper = EarlyStopping(patience=100)

    # Step 4: Training epochs =============================================================== #
    for epoch in range(args.max_epoch):
        # training and validation use the full graph
        model.train()
        logits_gnn, logits_sim = model(graph, feat)

        # compute loss
        tr_loss = loss_fn(logits_gnn[train_idx], labels[train_idx]) + \
            args.sim_weight * loss_fn(logits_sim[train_idx], labels[train_idx])
        tr_recall = recall_score(labels[train_idx].cpu(),
                                 logits_gnn.data[train_idx].argmax(dim=1).cpu())
        tr_auc = roc_auc_score(labels[train_idx].cpu(),
                               softmax(logits_gnn, dim=1).data[train_idx][:, 1].cpu())

        # validation
        val_loss = loss_fn(logits_gnn[val_idx], labels[val_idx]) + \
            args.sim_weight * loss_fn(logits_sim[val_idx], labels[val_idx])
        val_recall = recall_score(labels[val_idx].cpu(),
                                  logits_gnn.data[val_idx].argmax(dim=1).cpu())
        val_auc = roc_auc_score(labels[val_idx].cpu(),
                                softmax(logits_gnn, dim=1).data[val_idx][:, 1].cpu())

        # backward
        optimizer.zero_grad()
        tr_loss.backward()
        optimizer.step()

        # print out performance
        print("Epoch {}, Train: Recall: {:.4f} AUC: {:.4f} Loss: {:.4f} | "
              "Val: Recall: {:.4f} AUC: {:.4f} Loss: {:.4f}".format(
                  epoch, tr_recall, tr_auc, tr_loss.item(),
                  val_recall, val_auc, val_loss.item()))

        # adjust the p value with the reinforcement learning module
        model.RLModule(graph, epoch, rl_idx)

        if args.early_stop:
            if stopper.step(val_auc, model):
                break

    # Test after all epochs
    model.eval()
    if args.early_stop:
        model.load_state_dict(th.load('es_checkpoint.pt'))

    # forward
    logits_gnn, logits_sim = model.forward(graph, feat)

    # compute loss
    test_loss = loss_fn(logits_gnn[test_idx], labels[test_idx]) + \
        args.sim_weight * loss_fn(logits_sim[test_idx], labels[test_idx])
    test_recall = recall_score(labels[test_idx].cpu(),
                               logits_gnn[test_idx].argmax(dim=1).cpu())
    test_auc = roc_auc_score(labels[test_idx].cpu(),
                             softmax(logits_gnn, dim=1).data[test_idx][:, 1].cpu())
    print("Test Recall: {:.4f} AUC: {:.4f} Loss: {:.4f}".format(
        test_recall, test_auc, test_loss.item()))
class ConvModel(Model):
    def __init__(self, config):
        super().__init__(config)
        if config.type == 'conv':
            # self.dynamics = ConvTransitionModel2().cuda()
            self.dynamics = ConvTransitionModel2_2().cuda()  # uses corrected action
            self.get_dataset_sample = self.get_dataset_sample_no_speed
            self.criterion = F.mse_loss
        elif config.type == 'conv_speed':
            self.dynamics = ConvTransitionModel3().cuda()
            self.get_dataset_sample = self.get_dataset_sample_with_speed
            self.criterion = F.mse_loss
        elif config.type == 'class':
            # self.dynamics = ClassificationModel().cuda()
            # self.dynamics = ClassificationModel2().cuda()  # uses corrected phase action
            self.dynamics = ClassificationModel3().cuda()  # uses limited phase history
            self.get_dataset_sample = self.get_dataset_sample_for_classification
            self.criterion = torch.nn.BCELoss()
        elif config.type == 'latent_fc':
            self.dynamics = LatentFCTransitionModel().cuda()
            self.get_dataset_sample = self.get_dataset_sample_for_latent_fc
            self.criterion = F.mse_loss
        else:
            raise NotImplementedError
        self.optim = torch.optim.Adam(self.dynamics.parameters())
        self.earlystopping = EarlyStopping(patience=self._c.early_stop_patience)
        self.set_epoch_length()
        self.writer = SummaryWriter(log_dir=config.logdir, purge_step=0)

    # def get_sample(self):
    #     return get_dataset_sample(self._dataset)
    #
    # Danijar-style get_sample (yield method) -- why yield an episode and not
    # a sample?
    #   while True:
    #       for files in directory: if not in cache, add to cache
    #       for i in a random subset of the cache (length limitation?):
    #           yield episode i
    # or:
    #   while True:
    #       check for files in the directory, add new files to the cache
    #       for i in train_steps number of episodes (sampled from the episode cache):
    #           yield a sample of the given length

    def get_dataset_sample_no_speed(self, dataset):
        s = dataset if isinstance(dataset, dict) else next(dataset)
        sample = {}
        sample['phases'] = torch.Tensor(s['phases'][:, 0, 0, 3, :, 0].numpy()).cuda()
        sample['y'] = self.preprocess(torch.Tensor(s['x'][:, 1, :, :, :, 0].numpy()))
        sample['v'] = self.preprocess(torch.Tensor(s['x'][:, 0, :, :, :, 1].numpy())) + 0.5
        sample['x'] = self.preprocess(torch.Tensor(s['x'][:, 0, :, :, :, 0].numpy()))
        sample['action'] = torch.Tensor(s['corrected_action'][:, :1].numpy()).cuda()
        ## not needed for now.
        sample['reward'] = s['reward'].numpy()
        # sample['action'] = s['action'].numpy()
        return sample

    def get_dataset_sample_with_speed(self, dataset):
        s = dataset if isinstance(dataset, dict) else next(dataset)
        sample = {}
        sample['phases'] = self.preprocess(torch.Tensor(s['phases'][:, 0, 0, 3, :, 0].numpy()))
        sample['y'] = self.preprocess(torch.Tensor(s['x'][:, 1, :, :, :, :].numpy())).permute(0, 4, 2, 3, 1).squeeze(-1).contiguous()
        sample['v'] = self.preprocess(torch.Tensor(s['x'][:, 0, :, :, :, 1].numpy())) + 0.5
        sample['x'] = self.preprocess(torch.Tensor(s['x'][:, 0, :, :, :, :].numpy())).permute(0, 4, 2, 3, 1).squeeze(-1).contiguous()
        sample['x'][:, 1] = sample['x'][:, 1] + 0.5
        sample['y'][:, 1] = sample['y'][:, 1] + 0.5
        ## not needed for now.
        sample['reward'] = s['reward'].numpy()
        # sample['action'] = s['action'].numpy()
        return sample

    def get_dataset_sample_for_classification(self, dataset):
        s = dataset if isinstance(dataset, dict) else next(dataset)
        sample = {}
        sample['phases'] = torch.Tensor(s['phases'][:, 0, 0, 3, :, 0].numpy()).cuda()
        sample['y'] = torch.Tensor(s['x'][:, 1, :, :, :, 0].numpy()).cuda()
        sample['v'] = torch.Tensor(s['x'][:, 0, :, :, :, 1].numpy()).cuda()
        sample['x'] = torch.Tensor(s['x'][:, 0, :, :, :, 0].numpy()).cuda()
        sample['action'] = torch.Tensor(s['corrected_action'][:, :1].numpy()).cuda()
        sample['phase_action'] = torch.Tensor(s['corrected_p_action'][:, 0].numpy()).cuda()
        # the classification model only works on the last lane
        sample['x'] = sample['x'][:, 0, -1]
        sample['y'] = sample['y'][:, 0, -1]
        ## not needed for now.
        sample['reward'] = s['reward'].numpy()
        return sample

    def get_dataset_sample_for_classification_kstep(self, dataset):
        # to check the accuracy of k-step predictions,
        # we need formatted samples of a larger batch_length
        s = dataset if isinstance(dataset, dict) else next(dataset)
        sample = {}
        sample['phases'] = torch.Tensor(s['phases'][:, :, 0, 3, :, 0].numpy()).cuda()
        sample['x'] = torch.Tensor(s['x'][:, :, :, :, :, 0].numpy()).cuda()
        sample['action'] = torch.Tensor(s['corrected_action'][:, :].numpy()).cuda()
        sample['phase_action'] = torch.Tensor(s['corrected_p_action'][:, :].numpy()).cuda()
        # the classification model only works on the last lane
        sample['x'] = sample['x'][:, :, 0, -1]
        sample['reward'] = s['reward'].numpy()
        return sample

    def get_dataset_sample_for_latent_fc(self, dataset):
        s = dataset if isinstance(dataset, dict) else next(dataset)
        sample = {}
        sample['phases'] = torch.Tensor(s['phases'][:, 0, 0, 3, :, 0].numpy()).cuda()
        sample['action'] = torch.Tensor(s['corrected_action'][:, :1].numpy()).cuda()
        sample['reward'] = s['reward'].numpy()
        mu = torch.Tensor(s['mu'].numpy()).cuda()
        logvar = torch.Tensor(s['logvar'].numpy()).cuda()
        latent = reparameterize(mu, logvar)
        sample['x'] = latent[:, 0]
        sample['y'] = latent[:, 1]
        return sample

    def preprocess(self, x):
        # center inputs around zero before moving them to the GPU
        x = x - 0.5
        return x.cuda()

    def set_epoch_length(self):
        """Number of batches that, when sampled from the dataset, make up one epoch."""
        num_episodes = len(self.train_eps)
        episode_length = 500
        batch_length = self._c.batch_length
        batch_size = self._c.batch_size
        self.epoch_length = ceil(
            num_episodes * (episode_length - (batch_length - 1)) / batch_size)
        test_num_episodes = len(self.test_eps)
        self.test_epoch_length = ceil(
            test_num_episodes * (episode_length - (batch_length - 1)) / batch_size)

    def batch_update_model(self):
        # calculate loss, backpropagate, and step the optimizer
        sample = self.get_sample()
        loss = self._loss(sample)
        loss.backward()
        self.optim.step()

    def train(self):
        cur_best = None
        for epoch in range(self._c.epochs):
            self.train_dynamics(epoch)
            test_loss = self.test(epoch)
            # scheduler.step(test_loss)
            self.earlystopping.step(test_loss)
            self.writer.file_writer.flush()

            # checkpointing
            best_filename = self._c.logdir / 'best.tar'
            filename = self._c.logdir / f'checkpoint_{epoch}.tar'
            is_best = not cur_best or test_loss < cur_best
            if is_best:
                cur_best = test_loss
            if is_best or (epoch % 10 == 0):
                checkpoint = {
                    'epoch': epoch,
                    'state_dict': self.dynamics.state_dict(),
                    'precision': test_loss,
                    'optimizer': self.optim.state_dict(),
                    'earlystopping': self.earlystopping.state_dict(),
                    # 'scheduler': scheduler.state_dict(),
                }
                save_checkpoint(checkpoint, is_best, filename, best_filename)

            if self.earlystopping.stop:
                print("End of Training because of early stopping at epoch {}".format(epoch))
                break  # stop once early stopping has triggered

    def train_dynamics(self, epoch):
        print('=======================> epoch:', epoch)
        self.dynamics.train()
        train_loss = 0
        t1 = time.time()
        for u in range(self.epoch_length):
            s = self.get_dataset_sample(self._dataset)
            self.optim.zero_grad()
            y_pred = self.dynamics(s)
            loss = self.criterion(y_pred, s['y'])
            loss.backward()
            train_loss += loss.detach()  # detach so the graph is not kept alive
            self.optim.step()
            if u % int(self.epoch_length / min(self.epoch_length, 5)) == 0:
                t2 = time.time()
                print(u, round(t2 - t1, 2), '{:.10f}'.format(loss.item() / self._c.batch_size))

        norm_train_loss = (train_loss / (self.epoch_length * self._c.batch_size)).item()
        self.writer.add_scalar('train/loss', norm_train_loss, epoch)
        print('====> Epoch: {} Average loss: {:.10f}'.format(epoch, norm_train_loss))

    def test(self, epoch):
        self.dynamics.eval()
        test_loss = 0
        with torch.no_grad():  # no gradients needed for evaluation
            for u in range(self.test_epoch_length):
                s = self.get_dataset_sample(self._test_dataset)
                y_pred = self.dynamics(s)
                test_loss += F.mse_loss(y_pred, s['y'])
        norm_test_loss = (test_loss / (self.test_epoch_length * self._c.batch_size)).item()
        self.writer.add_scalar('test/loss', norm_test_loss, epoch)
        print('====> Test set loss: {:.10f}'.format(norm_test_loss))
        print()
        return norm_test_loss

    def save(self):
        raise NotImplementedError

    def load(self):
        raise NotImplementedError

    def _loss(self):
        raise NotImplementedError

    def create_reconstructions(self):
        pass
class VehModel(Model):
    def __init__(self, config):
        super().__init__(config)
        self.dynamics = VehicleTransitionModel().cuda()
        self.optim = torch.optim.Adam(self.dynamics.parameters())
        self.earlystopping = EarlyStopping(patience=self._c.early_stop_patience)
        self.set_epoch_length()

    # def get_sample(self):
    #     return get_dataset_sample(self._dataset)
    # Danijar-style get_sample (yield method): see the notes in ConvModel above.

    def get_dataset_sample(self, dataset):
        sample = next(dataset)
        sample = self.preprocess(sample)
        return sample

    def preprocess(self, e):
        bs = self._c.batch_size
        e['x'] = torch.Tensor(e['x'].numpy()).reshape(bs, -1).cuda()
        e['y'] = torch.Tensor(e['y'].numpy()).reshape(bs, -1).cuda()
        e['phases'] = torch.Tensor(e['phases'].numpy()).reshape(bs, -1).cuda()
        # normalize positions by 200 and speeds by 35
        e['x'][:, [0, 2]] = e['x'][:, [0, 2]] / 200
        e['x'][:, [1, 3]] = e['x'][:, [1, 3]] / 35
        e['y'][:, 0] = e['y'][:, 0] / 200
        e['y'][:, 1] = e['y'][:, 1] / 35
        return e

    def set_epoch_length(self):
        """Number of batches that, when sampled from the dataset, make up one epoch."""
        num_episodes = len(self.train_eps)
        episode_length = 500
        batch_length = self._c.batch_length
        batch_size = self._c.batch_size
        self.epoch_length = ceil(
            num_episodes * (episode_length - (batch_length - 1)) / batch_size)
        test_num_episodes = len(self.test_eps)
        self.test_epoch_length = ceil(
            test_num_episodes * (episode_length - (batch_length - 1)) / batch_size)

    def batch_update_model(self):
        # calculate loss, backpropagate, and step the optimizer
        sample = self.get_sample()
        loss = self._loss(sample)
        loss.backward()
        self.optim.step()

    def train(self):
        cur_best = None
        for epoch in range(self._c.epochs):
            self.train_dynamics(epoch)
            test_loss = self.test()
            # scheduler.step(test_loss)
            self.earlystopping.step(test_loss)

            # checkpointing
            best_filename = self._c.logdir / 'best.tar'
            filename = self._c.logdir / f'checkpoint_{epoch}.tar'
            is_best = not cur_best or test_loss < cur_best
            if is_best:
                cur_best = test_loss
            if is_best or (epoch % 10 == 0):
                checkpoint = {
                    'epoch': epoch,
                    'state_dict': self.dynamics.state_dict(),
                    'precision': test_loss,
                    'optimizer': self.optim.state_dict(),
                    'earlystopping': self.earlystopping.state_dict(),
                    # 'scheduler': scheduler.state_dict(),
                }
                save_checkpoint(checkpoint, is_best, filename, best_filename)

            if self.earlystopping.stop:
                print("End of Training because of early stopping at epoch {}".format(epoch))
                break

    def train_dynamics(self, epoch):
        print('=======================> epoch:', epoch)
        self.dynamics.train()  # back to train mode after test() set eval mode
        train_loss = 0
        t1 = time.time()
        for u in range(self.epoch_length):
            s = self.get_dataset_sample(self._dataset)
            self.optim.zero_grad()
            y_pred = self.dynamics(s['x'], s['phases'])
            loss = F.mse_loss(y_pred, s['y'])
            loss.backward()
            train_loss += loss.detach()
            self.optim.step()
            if u % int(self.epoch_length / min(self.epoch_length, 20)) == 0:
                t2 = time.time()
                print(u, round(t2 - t1, 2), '{:.10f}'.format(loss.item() / self._c.batch_size))

        print('====> Epoch: {} Average loss: {:.10f}'.format(
            epoch, train_loss / (self.epoch_length * self._c.batch_size)))

    def test(self):
        self.dynamics.eval()
        test_loss = 0
        with torch.no_grad():
            for u in range(self.test_epoch_length):
                # sample from the held-out test dataset, not the training one
                s = self.get_dataset_sample(self._test_dataset)
                y_pred = self.dynamics(s['x'], s['phases'])
                test_loss += F.mse_loss(y_pred, s['y'])
        test_loss /= (self.test_epoch_length * self._c.batch_size)
        print('====> Test set loss: {:.10f}'.format(test_loss))
        print()
        return test_loss

    def save(self):
        raise NotImplementedError

    def load(self):
        raise NotImplementedError

    def _loss(self):
        raise NotImplementedError

    def create_reconstructions(self):
        pass
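# Both model classes call `save_checkpoint(checkpoint, is_best, filename,
# best_filename)`, which is defined elsewhere. A minimal sketch of such a helper:

import shutil

def save_checkpoint(state, is_best, filename, best_filename):
    # always write the latest checkpoint; copy it to best_filename
    # when it is the best result seen so far
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, best_filename)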