def test_ethane_readout():
    import torch
    import hpno
    import dgllife

    # ethane with explicit hydrogens has 8 atoms (2 C + 6 H)
    g = dgllife.utils.smiles_to_bigraph("CC", explicit_hydrogens=True)
    g = hpno.heterograph(g)
    g.nodes['n1'].data['h'] = torch.zeros(8, 3)
    readout = hpno.GraphReadout(3, 4, 5)
    feat = readout(g, g.nodes['n1'].data['h'])
    assert feat.shape == torch.Size([1, 4])
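# The two invariance tests below consume a `graphs_and_features` pytest
# fixture that is defined elsewhere in the test suite. The sketch below is an
# assumption about its shape, not the repository's actual fixture: it builds a
# molecular heterograph, a node-permuted copy of it, matching features, and
# the permutation matrix relating the two.
import pytest

@pytest.fixture
def graphs_and_features():
    import torch
    import dgl
    import dgllife
    import hpno

    g = dgllife.utils.smiles_to_bigraph("CCO", explicit_hydrogens=True)
    n = g.number_of_nodes()

    # random relabeling of the atoms: node i in g becomes node perm[i]
    perm = torch.randperm(n)
    src, dst = g.edges()
    g_permuted = dgl.graph((perm[src], perm[dst]), num_nodes=n)

    # permutation matrix P such that (P @ h)[perm[i]] == h[i]
    permutation_matrix = torch.eye(n)[:, perm]

    h0 = torch.randn(n, 3)
    h1 = permutation_matrix @ h0

    return (
        hpno.heterograph(g),
        hpno.heterograph(g_permuted),
        h0,
        h1,
        permutation_matrix,
    )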
def test_readout_invariance(graphs_and_features):
    import numpy.testing as npt
    import hpno

    g0, g1, h0, h1, permutation_matrix = graphs_and_features
    readout = hpno.GraphReadout(3, 4, 5, max_level=4)
    y0 = readout(g0, h0)
    y1 = readout(g1, h1)

    # the graph-level readout should be invariant to node permutation
    npt.assert_almost_equal(
        y0.detach().numpy(),
        y1.detach().numpy(),
        decimal=5,
    )
def test_model_and_readout_invariance(graphs_and_features):
    import numpy.testing as npt
    import hpno

    g0, g1, h0, h1, permutation_matrix = graphs_and_features
    model = hpno.HierarchicalPathNetwork(
        3, 5, 5, 2,
        max_level=4,
        readout=hpno.GraphReadout(5, 5, 6),
    )
    y0 = model(g0, h0)
    y1 = model(g1, h1)

    # the full model plus readout should also be invariant to node permutation
    npt.assert_almost_equal(
        y0.detach().numpy(),
        y1.detach().numpy(),
        decimal=5,
    )
def run(args):
    import os
    import torch
    import hpno
    from ogb.graphproppred import DglGraphPropPredDataset, Evaluator, collate_dgl
    from torch.utils.data import DataLoader

    dataset = DglGraphPropPredDataset(name="ogbg-molhiv")

    # cache the heterograph conversion, which is expensive
    if not os.path.exists("heterographs.bin"):
        dataset.graphs = [hpno.heterograph(graph) for graph in dataset.graphs]
        from dgl.data.utils import save_graphs
        save_graphs("heterographs.bin", dataset.graphs)
    else:
        from dgl.data.utils import load_graphs
        dataset.graphs = load_graphs("heterographs.bin")[0]

    evaluator = Evaluator(name="ogbg-molhiv")
    in_features = 9
    out_features = 1

    split_idx = dataset.get_idx_split()
    train_loader = DataLoader(
        dataset[split_idx["train"]],
        batch_size=128,
        drop_last=True,
        shuffle=True,
        collate_fn=collate_dgl,
    )
    valid_loader = DataLoader(
        dataset[split_idx["valid"]],
        batch_size=len(split_idx["valid"]),
        shuffle=False,
        collate_fn=collate_dgl,
    )
    test_loader = DataLoader(
        dataset[split_idx["test"]],
        batch_size=len(split_idx["test"]),
        shuffle=False,
        collate_fn=collate_dgl,
    )

    model = hpno.HierarchicalPathNetwork(
        in_features=in_features,
        out_features=args.hidden_features,
        hidden_features=args.hidden_features,
        depth=args.depth,
        readout=hpno.GraphReadout(
            in_features=args.hidden_features,
            out_features=out_features,
            hidden_features=args.hidden_features,
        ),
    )

    if torch.cuda.is_available():
        model = model.cuda()

    optimizer = torch.optim.Adam(
        model.parameters(), args.learning_rate, weight_decay=args.weight_decay,
    )
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, "min", factor=0.5, patience=20,
    )

    for idx_epoch in range(args.n_epochs):
        print(idx_epoch, flush=True)
        model.train()
        for g, y in train_loader:
            y = y.float()
            if torch.cuda.is_available():
                g = g.to("cuda:0")
                y = y.cuda()
            optimizer.zero_grad()
            y_hat = model(g, g.nodes['n1'].data["feat"].float())
            loss = torch.nn.BCELoss()(
                input=y_hat.sigmoid(),
                target=y,
            )
            loss.backward()
            optimizer.step()

        # step the scheduler on the validation loss
        model.eval()
        with torch.no_grad():
            g, y = next(iter(valid_loader))
            y = y.float()
            if torch.cuda.is_available():
                g = g.to("cuda:0")
                y = y.cuda()
            y_hat = model(g, g.nodes['n1'].data["feat"].float())
            loss = torch.nn.BCELoss()(
                input=y_hat.sigmoid(),
                target=y,
            )
            scheduler.step(loss)

        # stop once the plateau scheduler has decayed the learning rate 100x
        if optimizer.param_groups[0]["lr"] <= 0.01 * args.learning_rate:
            break

    model = model.cpu()
    with torch.no_grad():
        g, y = next(iter(valid_loader))
        rocauc_vl = evaluator.eval(
            {
                "y_true": y.float(),
                "y_pred": model(g, g.nodes['n1'].data["feat"].float()).sigmoid(),
            }
        )["rocauc"]

        g, y = next(iter(test_loader))
        rocauc_te = evaluator.eval(
            {
                "y_true": y.float(),
                "y_pred": model(g, g.nodes['n1'].data["feat"].float()).sigmoid(),
            }
        )["rocauc"]

    import pandas as pd
    df = pd.DataFrame(
        {
            args.data: {
                "rocauc_te": rocauc_te,
                "rocauc_vl": rocauc_vl,
            }
        }
    )
    df.to_csv("%s.csv" % args.out)
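# `run` expects an `args` namespace carrying the hyperparameters referenced
# above. A minimal entry point could look like the sketch below; the default
# values are illustrative assumptions, not the repository's settings.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--hidden_features", type=int, default=128)
    parser.add_argument("--depth", type=int, default=4)
    parser.add_argument("--learning_rate", type=float, default=1e-3)
    parser.add_argument("--weight_decay", type=float, default=1e-5)
    parser.add_argument("--n_epochs", type=int, default=1000)
    parser.add_argument("--data", type=str, default="ogbg-molhiv")
    parser.add_argument("--out", type=str, default="results")
    run(parser.parse_args())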
def run(args):
    import dgl
    import torch
    import hpno
    from dgl.data.utils import load_graphs

    ds_tr, y_tr = load_graphs("ds_tr.bin")
    ds_vl, y_vl = load_graphs("ds_vl.bin")
    ds_te, y_te = load_graphs("ds_te.bin")

    y_tr = y_tr["label"].float()
    y_vl = y_vl["label"].float()
    y_te = y_te["label"].float()

    g_tr = dgl.batch(ds_tr)
    g_vl = dgl.batch(ds_vl)
    g_te = dgl.batch(ds_te)

    in_features = 1
    out_features = 8

    model = hpno.HierarchicalPathNetwork(
        in_features=in_features,
        out_features=args.hidden_features,
        hidden_features=args.hidden_features,
        depth=args.depth,
        readout=hpno.GraphReadout(
            in_features=args.hidden_features,
            out_features=out_features,
            hidden_features=args.hidden_features,
        ),
    )

    if torch.cuda.is_available():
        model = model.cuda()
        y_tr = y_tr.cuda()
        y_vl = y_vl.cuda()
        y_te = y_te.cuda()
        g_tr = g_tr.to("cuda:0")
        g_vl = g_vl.to("cuda:0")
        g_te = g_te.to("cuda:0")

    optimizer = torch.optim.Adam(
        model.parameters(), args.learning_rate, weight_decay=args.weight_decay,
    )
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, "min", factor=0.5, patience=20,
    )

    # dumb feature: the graphs carry no node attributes, so feed constant zeros
    g_tr.nodes['n1'].data['h'] = torch.zeros(g_tr.number_of_nodes('n1'), 1, device=y_tr.device)
    g_vl.nodes['n1'].data['h'] = torch.zeros(g_vl.number_of_nodes('n1'), 1, device=y_vl.device)
    g_te.nodes['n1'].data['h'] = torch.zeros(g_te.number_of_nodes('n1'), 1, device=y_te.device)

    for idx_epoch in range(args.n_epochs):
        print(idx_epoch, flush=True)
        model.train()
        optimizer.zero_grad()
        y_hat = model(g_tr, g_tr.nodes['n1'].data["h"].float())
        loss = torch.nn.CrossEntropyLoss()(
            input=y_hat,
            target=y_tr.long(),
        )
        loss.backward()
        optimizer.step()

        # step the scheduler on the validation loss
        model.eval()
        with torch.no_grad():
            y_hat = model(g_vl, g_vl.nodes['n1'].data["h"].float())
            loss = torch.nn.CrossEntropyLoss()(
                input=y_hat,
                target=y_vl.long(),
            )
            scheduler.step(loss)

        # stop once the plateau scheduler has decayed the learning rate 100x
        if optimizer.param_groups[0]["lr"] <= 0.01 * args.learning_rate:
            break

    with torch.no_grad():
        accuracy_tr = (
            model(g_tr, g_tr.nodes['n1'].data['h'].float()).argmax(dim=-1) == y_tr
        ).float().mean()
        accuracy_te = (
            model(g_te, g_te.nodes['n1'].data['h'].float()).argmax(dim=-1) == y_te
        ).float().mean()

    print(accuracy_tr)
    print(accuracy_te)
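# This script assumes `ds_tr.bin`, `ds_vl.bin`, and `ds_te.bin` already exist
# on disk. A sketch of how one such split file could be written with DGL's
# serialization utilities; `graphs` and `labels` are hypothetical placeholders
# for whatever upstream pipeline produces the split.
def save_split(path, graphs, labels):
    import torch
    import hpno
    from dgl.data.utils import save_graphs

    # convert each homogeneous graph to hpno's path heterograph before saving,
    # matching the format `load_graphs` hands back above
    graphs = [hpno.heterograph(g) for g in graphs]
    save_graphs(path, graphs, {"label": torch.as_tensor(labels)})

# usage (hypothetical): save_split("ds_tr.bin", train_graphs, train_labels)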