Example #1
def test_ethane_model():
    import torch
    import hpno
    import dgllife

    # Ethane ("CC") with explicit hydrogens has 8 atoms; lift the molecular
    # graph to a path heterograph and attach 3 input features per atom.
    g = dgllife.utils.smiles_to_bigraph("CC", explicit_hydrogens=True)
    g = hpno.heterograph(g)
    g.nodes['n1'].data['h'] = torch.zeros(8, 3)

    # Expect one 4-dimensional output vector per atom.
    model = hpno.HierarchicalPathNetwork(3, 4, 5, 6)
    feat = model(g, g.nodes['n1'].data['h'])
    assert feat.shape == torch.Size([8, 4])
def test_model_equivariance(graphs_and_features):
    g0, g1, h0, h1, permutation_matrix = graphs_and_features

    import hpno
    import numpy.testing as npt

    # Permuting the nodes of the input must permute the node-level output
    # in the same way (permutation equivariance).
    model = hpno.HierarchicalPathNetwork(3, 4, 5, 2, max_level=4)
    y0 = model(g0, h0)
    y1 = model(g1, h1)
    npt.assert_almost_equal(
        (permutation_matrix @ y0).detach().numpy(),
        y1.detach().numpy(),
        decimal=5,
    )
def test_model_and_readout_invariance(graphs_and_features):
    g0, g1, h0, h1, permutation_matrix = graphs_and_features

    import hpno
    import numpy.testing as npt

    # With a graph-level readout attached, the output must be invariant
    # to node permutations.
    readout = hpno.HierarchicalPathNetwork(3,
                                           5,
                                           5,
                                           2,
                                           max_level=4,
                                           readout=hpno.GraphReadout(5, 5, 6))

    y0 = readout(g0, h0)
    y1 = readout(g1, h1)
    npt.assert_almost_equal(
        y0.detach().numpy(),
        y1.detach().numpy(),
        decimal=5,
    )
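Both tests consume a pytest fixture named graphs_and_features that is not shown on this page. The following is a minimal, hypothetical sketch of what such a fixture could look like, assuming it returns a graph, a node-permuted copy, matching feature tensors, and the permutation matrix; the construction is a guess inferred from the test signature, not the hpno test suite's actual fixture.

import pytest

@pytest.fixture
def graphs_and_features():
    # Hypothetical reconstruction -- the real fixture ships with hpno's tests.
    import torch
    import dgl
    import dgllife
    import hpno

    g = dgllife.utils.smiles_to_bigraph("CCO", explicit_hydrogens=True)
    n_nodes = g.number_of_nodes()

    # Random permutation and its matrix form: row j of the matrix selects
    # old node perm[j], so (permutation_matrix @ h0)[j] == h0[perm[j]].
    perm = torch.randperm(n_nodes)
    permutation_matrix = torch.eye(n_nodes)[perm]

    # Relabel nodes so that old node perm[j] becomes new node j.
    inv = torch.empty_like(perm)
    inv[perm] = torch.arange(n_nodes)
    src, dst = g.edges()
    g_perm = dgl.graph((inv[src], inv[dst]), num_nodes=n_nodes)

    # Lift both homographs to hpno path heterographs.
    g0 = hpno.heterograph(g)
    g1 = hpno.heterograph(g_perm)

    h0 = torch.randn(n_nodes, 3)  # 3 input features, matching the tests
    h1 = permutation_matrix @ h0
    return g0, g1, h0, h1, permutation_matrix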
Example #4
def run(args):
    import torch
    import hpno
    from ogb.graphproppred import DglGraphPropPredDataset, Evaluator, collate_dgl
    from torch.utils.data import DataLoader

    dataset = DglGraphPropPredDataset(name="ogbg-molhiv")

    # Lift the molecular graphs to path heterographs once and cache them on disk.
    import os
    if not os.path.exists("heterographs.bin"):
        dataset.graphs = [hpno.heterograph(graph) for graph in dataset.graphs]
        from dgl.data.utils import save_graphs
        save_graphs("heterographs.bin", dataset.graphs)
    else:
        from dgl.data.utils import load_graphs
        dataset.graphs = load_graphs("heterographs.bin")[0]

    evaluator = Evaluator(name="ogbg-molhiv")
    in_features = 9   # OGB molecule graphs carry 9 atom features per node
    out_features = 1  # single binary HIV-activity target

    split_idx = dataset.get_idx_split()
    train_loader = DataLoader(dataset[split_idx["train"]], batch_size=128,
                              drop_last=True, shuffle=True, collate_fn=collate_dgl)
    valid_loader = DataLoader(dataset[split_idx["valid"]], batch_size=len(split_idx["valid"]),
                              shuffle=False, collate_fn=collate_dgl)
    test_loader = DataLoader(dataset[split_idx["test"]], batch_size=len(split_idx["test"]),
                             shuffle=False, collate_fn=collate_dgl)

    model = hpno.HierarchicalPathNetwork(
        in_features=in_features,
        out_features=args.hidden_features,
        hidden_features=args.hidden_features,
        depth=args.depth,
        readout=hpno.GraphReadout(
            in_features=args.hidden_features,
            out_features=out_features,
            hidden_features=args.hidden_features,
        )
    )

    if torch.cuda.is_available():
        model = model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), args.learning_rate, weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min", factor=0.5, patience=20)

    for idx_epoch in range(args.n_epochs):
        print(idx_epoch, flush=True)
        model.train()
        for g, y in train_loader:
            y = y.float()
            if torch.cuda.is_available():
                g = g.to("cuda:0")
                y = y.cuda()
            optimizer.zero_grad()
            y_hat = model.forward(g, g.nodes['n1'].data["feat"].float())
            loss = torch.nn.BCELoss()(
                input=y_hat.sigmoid(),
                target=y,
            )
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            g, y = next(iter(valid_loader))
            y = y.float()
            if torch.cuda.is_available():
                g = g.to("cuda:0")
                y = y.cuda()
            y_hat = model.forward(g, g.nodes['n1'].data["feat"].float())
            loss = torch.nn.BCELoss()(
                input=y_hat.sigmoid(),
                target=y,
            )
            scheduler.step(loss)

        # Stop once the scheduler has decayed the learning rate to 1% of its
        # initial value.
        if optimizer.param_groups[0]["lr"] <= 0.01 * args.learning_rate:
            break

    # After training, evaluate ROC-AUC on CPU for the validation and test splits.
    model = model.cpu()
    g, y = next(iter(valid_loader))
    rocauc_vl = evaluator.eval(
        {
            "y_true": y.float(),
            "y_pred": model.forward(g, g.nodes['n1'].data["feat"].float()).sigmoid()
        }
    )["rocauc"]

    g, y = next(iter(test_loader))
    rocauc_te = evaluator.eval(
        {
            "y_true": y.float(),
            "y_pred": model.forward(g, g.nodes['n1'].data["feat"].float()).sigmoid()
        }
    )["rocauc"]

    import pandas as pd
    df = pd.DataFrame(
        {
            args.data: {
                "rocauc_te": rocauc_te,
                "rocauc_vl": rocauc_vl,
            }
        }
    )

    df.to_csv("%s.csv" % args.out)
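A run() like this is typically driven from the command line. Below is a minimal, hypothetical entry point; the flag names are inferred from the attributes read off args above, and the defaults are illustrative only.

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    # Argument names inferred from the attributes accessed on `args` above.
    parser.add_argument("--hidden_features", type=int, default=128)
    parser.add_argument("--depth", type=int, default=4)
    parser.add_argument("--learning_rate", type=float, default=1e-3)
    parser.add_argument("--weight_decay", type=float, default=1e-5)
    parser.add_argument("--n_epochs", type=int, default=500)
    parser.add_argument("--data", type=str, default="ogbg-molhiv")
    parser.add_argument("--out", type=str, default="results")
    run(parser.parse_args())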
Example #5
def run(args):
    import torch
    import dgl
    import hpno
    from dgl.data.utils import load_graphs

    # Pre-saved train/validation/test splits.
    ds_tr, y_tr = load_graphs("ds_tr.bin")
    ds_vl, y_vl = load_graphs("ds_vl.bin")
    ds_te, y_te = load_graphs("ds_te.bin")
    y_tr = y_tr["label"].float()
    y_vl = y_vl["label"].float()
    y_te = y_te["label"].float()

    # Batch each split into a single graph for full-batch training.
    g_tr = dgl.batch(ds_tr)
    g_vl = dgl.batch(ds_vl)
    g_te = dgl.batch(ds_te)

    in_features = 1   # single constant dummy feature per node (see below)
    out_features = 8  # number of classes

    model = hpno.HierarchicalPathNetwork(
        in_features=in_features,
        out_features=args.hidden_features,
        hidden_features=args.hidden_features,
        depth=args.depth,
        readout=hpno.GraphReadout(
            in_features=args.hidden_features,
            out_features=out_features,
            hidden_features=args.hidden_features,
        ))

    if torch.cuda.is_available():
        model = model.cuda()
        y_tr = y_tr.cuda()
        y_vl = y_vl.cuda()
        y_te = y_te.cuda()
        g_tr = g_tr.to("cuda:0")
        g_vl = g_vl.to("cuda:0")
        g_te = g_te.to("cuda:0")

    optimizer = torch.optim.Adam(model.parameters(),
                                 args.learning_rate,
                                 weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           "min",
                                                           factor=0.5,
                                                           patience=20)

    # Constant-zero dummy features: the model must classify from graph
    # structure alone.
    g_tr.nodes['n1'].data['h'] = torch.zeros(g_tr.number_of_nodes('n1'),
                                             1,
                                             device=y_tr.device)
    g_vl.nodes['n1'].data['h'] = torch.zeros(g_vl.number_of_nodes('n1'),
                                             1,
                                             device=y_vl.device)
    g_te.nodes['n1'].data['h'] = torch.zeros(g_te.number_of_nodes('n1'),
                                             1,
                                             device=y_te.device)

    for idx_epoch in range(args.n_epochs):
        print(idx_epoch, flush=True)
        model.train()
        optimizer.zero_grad()
        y_hat = model.forward(g_tr, g_tr.nodes['n1'].data["h"].float())
        loss = torch.nn.CrossEntropyLoss()(
            input=y_hat,
            target=y_tr.long(),
        )
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            y_hat = model.forward(g_vl, g_vl.nodes['n1'].data["h"].float())
            loss = torch.nn.CrossEntropyLoss()(
                input=y_hat,
                target=y_vl.long(),
            )

            scheduler.step(loss)

        if optimizer.param_groups[0]["lr"] <= 0.01 * args.learning_rate:
            break

    # Final train/test accuracy (fraction of correctly classified graphs).
    accuracy_tr = (
        model(g_tr, g_tr.nodes['n1'].data['h'].float()).argmax(dim=-1) == y_tr
    ).float().mean()
    accuracy_te = (
        model(g_te, g_te.nodes['n1'].data['h'].float()).argmax(dim=-1) == y_te
    ).float().mean()

    print(accuracy_tr)
    print(accuracy_te)
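For completeness, here is a hedged sketch of how the ds_*.bin shards loaded above could be produced, assuming a list of raw DGL graphs and a 1-D tensor of class indices per split; the helper name save_split is invented for illustration.

import hpno
from dgl.data.utils import save_graphs

def save_split(graphs, labels, path):
    # Lift raw DGL graphs to hpno path heterographs before saving.
    graphs = [hpno.heterograph(g) for g in graphs]
    # save_graphs stores the graph list plus a dict of label tensors; this
    # layout is what load_graphs("ds_tr.bin") above unpacks into (ds_tr, y_tr).
    save_graphs(path, graphs, {"label": labels})

# e.g. save_split(train_graphs, train_labels, "ds_tr.bin")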