Code example #1
import networkx as nx
import numpy as np
import torch as th
from numpy.random import multivariate_normal
from torch.nn.functional import normalize
from dgl import DGLGraph
from easydict import EasyDict


def synthetic_data(num_node=3000,
                   num_feature=10,
                   num_class=2,
                   num_important=4):
    gnp = nx.barabasi_albert_graph(num_node, 2)
    gnp.remove_edges_from(nx.selfloop_edges(gnp))
    g = DGLGraph(gnp)
    g.add_edges(g.nodes(), g.nodes())
    data = EasyDict()
    data.graph = gnp
    data.num_labels = num_class
    data.g = g
    # transpose=None is deprecated in DGL; the graph is symmetric here,
    # so the orientation does not matter
    data.adj = g.adjacency_matrix(transpose=False).to_dense()
    means = np.zeros(num_node)
    # build D^{-1/2} as a diagonal matrix (self-loops guarantee degree >= 1)
    degree = np.zeros((num_node, num_node))
    for i in range(num_node):
        degree[i, i] = data.adj[i].sum()**-0.5
    # symmetrically normalized Laplacian L = I - D^{-1/2} A D^{-1/2}
    lap_matrix = np.identity(num_node) - np.matmul(
        np.matmul(degree, data.adj.numpy()), degree)
    # covariance (L + I)^{-1} makes the sampled features vary smoothly
    # across the edges of the graph
    cov = np.linalg.inv(lap_matrix + np.identity(num_node))
    data.features = th.from_numpy(
        multivariate_normal(means, cov, num_feature).transpose())
    data.features = data.features.float().abs()
    g.ndata['x'] = data.features
    W = th.randn(num_feature) * 0.1
    # NOTE: the hand-picked weights assume num_important == 4
    W[range(num_important)] = th.Tensor([10, -10, 10, -10])
    data.Prob = normalize(th.FloatTensor(data.adj), p=1, dim=1)
    logits = th.sigmoid(
        th.matmul(th.matmul(normalize(data.adj, p=1, dim=1), data.features),
                  W))
    labels = th.zeros(num_node)
    labels[logits > 0.5] = 1
    data.labels = labels.long()
    data.size = num_node
    return data
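
A minimal usage sketch for the generator above, assuming the imports added at the top of the snippet (the printed shapes follow from the arguments alone):

data = synthetic_data(num_node=1000, num_feature=10, num_class=2, num_important=4)
print(data.features.shape)     # torch.Size([1000, 10]): one feature row per node
print(data.adj.shape)          # torch.Size([1000, 1000]): dense adjacency with self-loops
print(data.labels.bincount())  # class counts induced by thresholding the sigmoid at 0.5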
Code example #2
import os

import matplotlib.pyplot as plt
import torch
from dgl import DGLGraph
from tqdm import trange

# load_data, VGAE, BCELoss, args, and device come from the surrounding project


def main():
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # TODO: train test split
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    in_feats = features.shape[1]
    print(features.shape)
    model = VGAE(in_feats, [32, 16], zdim=10, device=device)
    model.train()
    optim = torch.optim.Adam(model.parameters(), lr=1e-4)
    loss_function = BCELoss  # unused here: the loss comes from model.compute_loss below

    g = DGLGraph(data.graph)
    g.ndata['h'] = features

    n_epochs = 500
    losses = []
    loss = 0.0
    print('Training Start')
    t = trange(n_epochs, desc="Loss: 0.0", leave=True)
    for epoch in t:
        g.ndata['h'] = features

        t.set_description("Loss: {}".format(loss))
        t.refresh()
        # Normalization constant and positive-class weight of the reference
        # VGAE objective; both depend only on the static graph, so they could
        # be hoisted out of the training loop.
        adj = g.adjacency_matrix().to_dense()
        norm = adj.shape[0] * adj.shape[0] / float(
            (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
        #g.ndata['norm'] = norm.unsqueeze(1)

        pos_weight = torch.Tensor([
            float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
        ]).to(device)

        z, adj_logits = model(g)  # calling the module invokes forward()

        loss = model.compute_loss(z, adj_logits, adj, norm, pos_weight)

        optim.zero_grad()
        loss.backward()
        optim.step()
        losses.append(loss.item())
        #print('Epoch: {:02d} | Loss: {:.5f}'.format(epoch, loss))

    plt.plot(losses)
    plt.xlabel('iteration')
    plt.ylabel('train loss')
    plt.grid()
    plt.show()
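
model.compute_loss above is project code. For reference, a common formulation of the VGAE objective that matches the norm and pos_weight computed in the loop, sketched under the assumption that the encoder exposes the Gaussian mean and log-std of its latent code:

import torch
import torch.nn.functional as F

def vgae_loss_sketch(z_mean, z_logstd, adj_logits, adj, norm, pos_weight):
    # weighted reconstruction: BCE over all node pairs, with pos_weight
    # compensating for the scarcity of actual edges
    recon = norm * F.binary_cross_entropy_with_logits(
        adj_logits, adj, pos_weight=pos_weight)
    # KL divergence between the posterior N(mean, std^2) and the standard
    # normal prior, averaged over nodes
    kl = -0.5 / adj.shape[0] * torch.mean(torch.sum(
        1 + 2 * z_logstd - z_mean.pow(2) - (2 * z_logstd).exp(), dim=1))
    return recon + kl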
Code example #3
import dgl
import networkx as nx
import torch
from dgl import DGLGraph

# load_data comes from the surrounding project


def generate_data(args):
    data = load_data(args)
    labels = torch.LongTensor(data.labels)
    features = torch.FloatTensor(data.features)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)

    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d 
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    g = data.graph
    # add self loop
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    # Round-trip through a dense adjacency so the self-loops become weighted
    # edges of a directed NetworkX graph (from_numpy_matrix stores the matrix
    # entries in the 'weight' edge attribute).
    netg = nx.from_numpy_matrix(g.adjacency_matrix().to_dense().numpy(),
                                create_using=nx.DiGraph)
    print(netg)
    g = dgl.from_networkx(netg, edge_attrs=['weight'])
    if cuda:
        # the original moved the graph to 'cuda:0' unconditionally, which
        # breaks both the .numpy() call above and CPU-only runs
        g = g.to(torch.device('cuda', args.gpu))
    n_edges = g.number_of_edges()
    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    print("train_mask-shape", train_mask)
    return g, num_feats, n_classes, heads, cuda, features, labels, train_mask, val_mask, test_mask
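
The heads list built above follows the usual GAT convention of one attention-head count per layer. With hypothetical values num_heads=8, num_layers=2, num_out_heads=1 it expands to:

heads = ([8] * 2) + [1]  # -> [8, 8, 1]: eight heads per hidden layer, one for the output layer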
Code example #4
import os

import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
from dgl import DGLGraph
from tqdm import tqdm

# load_data, GAE, and args come from the surrounding project


def main():
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # TODO: train test split
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    in_feats = features.shape[1]
    print(features.shape)
    model = GAE(in_feats, [32, 16])
    model.train()
    optim = torch.optim.Adam(model.parameters(), lr=1e-2)
    # torch.nn.BCELoss accepts neither logits nor a per-call pos_weight, so
    # use the functional with-logits form for the reconstruction loss
    loss_function = F.binary_cross_entropy_with_logits

    g = DGLGraph(data.graph)
    g.ndata['h'] = features


    n_epochs = 500
    losses = []
    print('Training Start')
    for epoch in tqdm(range(n_epochs)):
        g.ndata['h'] = features
        # normalization
        degs = g.in_degrees().float()
        norm = torch.pow(degs, -0.5)
        norm[torch.isinf(norm)] = 0
        g.ndata['norm'] = norm.unsqueeze(1)
        adj = g.adjacency_matrix().to_dense()
        pos_weight = torch.Tensor(
            [float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()])

        adj_logits = model(g)  # calling the module invokes forward()

        loss = loss_function(adj_logits, adj, pos_weight=pos_weight)
        optim.zero_grad()
        loss.backward()
        optim.step()
        losses.append(loss.item())
        print('Epoch: {:02d} | Loss: {:.5f}'.format(epoch, loss))

    plt.plot(losses)
    plt.xlabel('iteration')
    plt.ylabel('train loss')
    plt.grid()
    plt.show()
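
A worked example of the pos_weight arithmetic above, on a toy 4-node graph chosen for round numbers:

import torch

adj = torch.tensor([[0., 1., 0., 0.],
                    [1., 0., 0., 0.],
                    [0., 0., 0., 1.],
                    [0., 0., 1., 0.]])
# 16 entries, 4 of them ones: pos_weight = (16 - 4) / 4 = 3.0, i.e. each
# positive (edge) entry weighs as much as three negatives in the BCE loss
pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / float(adj.sum())
print(pos_weight)  # 3.0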
Code example #5
import dgl
import networkx as nx
import torch


def two_hop_neighborhood(graph: dgl.DGLGraph) -> dgl.DGLGraph:
    """Increases the connectivity of a given graph by an additional hop

    Args:
        graph (dgl.DGLGraph): Input graph
    Returns:
        dgl.DGLGraph: Output graph
    """
    A = graph.adjacency_matrix().to_dense()
    # Entries of A + A^2 are >= 1 wherever a node is reachable in one or two
    # hops; subtracting the identity strips the self-loops that A^2 puts on
    # the diagonal of every non-isolated node.
    A_tilde = (1.0 * ((A + A.matmul(A)) >= 1)) - torch.eye(A.shape[0])
    # from_numpy_matrix requires networkx < 3.0 (renamed to from_numpy_array)
    ngraph = nx.convert_matrix.from_numpy_matrix(A_tilde.numpy())
    # DGLGraph().from_networkx() was removed in DGL 0.5; use the module-level
    # constructor instead
    new_graph = dgl.from_networkx(ngraph)
    for k, v in graph.ndata.items():
        new_graph.ndata[k] = v
    # NOTE: copying edge features verbatim only works when the edge count is
    # unchanged, i.e. the input graph was already two-hop closed
    for k, v in graph.edata.items():
        new_graph.edata[k] = v
    return new_graph
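
A small usage sketch on a path graph, assuming a DGL version that provides both dgl.from_networkx and the adjacency_matrix() method used above:

g = dgl.from_networkx(nx.path_graph(4))  # chain 0-1-2-3, 6 directed edges
g.ndata['feat'] = torch.randn(4, 8)
g2 = two_hop_neighborhood(g)
# the chain gains (0,2), (2,0), (1,3), (3,1): 6 + 4 = 10 directed edges
print(g2.number_of_edges())    # 10
print(g2.ndata['feat'].shape)  # torch.Size([4, 8]): node features carried over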
Code example #6
    def process(mol: Mol, device: torch.device, **kwargs):
        # one extra node: index 0 acts as a virtual "super node" that pools
        # information from the whole molecule
        n = mol.GetNumAtoms() + 1

        graph = DGLGraph()
        graph.add_nodes(n)
        graph.add_edges(graph.nodes(), graph.nodes())  # self-loops
        graph.add_edges(range(1, n), 0)  # every atom feeds the super node
        # graph.add_edges(0, range(1, n))
        for e in mol.GetBonds():
            # atom indices are shifted by 1 because index 0 is the super node
            u, v = e.GetBeginAtomIdx(), e.GetEndAtomIdx()
            graph.add_edge(u + 1, v + 1)
            graph.add_edge(v + 1, u + 1)
        adj = graph.adjacency_matrix(transpose=False).to_dense()

        # mol_feature yields one feature row per atom; prepend a zero row
        # for the super node so vec lines up with the n graph nodes
        v, m = feature.mol_feature(mol)
        vec = torch.cat([torch.zeros((1, m)), v]).to(device)

        return ChebNetData(n, adj, vec)
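
A hypothetical call, using RDKit to build the Mol (feature.mol_feature and ChebNetData are project code, so this only illustrates the shapes involved):

from rdkit import Chem
import torch

mol = Chem.MolFromSmiles('CCO')  # ethanol: 3 heavy atoms
data = process(mol, torch.device('cpu'))
# n == 4 inside process: the 3 atoms plus the virtual super node at index 0,
# and vec starts with the zero row that stands in for the super node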
Code example #7
def load_data(dataset="cora"):
    assert dataset in ["cora", "pubmed", "citeseer", "synthetic"]
    if dataset == "cora":
        data = citegrh.load_cora()
    elif dataset == "pubmed":
        data = citegrh.load_pubmed()
    elif dataset == "citeseer":
        data = citegrh.load_citeseer()
    else:
        data = synthetic_data()
    data.features = th.FloatTensor(data.features)
    data.labels = th.LongTensor(data.labels)
    data.size = data.labels.shape[0]
    g = data.graph
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    data.g = g
    # transpose=None is deprecated in DGL; the graph is symmetric here,
    # so the orientation does not matter
    data.adj = g.adjacency_matrix(transpose=False).to_dense()
    data.Prob = normalize(th.FloatTensor(data.adj), p=1, dim=1)
    print("============Successfully Load %s===============" % dataset)
    return data
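
Usage sketch (the Cora numbers are the dataset's published statistics):

data = load_data("cora")
print(data.size)            # 2708 nodes
print(data.features.shape)  # torch.Size([2708, 1433])
print(data.Prob.sum(dim=1)) # every row of the transition matrix sums to 1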
Code example #8
File: train.py  Project: qiygan/HeteGSL
import time

import networkx as nx
import numpy as np
import torch
import torch.nn.functional as F
from dgl import DGLGraph

# load_data, seed_init, IDGL, EarlyStopping, accuracy, evaluate, cal_loss,
# and iter_condition are part of the HeteGSL project


def train_idgl(args):
    data = load_data(args)
    seed_init(seed=args.seed)
    dev = torch.device("cuda:0" if args.gpu >= 0 else "cpu")

    features = torch.FloatTensor(data.features)
    features = F.normalize(features, p=1, dim=1)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d 
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))
    # print(torch.where(test_mask)) # Same train/test split with different init_seed
    features = features.to(dev)
    labels = labels.to(dev)
    train_mask = train_mask.to(dev)
    val_mask = val_mask.to(dev)
    test_mask = test_mask.to(dev)
    g = data.graph
    # add self loop
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    n_edges = g.number_of_edges()
    # create model
    model = IDGL(args, num_feats, n_classes, dev)

    print(model)
    es_checkpoint = 'temp/' + time.strftime('%m-%d %H-%M-%S',
                                            time.localtime()) + '.pt'
    stopper = EarlyStopping(patience=100, path=es_checkpoint)

    model.to(dev)
    adj = g.adjacency_matrix()
    # adj = normalize_adj_torch(adj.to_dense())
    adj = F.normalize(adj.to_dense(), dim=1, p=1)
    adj = adj.to(dev)

    # cla_loss = torch.nn.CrossEntropyLoss()
    cla_loss = torch.nn.NLLLoss()
    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    h = None

    # ! Pretrain
    res_dict = {'parameters': args.__dict__}
    for epoch in range(args.pretrain_epochs):
        logits, _ = model.GCN(features, adj)
        loss = cla_loss(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        # abort the backward pass if an anomaly (e.g. a NaN gradient) is detected
        with torch.autograd.detect_anomaly():
            loss.backward()
        optimizer.step()
        train_acc = accuracy(logits[train_mask], labels[train_mask])
        val_acc = evaluate(model, features, labels, val_mask, adj)
        test_acc = evaluate(model, features, labels, test_mask, adj)
        print(
            f"Pretrain-Epoch {epoch:05d} | Time(s) {np.mean(dur):.4f} | Loss {loss.item():.4f} | TrainAcc {train_acc:.4f} | ValAcc {val_acc:.4f} | TestAcc {test_acc:.4f}"
        )
        if args.early_stop > 0:
            if stopper.step(val_acc, model):
                break
    print(f"Pretrain Test Accuracy: {test_acc:.4f}")
    print(f"{'=' * 10}Pretrain finished!{'=' * 10}\n\n")
    if args.early_stop > 0:
        model.load_state_dict(torch.load(es_checkpoint))
    test_acc = evaluate(model, features, labels, test_mask, adj)
    res_dict['res'] = {'pretrain_acc': f'{test_acc:.4f}'}
    # ! Train
    stopper = EarlyStopping(patience=100, path=es_checkpoint)
    for epoch in range(args.max_epoch):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        t, adj_sim_prev = 0, None
        logits, h, adj_sim, adj_feat = model(features,
                                             h=None,
                                             adj_ori=adj,
                                             adj_feat=None,
                                             mode='feat',
                                             norm_graph_reg_loss=args.ngrl)
        loss_adj_feat = cal_loss(args, cla_loss, logits, train_mask, labels,
                                 adj_sim, features)
        loss_list = [loss_adj_feat]
        ori_adj_norm = torch.norm(adj_sim.detach(), p=2)

        while iter_condition(args, adj_sim_prev, adj_sim, ori_adj_norm, t):
            t += 1
            adj_sim_prev = adj_sim.detach()
            logits, h, adj_sim, adj_agg = model(features,
                                                h,
                                                adj,
                                                adj_feat,
                                                mode='emb',
                                                norm_graph_reg_loss=args.ngrl)
            # exists_zero_lines(h)
            loss_adj_emb = cal_loss(args, cla_loss, logits, train_mask, labels,
                                    adj_sim, features)
            loss_list.append(loss_adj_emb)
        loss = torch.mean(torch.stack(loss_list))
        optimizer.zero_grad()

        # abort the backward pass if an anomaly (e.g. a NaN gradient) is detected
        with torch.autograd.detect_anomaly():
            loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        val_acc = evaluate(model, features, labels, val_mask, adj)
        test_acc = evaluate(model, features, labels, test_mask, adj)

        # print(
        #     f"Epoch {epoch:05d} | Time(s) {np.mean(dur):.4f} | Loss {loss.item():.4f} | TrainAcc {train_acc:.4f} | ValAcc {val_acc:.4f}")
        print(
            f"IDGL-Epoch {epoch:05d} | Time(s) {np.mean(dur):.4f} | Loss {loss.item():.4f} | TrainAcc {train_acc:.4f} | ValAcc {val_acc:.4f} | TestAcc {test_acc:.4f}"
        )
        if args.early_stop > 0:
            if stopper.step(val_acc, model):
                break
    if args.early_stop > 0:
        model.load_state_dict(torch.load(es_checkpoint))
    test_acc = evaluate(model, features, labels, test_mask, adj)
    print(f"Test Accuracy {test_acc:.4f}")
    res_dict['res']['IDGL_acc'] = f'{test_acc:.4f}'
    print(res_dict['res'])
    print(res_dict['parameters'])
    return res_dict
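
iter_condition above is HeteGSL project code that is not shown here. The IDGL paper stops the inner refinement loop once the learned adjacency stabilises; a plausible sketch consistent with the arguments passed above (args.max_iter and args.eps_adj are hypothetical names) is:

import torch

def iter_condition_sketch(args, adj_sim_prev, adj_sim, ori_adj_norm, t):
    # always run at least one refinement step
    if adj_sim_prev is None:
        return t < args.max_iter
    # keep iterating while the learned adjacency is still moving: the change
    # between iterations must stay above a fraction of the initial norm
    delta = torch.norm(adj_sim.detach() - adj_sim_prev, p=2)
    return t < args.max_iter and delta > args.eps_adj * ori_adj_norm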
Code example #9
    #features = torch.FloatTensor(data.features)
    #g = DGLGraph(data.graph).to(device)


    #dataset = da.CoraGraphDataset()

    device = torch.device('cuda')

    #model = Net()
    model = Net().to(device)

    print(data.labels.shape)
    print(data.features.shape)

    #s = torch.sparse_coo_tensor(data.labels, data.features, [16,1433])
    #(s.to_dense()).to(device)
    #features = (data.features).to(device).to_dense()
    g = DGLGraph(data.graph)
    g = dgl.add_self_loop(g)
    # note: g is rebound to the adjacency matrix itself, which is what this
    # Net expects as its first input
    g = g.adjacency_matrix(ctx=device)

    #data = dataset[0].to(device)
    print(g.shape)
    g = g.to(device)

    # the features must live on the same device as the model
    features = data.features.to(device)
    out = model(g, features)

    profiler.stop()

    #print(net)
Code example #10
import dgl
import torch

# get_remote_connection is project code: it returns (src, dst) index pairs
# for the 2-hop and 3-hop neighbourhoods derived from the adjacency


def graph_to_whole_graph(
    g: dgl.DGLGraph
) -> dgl.DGLGraph:
    g.set_n_initializer(dgl.init.zero_initializer)
    g.set_e_initializer(dgl.init.zero_initializer)
    adj = g.adjacency_matrix()
    d_indices_2, d_indices_3 = get_remote_connection(adj)
    e_data = g.edata['feat']
    g.add_edges(d_indices_2[0], d_indices_2[1])
    g.add_edges(d_indices_3[0], d_indices_3[1])
    # widen every edge feature by two indicator columns (is-2-hop, is-3-hop);
    # original edges get zeros in both
    g.edata['feat'] = torch.cat(
        [
            torch.cat([e_data, torch.zeros([e_data.size(0), 2])], dim=-1),
            torch.cat(
                [
                    torch.zeros([d_indices_2.size(-1), e_data.size(-1)]),
                    torch.ones([d_indices_2.size(-1), 1]),
                    torch.zeros([d_indices_2.size(-1), 1])
                ], dim=-1),
            torch.cat(
                [
                    torch.zeros([d_indices_3.size(-1), e_data.size(-1)]),
                    torch.zeros([d_indices_3.size(-1), 1]),
                    torch.ones([d_indices_3.size(-1), 1])
                ], dim=-1)
        ],
        dim=0
    )
    g_new = dgl.DGLGraph()
    # one node per original node plus one node per (augmented) edge
    g_new.add_nodes(g.number_of_nodes() + g.number_of_edges())
    n_add = torch.arange(g.number_of_nodes(), g_new.number_of_nodes())
    ndata_new = torch.cat(
        (
            g.ndata['feat'],
            torch.zeros((g.number_of_nodes(), g.edata['feat'].size(-1)))
        ),
        dim=-1
    )
    edata_new = torch.cat(
        (
            torch.zeros((g.edata['feat'].size(0), g.ndata['feat'].size(-1))),
            g.edata['feat']
        ),
        dim=-1
    )
    all_node_data = torch.cat(
        (ndata_new, edata_new),
        dim=0
    )
    g_new.ndata['feat'] = all_node_data
    # wire each edge-node to and from both endpoints of the edge it represents
    all_new_bond_info = torch.cat(
        [
            torch.stack(
                [g.edges()[0], n_add],
                dim=0
            ),
            torch.stack(
                [n_add, g.edges()[0]],
                dim=0
            ),
            torch.stack(
                [g.edges()[1], n_add],
                dim=0
            ),
            torch.stack(
                [n_add, g.edges()[1]],
                dim=0
            ),
        ],
        dim=-1
    )
    g_new.add_edges(all_new_bond_info[0], all_new_bond_info[1])
    return g_new
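
A size sanity check implied by the construction (it would hold if placed just before the return; each of the four stacks contributes one connection per edge-node):

# with N original nodes and E edges (counted after the remote connections
# are added), the new graph has N + E nodes and 4 * E edges
assert g_new.number_of_nodes() == g.number_of_nodes() + g.number_of_edges()
assert g_new.number_of_edges() == 4 * g.number_of_edges()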
Code example #11
    def _process(self, graph: dgl.DGLGraph, label: int = None):
        """
        Explain a graph instance

        Args:
            graph (dgl.DGLGraph): Input graph to explain
            label (int): Label attached to the graph. Unused here.
        """

        sub_adj = graph.adjacency_matrix().to_dense().unsqueeze(dim=0)
        sub_feat = graph.ndata[GNN_NODE_FEAT_IN].unsqueeze(dim=0)

        # avoid torch.tensor(existing_tensor), which copies with a warning
        adj = sub_adj.float().to(self.device)
        x = sub_feat.float().to(self.device)

        init_logits = self.model(graph)
        init_logits = init_logits.cpu().detach()
        init_probs = torch.nn.Softmax(dim=1)(init_logits)
        init_pred_label = torch.argmax(init_logits, dim=1).squeeze()

        explainer = ExplainerModel(
            model=deepcopy(self.model),
            adj=adj,
            x=x,
            init_probs=init_probs.to(self.device),
            model_params=self.model_params,
            train_params=self.train_params
        ).to(self.device)

        self.node_feats_explanation = x
        self.probs_explanation = init_probs
        self.node_importance = torch_to_numpy(explainer._get_node_feats_mask())

        self.model.eval()
        explainer.train()

        # Init training stats
        init_probs = init_probs.numpy().squeeze()
        loss = torch.FloatTensor([10000.])

        # log description
        desc = self._set_pbar_desc()
        pbar = tqdm(
            range(
                self.train_params['num_epochs']),
            desc=desc,
            unit='step')

        for _ in pbar:
            logits, masked_feats = explainer()
            loss = explainer.loss(logits)

            # Compute number of non zero elements in the masked adjacency
            node_importance = explainer._get_node_feats_mask()
            node_importance[node_importance < self.node_thresh] = 0.
            masked_feats = masked_feats * \
                torch.stack(masked_feats.shape[-1] * [node_importance], dim=1).unsqueeze(dim=0).to(torch.float)
            probs = torch.nn.Softmax(dim=-1)(logits.cpu().squeeze()).detach().numpy()
            # argmax over the class (last) dimension
            pred_label = torch.argmax(logits, dim=-1).squeeze()

            # handle early stopping if the labels is changed
            if pred_label.item() == init_pred_label.item():
                self.node_feats_explanation = masked_feats
                self.probs_explanation = probs
                self.node_importance = torch_to_numpy(node_importance)
            else:
                print('Predicted label changed. Early stopping.')
                break

            explainer.zero_grad()
            explainer.optimizer.zero_grad()
            loss.backward(retain_graph=True)
            explainer.optimizer.step()

        node_importance = self.node_importance
        logits = init_logits.cpu().detach().numpy()

        return node_importance, logits
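
A worked example of the node-importance thresholding inside the loop (a self.node_thresh of 0.3 is a hypothetical value):

import torch

node_importance = torch.tensor([0.9, 0.2, 0.5, 0.1])
node_importance[node_importance < 0.3] = 0.
print(node_importance)  # tensor([0.9000, 0.0000, 0.5000, 0.0000])
# features of the zeroed nodes are then wiped out entirely by the
# element-wise multiplication that builds masked_feats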