Code Example #1
    def __call__(self, data):
        # find idx of landmark
        _, node_idx = knn(data.pos, data[self.key].view(-1, 3), k=1)

        # extract patch around vertex
        mask = self._mask_geodesic_patch(node_idx, data)
        vert_indices = torch.nonzero(mask).view(-1)

        # update datastructure
        data.pos = data.pos[mask]
        data.edge_index, data.edge_attr = subgraph(
            vert_indices, data.edge_index, data.edge_attr, relabel_nodes=True)

        if hasattr(data, 'x'):
            data.x = data.x[mask]

        if hasattr(data, 'face'):
            # only keep faces whose 3 vertices are all in the mask
            data.face = data.face[:, (
                data.face[..., None] == vert_indices.view(-1)).any(-1).all(0)]

            # remap faces to match new vertex indices
            index_mapping = torch.zeros(mask.shape, dtype=torch.long)
            index_mapping[mask] = torch.arange(data.pos.shape[0])
            data.face = index_mapping[data.face]

        return data
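The face-filtering and index-remapping idiom above is compact but easy to misread. The following self-contained sketch (toy tensors, not taken from the original repository) replays the same two steps on a tiny mesh: keep only the faces whose three vertices all survive the mask, then remap the surviving vertex indices to a contiguous range.

import torch

# toy mesh: 5 vertices and 3 triangular faces, stored as a (3, num_faces) tensor as in PyG
face = torch.tensor([[0, 1, 2],
                     [1, 2, 3],
                     [2, 3, 4]])
mask = torch.tensor([True, True, True, False, True])  # drop vertex 3
vert_indices = torch.nonzero(mask).view(-1)           # tensor([0, 1, 2, 4])

# keep only the faces whose 3 vertices are all in the mask
keep = (face[..., None] == vert_indices.view(-1)).any(-1).all(0)
face = face[:, keep]                                   # only face (0, 1, 2) survives

# remap the old vertex ids to new contiguous ids
index_mapping = torch.zeros(mask.shape, dtype=torch.long)
index_mapping[mask] = torch.arange(int(mask.sum()))
face = index_mapping[face]
print(face.tolist())  # [[0], [1], [2]]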
Code Example #2
    def subgraph(self, subset: Tensor):
        r"""Returns the induced subgraph given by the node indices
        :obj:`subset`.

        Args:
            subset (LongTensor or BoolTensor): The nodes to keep.
        """

        out = subgraph(subset,
                       self.edge_index,
                       relabel_nodes=True,
                       num_nodes=self.num_nodes,
                       return_edge_mask=True)
        edge_index, _, edge_mask = out

        if subset.dtype == torch.bool:
            num_nodes = int(subset.sum())
        else:
            num_nodes = subset.size(0)

        data = copy.copy(self)

        for key, value in data:
            if key == 'edge_index':
                data.edge_index = edge_index
            elif key == 'num_nodes':
                data.num_nodes = num_nodes
            elif isinstance(value, Tensor):
                if self.is_node_attr(key):
                    data[key] = value[subset]
                elif self.is_edge_attr(key):
                    data[key] = value[edge_mask]

        return data
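For reference, a minimal usage sketch of the method above (assuming a torch_geometric version where `Data.subgraph` is available): either a LongTensor of node indices or a BoolTensor mask selects the induced subgraph, and node- and edge-level attributes are sliced together with `edge_index`.

import torch
from torch_geometric.data import Data

edge_index = torch.tensor([[0, 1, 1, 2, 2, 3],
                           [1, 0, 2, 1, 3, 2]])
data = Data(x=torch.randn(4, 8), edge_index=edge_index)

sub = data.subgraph(torch.tensor([1, 2, 3]))                  # by node indices
sub = data.subgraph(torch.tensor([False, True, True, True]))  # or by boolean mask
print(sub.num_nodes, sub.edge_index.tolist())                 # 3 nodes, edges relabeled to 0..2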
Code Example #3
File: data_utils.py  Project: its-gucci/hgcn
def load_data(args, datapath):
    if args.dataset in ['arxiv'] and args.task == 'lp':
        data = {}
        dataset = PygNodePropPredDataset(name='ogbn-{}'.format(args.dataset),
                                         root='/pasteur/u/jeffgu/hgcn/data')
        split_idx = dataset.get_idx_split()
        train_idx, valid_idx, test_idx = split_idx["train"], split_idx[
            "valid"], split_idx["test"]
        induced_edges_train, _ = subgraph(train_idx, dataset[0].edge_index)
        induced_edges_valid, _ = subgraph(valid_idx, dataset[0].edge_index)
        induced_edges_test, _ = subgraph(test_idx, dataset[0].edge_index)
        neg_edges_train = negative_sampling(induced_edges_train)
        neg_edges_valid = negative_sampling(induced_edges_valid)
        neg_edges_test = negative_sampling(induced_edges_test)
        data['adj_train'] = to_scipy_sparse_matrix(
            dataset[0].edge_index).tocsr()
        data['features'] = dataset[0].x
        data['train_edges'], data[
            'train_edges_false'] = induced_edges_train, neg_edges_train
        data['val_edges'], data[
            'val_edges_false'] = induced_edges_valid, neg_edges_valid
        data['test_edges'], data[
            'test_edges_false'] = induced_edges_test, neg_edges_test
    elif args.task == 'nc':
        data = load_data_nc(args.dataset, args.use_feats, datapath,
                            args.split_seed)
    else:
        data = load_data_lp(args.dataset, args.use_feats, datapath)
        adj = data['adj_train']
        if args.task == 'lp':
            adj_train, train_edges, train_edges_false, val_edges, val_edges_false, test_edges, test_edges_false = mask_edges(
                adj, args.val_prop, args.test_prop, args.split_seed)
            data['adj_train'] = adj_train
            data['train_edges'], data[
                'train_edges_false'] = train_edges, train_edges_false
            data['val_edges'], data[
                'val_edges_false'] = val_edges, val_edges_false
            data['test_edges'], data[
                'test_edges_false'] = test_edges, test_edges_false
    data['adj_train_norm'], data['features'] = process(data['adj_train'],
                                                       data['features'],
                                                       args.normalize_adj,
                                                       args.normalize_feats)
    if args.dataset == 'airport':
        data['features'] = augment(data['adj_train'], data['features'])
    return data
Code Example #4
def test_subgraph_convert():
    G = nx.complete_graph(5)

    edge_index = from_networkx(G).edge_index
    sub_edge_index_1, _ = subgraph([0, 1, 3, 4], edge_index,
                                   relabel_nodes=True)

    sub_edge_index_2 = from_networkx(G.subgraph([0, 1, 3, 4])).edge_index

    assert sub_edge_index_1.tolist() == sub_edge_index_2.tolist()
Code Example #5
def test_subgraph():
    edge_index = torch.tensor([
        [0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6],
        [1, 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5],
    ])
    edge_attr = torch.Tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])

    idx = torch.tensor([3, 4, 5], dtype=torch.long)
    mask = torch.tensor([0, 0, 0, 1, 1, 1, 0], dtype=torch.bool)
    indices = [3, 4, 5]

    for subset in [idx, mask, indices]:
        out = subgraph(subset, edge_index, edge_attr)
        assert out[0].tolist() == [[3, 4, 4, 5], [4, 3, 5, 4]]
        assert out[1].tolist() == [7, 8, 9, 10]

        out = subgraph(subset, edge_index, edge_attr, relabel_nodes=True)
        assert out[0].tolist() == [[0, 1, 1, 2], [1, 0, 2, 1]]
        assert out[1].tolist() == [7, 8, 9, 10]
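A related sketch, assuming the `return_edge_mask` flag available in recent torch_geometric versions (it is also used in Code Example #2): besides the filtered `edge_index` and `edge_attr`, the call can return a boolean mask over the original edges, which is convenient for slicing any additional edge-level tensors.

import torch
from torch_geometric.utils import subgraph

edge_index = torch.tensor([[0, 1, 1, 2, 2, 3],
                           [1, 0, 2, 1, 3, 2]])
edge_weight = torch.tensor([1., 2., 3., 4., 5., 6.])
mask = torch.tensor([False, True, True, True])

sub_edge_index, _, edge_mask = subgraph(mask, edge_index,
                                        relabel_nodes=True,
                                        return_edge_mask=True)
print(sub_edge_index.tolist())          # [[0, 1, 1, 2], [1, 0, 2, 1]]
print(edge_weight[edge_mask].tolist())  # [3.0, 4.0, 5.0, 6.0]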
Code Example #6
def _subgraph(data):

    x = data.x.clone()
    edge_index = data.edge_index.clone()
    __edge_attr = data.edge_attribute.clone()
    traj_vocabs = data.traj_vocabs.clone()
    traj_index = torch.tensor([(x == n).nonzero().squeeze()[0].item()
                               for n in traj_vocabs])

    order2index = defaultdict(list)
    for i, idx in enumerate(__edge_attr, 1):
        order2index[i] = int(idx)

    edge_attr = torch.zeros(edge_index.size(1), dtype=torch.long)
    edge_attr[__edge_attr] = torch.arange(1, __edge_attr.size(0) + 1)

    mask = torch.zeros(x.shape[0], dtype=torch.bool)
    inds = torch.unique(traj_index)
    mask[inds] = True
    perm = torch.randperm(torch.sum(~mask.squeeze()))
    conn = torch.arange(
        mask.size(0))[~mask.squeeze()][perm[:max(3,
                                                 len(inds) // 3)]]
    #     conn = torch.tensor([], dtype=torch.long)

    nodes = torch.cat((inds, conn), dim=0)
    edge_ind, edge_att = subgraph(nodes,
                                  edge_index,
                                  edge_attr,
                                  num_nodes=len(x))

    # edge_attr matching between origin and subgraph
    edge_attr = torch.argsort(edge_attr,
                              descending=False)[-edge_attr.nonzero().size(0):]
    edge_att = torch.argsort(edge_att,
                             descending=False)[-edge_att.nonzero().size(0):]
    origin_sub = {int(p): int(c) for p, c in zip(edge_attr, edge_att)}

    edge_att = torch.tensor(
        [origin_sub[index] for order, index in order2index.items()],
        dtype=torch.long)

    tm_index = torch.cat(
        (torch.tensor(traj_index, dtype=torch.long), conn.to(torch.long)),
        dim=0)

    data.edge_index = edge_ind.to(torch.long)
    data.edge_attribute = edge_att
    data.edge_attribute_len = torch.tensor(len(edge_att),
                                           dtype=torch.long).unsqueeze(-1)
    data.tm_index = tm_index
    data.tm_len = torch.tensor(len(tm_index), dtype=torch.long).unsqueeze(-1)

    return data, order2index
Code Example #7
File: util.py  Project: yzfxmu/GIB
def to_inductive(data):
    mask = data.train_mask | data.val_mask
    data.x = data.x[mask]
    data.y = data.y[mask]
    data.train_mask = data.train_mask[mask]
    data.val_mask = data.val_mask[mask]
    data.test_mask = None
    data.edge_index, _ = subgraph(mask, data.edge_index, None,
                                  relabel_nodes=True, num_nodes=data.num_nodes)
    data.num_nodes = mask.sum().item()
    return data
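A minimal usage sketch of the helper above, assuming a standard Planetoid-style `Data` object that carries `train_mask` and `val_mask` (the dataset root below is illustrative only). Note that `to_inductive` mutates its argument, so it is applied to a copy here:

import copy
from torch_geometric.datasets import Planetoid

dataset = Planetoid(root='/tmp/Cora', name='Cora')  # path is illustrative
data = dataset[0]

train_data = to_inductive(copy.copy(data))          # keep only train/val nodes and their induced edges
print(train_data.num_nodes, data.num_nodes)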
Code Example #8
def test_from_networkx_subgraph_convert():
    import networkx as nx

    G = nx.complete_graph(5)

    edge_index = from_networkx(G).edge_index
    sub_edge_index_1, _ = subgraph([0, 1, 3, 4],
                                   edge_index,
                                   relabel_nodes=True)

    sub_edge_index_2 = from_networkx(G.subgraph([0, 1, 3, 4])).edge_index

    assert sub_edge_index_1.tolist() == sub_edge_index_2.tolist()
Code Example #9
    def process_cluster_data(self, data):
        """
        Data processing for ClusterSelfGNN. First the data object is clustered according to the number of partitions
        specified by this class. Then, we randomly sample a number of clusters and merge them together. Finally, data
        augmentation is applied to each of the final clusters. This is a simple strategy motivated by ClusterGCN and
        employed to improve the scalability of SelfGNN.

        :param data: A PyTorch Geometric Data object
        :return: a list of Data objects depending on the final number of clusters.
        """
        data_list = []
        clusters = []
        num_parts, cluster_size = self.num_parts, self.num_parts // self.final_parts

        # Cluster the data
        cd = ClusterData(data, num_parts=num_parts)
        for i in range(1, cd.partptr.shape[0]):
            cls_nodes = cd.perm[cd.partptr[i - 1]: cd.partptr[i]]
            clusters.append(cls_nodes)

        # Randomly merge clusters and apply transformation
        np.random.shuffle(clusters)
        for i in tqdm(range(0, len(clusters), cluster_size), "Processing clusters"):
            end = i + cluster_size if len(clusters) - i > cluster_size else len(clusters)
            cls_nodes = torch.cat(clusters[i:end]).unique()

            x = data.x[cls_nodes]
            y = data.y[cls_nodes]
            train_mask = data.train_mask[cls_nodes]
            dev_mask = data.val_mask[cls_nodes]
            test_mask = data.test_mask[cls_nodes]
            edge_index, edge_attr = subgraph(cls_nodes, data.edge_index, relabel_nodes=True)
            view1data = Data(edge_index=edge_index, x=x, edge_attr=edge_attr, num_nodes=cls_nodes.shape[0])
            view2data = view1data if self.augumentation is None else self.augumentation(view1data)
            if not hasattr(view2data, "edge_attr") or view2data.edge_attr is None:
                view2data.edge_attr = torch.ones(view2data.edge_index.shape[1])
            diff = abs(view2data.x.shape[1] - view1data.x.shape[1])
            if diff > 0:
                smaller_data = view1data if view1data.x.shape[1] < view2data.x.shape[1] else view2data
                smaller_data.x = F.pad(smaller_data.x, pad=(0, diff))
                view1data.x = F.normalize(view1data.x)
                view2data.x = F.normalize(view2data.x)
            new_data = Data(y=y, x=view1data.x, x2=view2data.x, edge_index=view1data.edge_index,
                            edge_index2=view2data.edge_index,
                            edge_attr=view1data.edge_attr, edge_attr2=view2data.edge_attr, train_mask=train_mask,
                            dev_mask=dev_mask, test_mask=test_mask, num_nodes=cls_nodes.shape[0], nodes=cls_nodes)
            data_list.append(new_data)
        print()
        return data_list
Code Example #10
    def negative_sampling(self, batch, num_negative_samples):
        # mask = torch.tensor([False]*len(self.data.x))
        # mask[batch] = True
        # _, a = self.edge_index_to_train(mask)
        a, _ = subgraph(batch, self.data.edge_index)
        Adj = self.adj_list(a)
        g = dict()
        batch = batch.tolist()
        for node in batch:
            g[node] = batch
        for node, neighbors in Adj.items():
            # all elements of the batch that are not neighbors of `node`
            g[node] = list(set(batch) - set(neighbors))
        for node, neg_elem in g.items():
            # if a specific number of negative samples is requested, truncate or duplicate as needed
            g[node] = self.not_less_than(num_negative_samples, g[node])
        return self.torch_list(g)
Code Example #11
File: gatmodel.py  Project: cscvlab/SN-Graph
    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        x_posr = x
        batch_size = (int)(batch.size()[0] / self.node_per_graph)

        ##if(self.training == True and random.random()>0.8):
        #    rotateAngleA = random.random() * pi
        #    rotateAngleB = random.random() * pi
        #    rotateAngleC = random.random() * pi
        #    sinA, cosA = math.sin(rotateAngleA), math.cos(rotateAngleA)
        #    sinB, cosB = math.sin(rotateAngleB), math.cos(rotateAngleB)
        #    sinC, cosC = math.sin(rotateAngleC), math.cos(rotateAngleC)
        #    matrix = [[cosC*cosB, -sinC*cosA+cosC*sinB*sinA, sinC*sinA+cosC*sinB*cosA],
        #              [sinC*cosB, cosC*cosA+sinC*sinB*sinA, -cosC*sinA+sinC*sinB*cosA],
        #              [-sinB, cosB*sinA, cosB*cosA]]
        #    x_xyz = x[:,0:3]
        #    x_xyz = torch.matmul(x_xyz, torch.tensor(matrix).to(x_xyz.dtype).to(x_xyz.device))
        #    #x_xyz = LinearTransformation(torch.tensor(matrix))(x_xyz)
        #    x_r = x[:,3]
        #    x_r = x_r.reshape((x_r.shape[0], 1))
        #    x = torch.cat((x_xyz, x_r), dim=1)

        # add_self_loops is not in-place; keep the returned edge_index
        edge_index, _ = add_self_loops(edge_index)

        if (self.training == True):
            mask, torchmask = random_drop_node(self.node_per_graph, (int)(
                batch.size()[0] / self.node_per_graph), 0.50, 0.50)
            x = x[mask]
            x_posr = x_posr[mask]
            batch = batch[mask]
            edge_index, _ = subgraph(torchmask, edge_index, relabel_nodes=True)
        x0 = self.linprev(x, edge_index)
        x1 = self.conv1(x0, edge_index) + self.lin1(x0)
        x1n = F.relu(x1)
        x2 = self.conv2(x1n, edge_index) + self.lin2(x1n)
        x2n = F.relu(x2)
        x3 = self.conv3(x2n, edge_index) + self.lin3(x2n)
        x3n = F.relu(x3)
        x4 = self.conv4(x3n, edge_index) + self.lin4(x3n)
        x = torch.cat((x1, x2, x3, x4), dim=1)
        x = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)
        x = self.mlp(x)
        x = F.log_softmax(x, dim=-1)
        return x
Code Example #12
File: transforms.py  Project: sisaman/LPGNN
    def __call__(self, data):
        y = torch.nn.functional.one_hot(data.y)
        c = y.sum(dim=0).sort(descending=True)
        y = y[:, c.indices[:self.num_classes]]
        idx = y.sum(dim=1).bool()

        data.x = data.x[idx]
        data.y = y[idx].argmax(dim=1)
        data.num_nodes = data.y.size(0)

        if 'adj_t' in data:
            data.adj_t = data.adj_t[idx, idx]
        elif 'edge_index' in data:
            data.edge_index, data.edge_attr = subgraph(idx, data.edge_index, data.edge_attr, relabel_nodes=True)

        if 'train_mask' in data:
            data.train_mask = data.train_mask[idx]
            data.val_mask = data.val_mask[idx]
            data.test_mask = data.test_mask[idx]

        return data
Code Example #13
    def predict(self, data):
        """End to end prediction for INVASE with input batched graph
        """
        x, edge_index, batch = data.x, data.edge_index, data.batch

        # pass through selector
        node_prob, fea_prob = self(x, edge_index, batch, component="actor")
        # Sampling the features based on the selection_probability
        node_selection_mask = torch.bernoulli(node_prob)
        node_selection = torch.squeeze(
            torch.nonzero(node_selection_mask, as_tuple=False))
        fea_selection_mask = torch.bernoulli(fea_prob)

        # make subgraph
        # mask out features
        subgraph_x = x * fea_selection_mask[batch]  # keep all the nodes
        subgraph_edge_index, _ = subgraph(
            node_selection,
            edge_index)  # returning only the edges of the subgraph
        # Prediction
        y_hat = self.critic([subgraph_x, node_selection], subgraph_edge_index,
                            batch)

        return y_hat
Code Example #14
    def pos_sample(self,batch,**kwargs):
        d_pb =datetime.now()
        batch = batch
        pos_batch=[]
        d = datetime.now()
        if self.loss["C"] == "Adj" and self.loss["Name"] == "LINE":
                name = 'pos_samples_LINE_'+self.datasetname+'.pickle'
                if os.path.exists(name):
                        with open(name,'rb') as f:
                            pos_batch = pickle.load(f)
                else:
                    A = self.edge_index_to_adj_train(batch)
                    pos_batch = self.convert_to_samples(batch, A)
                    with open(name,'wb') as f:
                        pickle.dump(pos_batch,f)
        elif self.loss["C"] == "Adj" and self.loss["Name"] == "VERSE_Adj":
                name = 'pos_samples_VERSEAdj_'+self.datasetname+'.pickle'
                if os.path.exists(name):
                    with open(name,'rb') as f:
                        pos_batch = pickle.load(f)
                else:
                    Adj = self.edge_index_to_adj_train(batch).type(torch.FloatTensor)

                    A = (Adj / sum(Adj)).t()
                    A[torch.isinf(A)] = 0
                    A[torch.isnan(A)] = 0
                    pos_batch = self.convert_to_samples(batch, A)
                    with open(name,'wb') as f:
                        pickle.dump(pos_batch,f)
                        
                
        elif self.loss["C"] == "SR":
                SimRankName = 'SimRank'+self.datasetname+'.pickle'
                if os.path.exists(SimRankName):
                    with open(SimRankName,'rb') as f:
                        A = pickle.load(f)
                else:
                    Adj,_ = subgraph(batch,self.data.edge_index) 
                    row,col= Adj
                    row = row.to(self.device)
                    col = col.to(self.device)
                    ASparse = SparseTensor(row=row, col=col, sparse_sizes=(len(batch), len(batch)))
                    r = 200 
                    length = list(map(lambda x: x*int(r/100), [22,17,14,10,8,6,5,4,3,11]))
                    mask = []
                    for i, l in enumerate(length):
                        mask1 = torch.zeros([l,10])
                        mask1.t()[:(i+1)] = 1
                        mask.append(mask1)
                    mask = torch.cat(mask)
                    mask_new = 1 - mask
                    A = self.find_sim_rank_for_batch_torch(batch,ASparse,self.device,mask,mask_new,r)
                    with open(SimRankName,'wb') as f:
                        pickle.dump(A,f)
                samples_name = 'samples_simrank_' + self.datasetname +'.pickle'
                if os.path.exists(samples_name):
    
                    with open(samples_name,'rb') as f:
                        pos_batch = pickle.load(f)

                else:
                    pos_batch = self.convert_to_samples(batch, A)
                    with open(samples_name,'wb') as f:
                        pickle.dump(pos_batch,f)
                    
                        
        elif self.loss["C"] == "PPR":
            alpha = self.alpha
            name_of_file = 'pos_samples_VERSEPPR_'+str(alpha)+'_' +self.datasetname+'.pickle'
            if os.path.exists(name_of_file):
                        with open(name_of_file,'rb') as f:
                            pos_batch = pickle.load(f)
            else:   
                    Adg = self.edge_index_to_adj_train(batch).type(torch.FloatTensor)
                    print('1')
                    invD =torch.diag(1/sum(Adg.t()))
                    invD[torch.isinf(invD)] = 0
                    print('2')
                    A = ((1-alpha)*torch.inverse(torch.diag(torch.ones(len(Adg))) - alpha*torch.matmul(invD,Adg)))
                    print('3')
                    pos_batch = self.convert_to_samples(batch, A)
                    print('4')
                    with open(name_of_file,'wb') as f:
                        pickle.dump(pos_batch,f)
      
        return pos_batch
Code Example #15
File: model_util.py  Project: leekh7411/GraphProt2
def get_subgraph_scores(loader, device, model, list_win_sizes, softmax=False):
    """
    Get position-wise scores by scoring subgraphs, and normalize the
    class-1 softmax probabilities to the range -1 .. 1. Only works with
    batch size == 1.

    """
    # List of lists to store probability list for each site.
    pr_ll = []

    model.eval()
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            l_x = len(data.x)
            # Make graph index list.
            idx_list = []
            for i in range(l_x):
                idx_list.append(i)
            sm = torch.nn.Softmax()
            pr_win_list = []
            for win_size in list_win_sizes:
                win_extlr = int(win_size / 2)
                pr_list = []
                for i in range(l_x):
                    s = i - win_extlr
                    e = i + win_extlr + 1
                    if s < 0:
                        s = 0
                    if e > l_x:
                        e = l_x
                    subset = idx_list[s:e]
                    sub_edge_index = subgraph(subset, data.edge_index)
                    output = model(data.x, sub_edge_index[0], data.batch)
                    if softmax:
                        probs = sm(output)
                        #class_0_prob = float(probs[0][0].cpu().detach().numpy())
                        class_1_prob = float(
                            probs[0][1].cpu().detach().numpy())
                        pr_list.append(class_1_prob)
                    else:
                        output = torch.exp(output)
                        output = output.cpu().detach().numpy()[:, 1]
                        class_1_prob = float(output[0])
                        pr_list.append(class_1_prob)

                for i, pr in enumerate(pr_list):
                    pr_list[i] = min_max_normalize_probs(pr,
                                                         1,
                                                         0,
                                                         borders=[-1, 1])

                # Deal with scores at ends.
                start_idx = idx_list[:win_extlr]
                end_idx = idx_list[-win_extlr:]
                for i in start_idx:
                    pr_list[i] = pr_list[win_extlr]
                for i in end_idx:
                    pr_list[i] = pr_list[l_x - win_extlr - 1]
                pr_win_list.append(pr_list)

            # Calculate mean list scores.
            mean_pr_list = list(np.mean(pr_win_list, axis=0))
            # Add mean scores list to existing list of lists.
            pr_ll.append(mean_pr_list)

    assert pr_ll, "pr_ll empty"
    return pr_ll
Code Example #16
File: utils_data.py  Project: jovitalukasik/SVGe
def prep_data(data, max_num_nodes, aggr='sum', device='cpu', NB201=False, NB101=False):
    device = torch.device(device)
    data_list=[]
    for graph in tqdm(data):
        node_atts=graph.node_atts.numpy()
        node_atts_reverse=np.flip(graph.node_atts.numpy(),0)
        num_nodes=node_atts.size
        L_list=list(range(num_nodes-1,-1,-1))
        L= { i : L_list[i] for i in range(0,len(L_list)) }
        edge_list= sort_edge_index(graph.edge_index,num_nodes)
        edge_index_reverse= torch.flip(edge_list,[0,1])
        edge_list_reverse=torch.LongTensor(np.stack(([L[x] for x in edge_index_reverse[0].numpy()],[L[x] for x in edge_index_reverse[1].numpy()])))
        edge_list_reverse= sort_edge_index(edge_list_reverse,num_nodes)
        nodes=np.zeros(max_num_nodes-1, dtype=int)
        nodes[:num_nodes-1]=1    
        acc=graph.acc.numpy().item()
        if NB201: 
            test_acc=graph.test_acc.numpy().item()
            acc_avg=graph.acc_avg.numpy().item()
            test_acc_avg=graph.test_acc_avg.numpy().item()
            training_time=graph.training_time.numpy().item()
            data=Data(edge_index=edge_list.to(device),
                        num_nodes=num_nodes, 
                        node_atts=torch.LongTensor(node_atts).to(device),
                        acc=torch.tensor([acc]).to(device),
                        test_acc=torch.tensor([test_acc]).to(device),
                        acc_avg=torch.tensor([acc_avg]).to(device),
                        test_acc_avg=torch.tensor([test_acc_avg]).to(device),
                        training_time=torch.tensor([training_time]).to(device),
                        nodes=torch.tensor(nodes).unsqueeze(0).to(device)
                    )
        elif NB101:
            # try: 
            training_time=graph.training_time.numpy().item()
            test_acc=graph.test_acc.numpy().item()
            data=Data(edge_index=edge_list.to(device),
                        num_nodes=num_nodes, 
                        node_atts=torch.LongTensor(node_atts).to(device),
                        acc=torch.tensor([acc]).to(device),
                        test_acc=torch.tensor([test_acc]).to(device),
                        nodes=torch.tensor(nodes).unsqueeze(0).to(device),
                        training_time=torch.tensor([training_time]).to(device),
                    )
            # except:
        else:
            data=Data(edge_index=edge_list.to(device),
                        num_nodes=num_nodes, 
                        node_atts=torch.LongTensor(node_atts).to(device),
                        acc=torch.tensor([acc]).to(device),
                        nodes=torch.tensor(nodes).unsqueeze(0).to(device)
                    )
        data_full=[data]
        for idx in range(max_num_nodes-1):
            num_nodes=idx+2
            if num_nodes>node_atts.size:
                data=Data(edge_index=subgraph(list(range(2)), edge_list)[0].to(device),
                        num_nodes=num_nodes,
                        node_atts=torch.LongTensor([node_atts[0]]).to(device), 
                        edges=torch.zeros(idx+1).unsqueeze(0).to(device)
                )   
            else:
                data=Data(edge_index=subgraph(list(range(num_nodes)), edge_list)[0].to(device),
                        num_nodes=num_nodes,
                        node_atts=torch.LongTensor([node_atts[idx+1]]).to(device), 
                        edges=to_dense_adj(edge_list)[0][:,idx+1][:idx+1].unsqueeze(0).to(device)
                )
            data_full.append(data)
        for idx in range(max_num_nodes-1):
            num_nodes=idx+2
            if num_nodes>node_atts_reverse.size:
                data=Data(edge_index=subgraph(list(range(2)), edge_list_reverse)[0].to(device),
                        num_nodes=num_nodes,
                        node_atts=torch.LongTensor([node_atts_reverse[0]]).to(device), 
                        edges=torch.zeros(idx+1).unsqueeze(0).to(device)
                ) 
            else:
                data=Data(edge_index=subgraph(list(range(num_nodes)), edge_list_reverse)[0].to(device),
                        num_nodes=num_nodes,
                        node_atts=torch.LongTensor([node_atts_reverse[idx+1]]).to(device), 
                        edges=to_dense_adj(edge_list_reverse)[0][:,idx+1][:idx+1].unsqueeze(0).to(device)
                )
            data_full.append(data)

        data_list.append(tuple(data_full))
    return data_list
Code Example #17
source_nodes = torch.cat([
    torch.where(rel_data.node_year_dict['paper'] == year)[0]
    for year in source_years
])

target_nodes = torch.cat([
    torch.where(rel_data.node_year_dict['paper'] == year)[0]
    for year in target_years
])

source_nodes, _ = source_nodes.sort()
target_nodes, _ = target_nodes.sort()

source_edge_index, _ = subgraph(source_nodes,
                                data.edge_index,
                                relabel_nodes=True)
target_edge_index, _ = subgraph(target_nodes,
                                data.edge_index,
                                relabel_nodes=True)

source_data = Data(x=rel_data.x_dict['paper'][source_nodes],
                   edge_index=source_edge_index,
                   y=rel_data.y_dict['paper'][source_nodes])

target_data = Data(x=rel_data.x_dict['paper'][target_nodes],
                   edge_index=target_edge_index,
                   y=rel_data.y_dict['paper'][target_nodes])

data = target_data.to(device)  # Train on Target split
Code Example #18
File: data.py  Project: skadoodleR/BGRL_Pytorch
    def process_cluster_data(self, data):
        """
        Augmented view data generation based on clustering.

        :param data:
        :return:
        """
        data_list = []
        clusters = []
        num_parts, cluster_size = self.num_parts, self.num_parts // self.final_parts

        # Cluster the data
        cd = ClusterData(data, num_parts=num_parts)
        for i in range(1, cd.partptr.shape[0]):
            cls_nodes = cd.perm[cd.partptr[i - 1]:cd.partptr[i]]
            clusters.append(cls_nodes)

        # Randomly merge clusters and apply transformation
        np.random.shuffle(clusters)
        for i in range(0, len(clusters), cluster_size):
            end = i + cluster_size if len(
                clusters) - i > cluster_size else len(clusters)
            cls_nodes = torch.cat(clusters[i:end]).unique()
            sys.stdout.write(
                f'\rProcessing cluster {i + 1}/{len(clusters)} with {self.final_parts} nodes'
            )
            sys.stdout.flush()

            x = data.x[cls_nodes]
            y = data.y[cls_nodes]
            train_mask = data.train_mask[cls_nodes]
            dev_mask = data.val_mask[cls_nodes]
            test_mask = data.test_mask[cls_nodes]
            edge_index, edge_attr = subgraph(cls_nodes,
                                             data.edge_index,
                                             relabel_nodes=True)
            # use a fresh name so the full `data` object is not shadowed for later iterations
            cluster_data = Data(edge_index=edge_index,
                                x=x,
                                edge_attr=edge_attr,
                                num_nodes=cls_nodes.shape[0])
            view1data, view2data = self.augumentation(cluster_data)
            if not hasattr(view1data,
                           "edge_attr") or view1data.edge_attr is None:
                view1data.edge_attr = torch.ones(view1data.edge_index.shape[1])
            if not hasattr(view2data,
                           "edge_attr") or view2data.edge_attr is None:
                view2data.edge_attr = torch.ones(view2data.edge_index.shape[1])
            diff = abs(view2data.x.shape[1] - view1data.x.shape[1])
            if diff > 0:
                smaller_data = view1data if view1data.x.shape[
                    1] < view2data.x.shape[1] else view2data
                smaller_data.x = F.pad(smaller_data.x, pad=(0, diff))
                view1data.x = F.normalize(view1data.x)
                view2data.x = F.normalize(view2data.x)
            print(view1data)
            print(view2data)
            new_data = Data(y=y,
                            x1=view1data.x,
                            x2=view2data.x,
                            edge_index1=view1data.edge_index,
                            edge_index2=view2data.edge_index,
                            edge_attr1=view1data.edge_attr,
                            edge_attr2=view2data.edge_attr,
                            train_mask=train_mask,
                            dev_mask=dev_mask,
                            test_mask=test_mask,
                            num_nodes=cls_nodes.shape[0],
                            nodes=cls_nodes)
            data_list.append(new_data)
        print()
        return data_list
Code Example #19
    def evaluate(self, generator, criterion, optimizer, device, task="train"):
        """evaluate the model
        Params:
        - generator: graph dataloader
        - criterion: baseline loss function
        - optimizer: optimiser linked to model parameters
        - device: cuda or cpu
        - task: train, val or test
        """
        actor_loss_meter = AverageMeter()
        baseline_acc_meter = AverageMeter()
        critic_acc_meter = AverageMeter()
        prop_of_nodes = AverageMeter()
        prop_of_feas = AverageMeter()

        if task == "test":
            self.eval()
            x_test = []
            selected_features = []
            selected_nodes = []
            y_trues = []
            y_preds = []
        else:
            if task == "val":
                self.eval()
            elif task == "train":
                self.train()
            else:
                raise NameError("Only train, val or test is allowed as task")

        with trange(len(generator)) as t:
            for data in generator:
                # these are batched graphs
                orig = data.clone()
                x, edge_index, batch, y_true = data.x, data.edge_index, data.batch, data.y
                x, edge_index, batch, y_true = x.to(device), edge_index.to(
                    device), batch.to(device), y_true.to(device)
                # prediction on full graph
                baseline_logits = self(x,
                                       edge_index,
                                       batch,
                                       component="baseline")
                # print(baseline_logits)
                baseline_loss = criterion(baseline_logits, y_true)

                # pass through selector
                node_prob, fea_prob = self(x,
                                           edge_index,
                                           batch,
                                           component="actor")
                # Sampling the features based on the selection_probability
                node_selection_mask = torch.bernoulli(node_prob)
                node_selection = torch.squeeze(
                    torch.nonzero(node_selection_mask, as_tuple=False))
                fea_selection_mask = torch.bernoulli(fea_prob)

                # make subgraph
                # mask out features
                subgraph_x = x * fea_selection_mask[
                    batch]  # keep all the nodes
                subgraph_edge_index, _ = subgraph(
                    node_selection,
                    edge_index)  # returning only the edges of the subgraph

                critic_logits = self([subgraph_x, node_selection],
                                     subgraph_edge_index,
                                     batch,
                                     component="critic")
                critic_loss = criterion(critic_logits, y_true)

                actor_loss = self.actor_loss(
                    node_selection_mask.clone().detach(),
                    fea_selection_mask.clone().detach(),
                    batch.clone().detach(),
                    self.softmax(critic_logits).clone().detach(),
                    self.softmax(baseline_logits).clone().detach(),
                    y_true.float(), node_prob, fea_prob)

                actor_loss_meter.update(actor_loss.data.cpu().item(),
                                        y_true.size(0))
                critic_preds = torch.argmax(critic_logits, dim=1)
                critic_acc = torch.sum(
                    critic_preds == y_true).float() / y_true.size(0)
                critic_acc_meter.update(critic_acc)
                baseline_preds = torch.argmax(baseline_logits, dim=1)
                baseline_acc = torch.sum(
                    baseline_preds == y_true).float() / y_true.size(0)
                baseline_acc_meter.update(baseline_acc)

                prop_of_feas.update(
                    torch.mean(torch.mean(fea_selection_mask, dim=-1)),
                    y_true.size(0))
                prop_of_nodes.update(torch.mean(node_selection_mask),
                                     y_true.size(0))

                if task == "test":
                    # collect and analyse results
                    x_test += orig.to_data_list()
                    selected_features.append(fea_prob.detach().cpu().numpy())
                    node_prob = node_prob.detach().cpu().numpy()
                    # get graphwise node selection
                    selected_nodes += [[
                        x for j, x in enumerate(node_prob) if batch[j] == i
                    ] for i in range(len(y_true))]
                    y_trues.append(y_true.detach().cpu().numpy())
                    y_preds.append(critic_preds.detach().cpu().numpy())

                elif task == "train":
                    # compute gradient and do SGD step
                    optimizer.zero_grad()
                    total_loss = actor_loss + critic_loss + baseline_loss
                    total_loss.backward()
                    optimizer.step()
                t.update()

        # TODO explanation accuracy

        if task == "test":

            return critic_acc_meter.avg, baseline_acc_meter.avg, x_test, \
                    np.concatenate(selected_features, axis=0), selected_nodes, np.concatenate(y_trues), np.concatenate(y_preds)
        else:
            return actor_loss_meter.avg, critic_acc_meter.avg, baseline_acc_meter.avg, prop_of_feas.avg, prop_of_nodes.avg
Code Example #20
File: load_data.py  Project: JunweiSUN/AutoGRL
def load_data(name, seed, transform=None):
    '''
    Load data from files and return a pytorch geometric `Data` object
    '''
    random.seed(seed) # make sure that the split of data is the same
    ROOT = osp.dirname(osp.abspath(__file__)) + '/..'

    if name in ['cora', 'citeseer', 'pubmed']:   # datasets for transductive node classification
        data = Planetoid(osp.join(ROOT, 'data'), name, transform=transform)[0]
        data.task = 'semi' # semi-supervised
        data.setting = 'transductive' # transductive
        return data
    
    elif name in ['wikics']:
        dataset = WikiCS(osp.join(ROOT, 'data', 'wikics'), transform=transform)
        data = dataset[0]
        data.task = 'semi'
        data.setting = 'transductive'
        data.train_mask = data.train_mask[:,0]
        data.val_mask = data.val_mask[:, 0]
        data.stopping_mask = data.stopping_mask[:, 0]
        return data

    elif name in ['ppi']: # datasets for inductive node classification
        train_dataset = PPI(osp.join(ROOT, 'data', 'ppi'), split='train', transform=transform)
        val_dataset = PPI(osp.join(ROOT, 'data', 'ppi'), split='val', transform=transform)
        test_dataset = PPI(osp.join(ROOT, 'data', 'ppi'), split='test', transform=transform)
        return (train_dataset, val_dataset, test_dataset)
    elif name in ['usa-airports']:
        try:
            data = pickle.load(open(osp.join(ROOT, 'data', name, 'data.pkl'), 'rb'))
            return data
        except FileNotFoundError:
            print('Data not found. Re-generating...')
        nx_graph = nx.read_edgelist(osp.join(ROOT, 'data', name, 'edges.txt'))
        nx_graph = nx.convert_node_labels_to_integers(nx_graph, label_attribute='id2oid') # oid for original id
        oid2id = {int(v):k for k,v in nx.get_node_attributes(nx_graph, 'id2oid').items()}
        id2label = {}
        for line in open(osp.join(ROOT, 'data', name, 'labels.txt')):
            linesplit = line.strip().split()
            oid = int(linesplit[0])
            label = int(linesplit[1])
            id2label[oid2id[oid]] = {'y': label} # here we assume that the label ids start from 0 and the labeling is consistent.
        nx.set_node_attributes(nx_graph, id2label)

        data = from_networkx(nx_graph)
        num_nodes = len(nx_graph.nodes)
        node_idxs = list(range(num_nodes))
        random.shuffle(node_idxs)
        # split data, train:val:test = 80%:10%:10%
        train_idxs = node_idxs[:int(0.8 * num_nodes)]
        val_idxs = node_idxs[int(0.8 * num_nodes):int(0.9 * num_nodes)]
        test_idxs = node_idxs[int(0.9 * num_nodes):]

        data.train_mask = torch.zeros(num_nodes, dtype=torch.bool)
        data.val_mask = torch.zeros(num_nodes, dtype=torch.bool)
        data.test_mask = torch.zeros(num_nodes, dtype=torch.bool)
        data.train_mask[train_idxs] = True
        data.val_mask[val_idxs] = True
        data.test_mask[test_idxs] = True
        if data.x is not None and transform is not None:
            data.x = transform(data.x)
        data.num_nodes = num_nodes
        data.task = 'sup' # supervised
        data.setting = 'transductive' # transductive
        pickle.dump(data, open(osp.join(ROOT, 'data', name, 'data.pkl'), 'wb'))
        return data

    elif name in ['ogbn-arxiv']:
        dataset = PygNodePropPredDataset(name, root=osp.join(ROOT, 'data'), transform=transform)
        split_idx = dataset.get_idx_split()
        data = dataset[0]
        split_idx['val'] = split_idx.pop('valid')
        for key, idx in split_idx.items():
            mask = torch.zeros(data.num_nodes, dtype=torch.bool)
            mask[idx] = True
            data[f'{key}_mask'] = mask
        data.task = 'sup' # supervised
        data.setting = 'transductive' # transductive
        return data

    elif name in ['photo']:
        dataset = Amazon('data/photo', 'photo', transform=transform)
        data = dataset[0]
        data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
        data.train_mask[:-1000] = True
        data.val_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
        data.val_mask[-1000: -500] = True
        data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
        data.test_mask[-500:] = True

        data.train_edge_index, _ = subgraph(data.train_mask, data.edge_index, relabel_nodes=True)
        data.val_edge_index, _ = subgraph(data.val_mask, data.edge_index, relabel_nodes=True)
        data.test_edge_index, _ = subgraph(data.test_mask, data.edge_index, relabel_nodes=True)
        data.train_x = data.x[data.train_mask]
        data.train_y = data.y[data.train_mask]
        data.val_x = data.x[data.val_mask]
        data.val_y = data.y[data.val_mask]
        data.test_x = data.x[data.test_mask]
        data.test_y = data.y[data.test_mask]

        data.num_train_nodes = data.train_x.shape[0]
        data.task = 'sup' # supervised
        data.setting = 'inductive' # inductive
        return data

    else:
        raise NotImplementedError('Not supported dataset.')
Code Example #21
    def neg_sample(self, batch):
        len_batch = len(batch)
        a, _ = subgraph(batch.tolist(), self.data.edge_index)
        neg_batch = self.NS.negative_sampling(batch, num_negative_samples=self.num_negative_samples)
        return neg_batch  # % len_batch
Code Example #22
File: ego_net_main.py  Project: DylanSand/ego-gnn
 else:
     graph = real_data[0]
 #graph = five_data
 graph.edge_index = to_undirected(graph.edge_index, graph.num_nodes)
 graph.edge_index = add_self_loops(graph.edge_index,
                                   num_nodes=graph.num_nodes)[0]
 graph.coalesce()
 temp = NeighborSampler(edge_index=graph.edge_index, sizes=[-1])
 batches = temp
 egoNets = [0] * graph.num_nodes
 adjMats = [0] * graph.num_nodes
 plot = 331
 curPlot = 0
 norm_degrees = []
 for batch_size, n_id, adj in batches:
     curData = subgraph(n_id, graph.edge_index)
     updated_e_index = to_undirected(curData[0], n_id.shape[0])
     subgraph_size = torch.numel(n_id)
     cur_n_id = torch.sort(n_id)[0].tolist()
     cur_e_id = adj.e_id.tolist()
     subgraph2 = Data(edge_index=updated_e_index,
                      edge_attr=curData[1],
                      num_nodes=subgraph_size,
                      n_id=cur_n_id,
                      e_id=cur_e_id,
                      degree=len(cur_n_id) - 1,
                      adj=get_adj(updated_e_index, graph.edge_index,
                                  curPlot, cur_e_id))
     subgraph2.coalesce()
     ######################
     ego_degrees = {}
Code Example #23
    def inference(self, c):
        batch_size = c.size(0)
        h = self.generator.nodeInit(
            'start',
            torch.ones(batch_size, dtype=torch.long).to(c.device),
            c).unsqueeze(1)
        h, node_atts, edges, non_zeros = self.generator.inference(h, c, None)
        graph = node_atts.clone()
        node_atts = torch.cat([edges, node_atts], 1)
        num_zeros = (non_zeros == 0).sum().item()
        while num_zeros < batch_size:
            edge_index = edges2index(edges)
            h, node_atts_new, edges_new, non_zero = self.generator.inference(
                h, c, edge_index)
            graph = torch.cat([graph, node_atts_new, edges_new], 1)
            node_atts = torch.cat([node_atts, node_atts_new], 1)
            edges = torch.cat([edges, edges_new], 1)
            non_zeros = torch.mul(non_zeros, non_zero)
            num_zeros = (non_zeros == 0).sum().item()

        h_rev = self.generator.nodeInit(
            'end',
            torch.zeros(batch_size, dtype=torch.long).to(c.device),
            c).unsqueeze(1)
        h_rev, node_atts_rev, edges_rev, non_ones = self.generator.inference(
            h_rev, c, None, backwards=True)
        graph_rev = node_atts_rev.clone()
        node_atts_rev = torch.cat([edges_rev - edges_rev, node_atts_rev], 1)
        num_ones = (non_ones == 0).sum().item()
        while num_ones < batch_size:
            edge_index_rev = edges2index(edges_rev)
            h_rev, node_atts_new_rev, edges_new_rev, non_ones = self.generator.inference(
                h_rev, c, edge_index_rev, backwards=True)
            graph_rev = torch.cat(
                [graph_rev, node_atts_new_rev, edges_new_rev], 1)
            node_atts_rev = torch.cat([node_atts_rev, node_atts_new_rev], 1)
            edges_rev = torch.cat([edges_rev, edges_new_rev], 1)
            non_ones = torch.mul(non_ones, non_ones)
            num_ones = (non_ones == 0).sum().item()

        gf = batch2graph(graph)
        gb = batch2graph(graph_rev, backward=True)

        graph_out = list()
        for i in range(batch_size):
            ef = gf[i][1]
            eb_rev = gb[i][1]
            num_nodes = ef[1][-1].item() + 1
            L_list = list(range(num_nodes - 1, -1, -1))
            L = {i: L_list[i] for i in range(0, len(L_list))}

            if eb_rev[1][-1].item() > ef[1][-1].item():
                subset = list(range(num_nodes))
                eb_rev = subgraph(subset, eb_rev)[0]
            eb = torch.flip(
                torch.stack(
                    (torch.LongTensor([L[x.item()] for x in eb_rev[0]]),
                     torch.LongTensor([L[x.item()] for x in eb_rev[1]]))),
                [0, 1])
            for j in torch.transpose(eb, 1, 0):
                if j in torch.transpose(ef, 1, 0):
                    continue
                else:
                    ef = torch.cat([ef, j.unsqueeze(1)], 1)
            graph_out.append((gf[i][0].to(c.device), ef.to(c.device)))

        return graph_out, node_atts.view(batch_size,
                                         -1), edges2index(edges, finish=True)
Code Example #24
def denoise_graph(data, weighted_edge_mask, node_explanations, neighbours, node_idx, feat=None, label=None, threshold_num=10):
    """Cleaning a graph by thresholding its node values.

    Args:
            - weighted_edge_mask:  Edge mask, with importance given to each edge
            - node_explanations :  Shapley values for neighbours
            - neighbours
            - node_idx          :  Index of node to highlight (TODO ?)
            - feat              :  An array of node features.
            - label             :  A list of node labels.
            - threshold_num     :  The maximum number of nodes to threshold.
    """
    # Subgraph with only relevant nodes - pytorch
    s = subgraph(
        torch.cat((torch.tensor([node_idx]), neighbours)), data.edge_index)[0]
    
    # Disregard size of explanations
    node_explanations = np.abs(node_explanations)

    # Create graph of neighbourhood of node of interest
    G = nx.DiGraph()
    G.add_nodes_from(neighbours.detach().numpy())
    G.add_node(node_idx)
    G.nodes[node_idx]["self"] = 1
    if feat is not None:
        for node in G.nodes():
            G.nodes[node]["feat"] = feat[node].detach().numpy()
    if label is not None:
        for node in G.nodes():
            G.nodes[node]["label"] = label[node].item()

    # Find importance threshold required to retrieve the 10 most important neighbours.
    threshold_num = min(len(neighbours), threshold_num)
    threshold = np.sort(
        node_explanations)[-threshold_num]

    # # Keep edges that satisfy the threshold
    # weighted_edge_list = [
    #     (data.edge_index[0, i].item(),
    #      data.edge_index[1, i].item(), weighted_edge_mask[i].item())
    #     for i, _ in enumerate(weighted_edge_mask)
    #     if weighted_edge_mask[i] >= threshold
    # ]

    # Keep edges that satisfy the threshold
    node_expl_dico = {}
    for i, imp in enumerate(node_explanations):
        node_expl_dico[neighbours[i].item()] = imp 
    node_expl_dico[node_idx] = torch.tensor(0)
    weighted_edge_list = [(el1.item(), el2.item(), node_expl_dico[el1.item()].item())
                          for el1, el2 in zip(s[0], s[1])]
    # Remove edges from node of interest to neighbours
    weighted_edge_list = [item for item in weighted_edge_list if item[0] != 0]
    G.add_weighted_edges_from(weighted_edge_list)

    # Keep nodes that satisfy the threshold
    del_nodes = []
    for i, node in enumerate(G.nodes()):
        if node != node_idx:
            if node_explanations[i] < threshold:
                del_nodes.append(node)
    G.remove_nodes_from(del_nodes)

    return G