Beispiel #1
0
def test_contains_self_loops():
    # Edge list containing the self-loop (0, 0) -> detector must fire.
    edge_index = torch.stack([
        torch.tensor([0, 1, 0]),
        torch.tensor([1, 0, 0]),
    ], dim=0)
    assert contains_self_loops(edge_index)

    # Edge list without any (i, i) pair -> detector must stay silent.
    edge_index = torch.stack([
        torch.tensor([0, 1, 1]),
        torch.tensor([1, 0, 2]),
    ], dim=0)
    assert not contains_self_loops(edge_index)
Beispiel #2
0
def CreateAuxGraph(edge_index, pos_i, pos_j, original_vertex_features, pos):
    """Lift each edge of the input graph to an auxiliary vertex.

    Every original edge (u, v) becomes a new vertex e; the auxiliary graph
    connects both u->e and v->e (so no vertex is isolated and no edge is a
    self-loop).  Auxiliary vertices get zero features and are positioned at
    the midpoint of their edge's endpoints.

    Returns (edge_index_aux, new_vertex_features, new_vertex_pos).
    """
    n_orig = original_vertex_features.shape[0]
    n_edges = len(edge_index[0, :])

    # Enlarged position / feature tables; auxiliary rows start out zeroed.
    new_vertex_pos = torch.cat(
        [pos, torch.zeros(n_edges, pos.shape[1]).to(device)], dim=0)
    new_vertex_features = torch.cat(
        [original_vertex_features,
         torch.zeros(n_edges, original_vertex_features.shape[1]).to(device)],
        dim=0)

    # One fresh vertex id per original edge, placed after the original ids.
    aux_ids = (n_orig + torch.arange(0, n_edges)).to(device)

    src_to_aux = torch.stack((edge_index[0, :], aux_ids), dim=0)
    tgt_to_aux = torch.stack((edge_index[1, :], aux_ids), dim=0)

    # Auxiliary vertices sit at the midpoint of their edge's endpoints.
    new_vertex_pos[n_orig:, :] = (pos_i + pos_j) / 2

    edge_index_aux = torch.cat([tgt_to_aux, src_to_aux], dim=1)

    # Sanity checks on the construction above.
    assert (not contains_self_loops(edge_index_aux))
    assert (not contains_isolated_nodes(edge_index_aux))

    return edge_index_aux, new_vertex_features, new_vertex_pos
Beispiel #3
0
def load_torch_geometric_data(dataset, name):
    """Download/load a torch_geometric dataset and return dense arrays.

    Args:
        dataset: dataset class name, e.g. ``'Coauthor'`` or ``'WikiCS'``.
        name: sub-dataset name (e.g. ``'CS'``); ignored for the datasets
            that take no ``name`` argument (``WikiCS``, ``Flickr``).

    Returns:
        (adjacency matrix as a numpy array with self-loops removed,
         node features as numpy, labels as numpy).
    """
    root = os.getcwd().replace("\\", "/") + "/torch_geometric_data/" + dataset

    # NOTE(security): the class is still resolved via eval() to preserve the
    # original lookup behavior, but only the bare dataset name is evaluated
    # now.  The original built a full call expression that embedded the
    # working-directory path in a string — that broke on paths containing
    # quotes and was injection-prone.
    dataset_cls = eval(dataset)
    if dataset in {'WikiCS', 'Flickr'}:
        data = dataset_cls(root=root)
    else:
        # e.g. Coauthor(root='...', name='CS')
        data = dataset_cls(root=root, name=name)

    edge = data[0].edge_index
    if contains_self_loops(edge):
        edge = remove_self_loops(edge)[0]
        print("Original data contains self-loop, it is now removed")

    adj = to_dense_adj(edge)[0].numpy()

    # Edge count is halved because an undirected edge appears twice.
    print("Nodes: %d, edges: %d, features: %d, classes: %d. \n" %
          (len(adj[0]), len(edge[0]) / 2, len(
              data[0].x[0]), len(np.unique(data[0].y))))

    mask = np.transpose(adj) != adj
    col_sum = adj.sum(axis=0)
    # Typo fix: "sysmetric" -> "symmetric".
    print("Check adjacency matrix is symmetric: %r" % (mask.sum().item() == 0))
    print("Check the number of isolated nodes: %d" %
          ((col_sum == 0).sum().item()))
    print("Node degree Max: %d, Mean: %.4f, SD: %.4f" %
          (col_sum.max(), col_sum.mean(), col_sum.std()))

    return adj, data[0].x.numpy(), data[0].y.numpy()
Beispiel #4
0
    def forward(self, data, return_hidden_feature=False):
        """Run the covalent / non-covalent two-stage propagation model.

        Args:
            data: a torch_geometric batch carrying ``x``, ``edge_attr``,
                ``edge_index`` and ``batch`` attributes.
            return_hidden_feature: when True (or when
                ``self.always_return_hidden_feature`` is set), also return
                pooled intermediate features.

        Returns:
            ``self.output(pool_x)``, or the tuple ``(avg_covalent_x,
            avg_non_covalent_x, pool_x, fc0_x, fc1_x, output_x)`` when
            hidden features are requested.
        """
        #import pdb
        #pdb.set_trace()
        # Move the whole batch to the GPU when one is available.
        if torch.cuda.is_available():
            data.x = data.x.cuda()
            data.edge_attr = data.edge_attr.cuda()
            data.edge_index = data.edge_index.cuda()
            data.batch = data.batch.cuda()

        # make sure that we have undirected graph
        if not is_undirected(data.edge_index):
            data.edge_index = to_undirected(data.edge_index)

        # make sure that nodes can propagate messages to themselves
        if not contains_self_loops(data.edge_index):
            data.edge_index, data.edge_attr = add_self_loops(
                data.edge_index, data.edge_attr.view(-1))

        # covalent_propagation
        # add self loops to enable self propagation
        covalent_edge_index, covalent_edge_attr = self.covalent_neighbor_threshold(
            data.edge_index, data.edge_attr)
        (
            non_covalent_edge_index,
            non_covalent_edge_attr,
        ) = self.non_covalent_neighbor_threshold(data.edge_index,
                                                 data.edge_attr)

        # covalent_propagation and non_covalent_propagation
        covalent_x = self.covalent_propagation(data.x, covalent_edge_index,
                                               covalent_edge_attr)
        non_covalent_x = self.non_covalent_propagation(
            covalent_x, non_covalent_edge_index, non_covalent_edge_attr)

        # zero out the protein features then do ligand only gather...hacky sure but it gets the job done
        # NOTE(review): this is an alias, not a copy — the in-place zeroing
        # below also mutates `non_covalent_x`, which is later averaged into
        # `avg_non_covalent_x`.  Confirm that is intended (a .clone() here
        # would decouple them).
        non_covalent_ligand_only_x = non_covalent_x
        non_covalent_ligand_only_x[data.x[:, 14] == -1] = 0
        pool_x = self.global_add_pool(non_covalent_ligand_only_x, data.batch)

        # fully connected and output layers
        if return_hidden_feature or self.always_return_hidden_feature:
            # return prediction and atomistic features (covalent result, non-covalent result, pool result)

            avg_covalent_x, _ = avg_pool_x(data.batch, covalent_x, data.batch)
            avg_non_covalent_x, _ = avg_pool_x(data.batch, non_covalent_x,
                                               data.batch)

            fc0_x, fc1_x, output_x = self.output(pool_x,
                                                 return_hidden_feature=True)

            return avg_covalent_x, avg_non_covalent_x, pool_x, fc0_x, fc1_x, output_x
        else:
            return self.output(pool_x)
def test_structured_negative_sampling():
    edge_index = torch.as_tensor([[0, 0, 1, 2], [0, 1, 2, 3]])

    i, j, k = structured_negative_sampling(edge_index)
    # One (i, j, k) triple per input edge.
    num_edges = edge_index.size(1)
    assert i.size(0) == num_edges
    assert j.size(0) == num_edges
    assert k.size(0) == num_edges

    # Positive pairs (i, j) and sampled negatives (i, k) must be disjoint.
    adj = torch.zeros(4, 4, dtype=torch.bool)
    adj[i, j] = 1

    neg_adj = torch.zeros(4, 4, dtype=torch.bool)
    neg_adj[i, k] = 1
    assert (adj & neg_adj).sum() == 0

    # Test with no self-loops:
    edge_index = torch.LongTensor([[0, 0, 1, 1, 2], [1, 2, 0, 2, 1]])
    i, j, k = structured_negative_sampling(edge_index, num_nodes=4,
                                           contains_neg_self_loops=False)
    neg_edge_index = torch.vstack([i, k])
    assert not contains_self_loops(neg_edge_index)
Beispiel #6
0
    def forward(self, node_x: torch.Tensor, solution_x: torch.Tensor,
                edge_index: torch.Tensor, batch):
        """Combine node and solution embeddings and return edge embeddings.

        Args:
            node_x: per-node features, shape (num_nodes, F).
            solution_x: per-node solution features, same shape as node_x.
            edge_index: COO edge index, shape (2, num_edges).
            batch: batch-assignment vector for normalization.

        Returns:
            The edge embedding tensor stashed by ``propagate`` in
            ``self._edge``.
        """
        assert node_x.size() == solution_x.size()
        num_nodes, _ = node_x.size()
        # NOTE(review): this requires #nodes == #edge-index columns — an
        # unusual invariant; confirm the input graphs really carry exactly
        # one edge entry per node.
        assert num_nodes == edge_index.size(1)
        assert not contains_self_loops(edge_index)

        node_x = self.node_lin(node_x)
        solution_x = self.solution_lin(solution_x)

        # x = self.norm_x(node_x + solution_x, batch)
        # x = F.relu(x)
        x = F.gelu(node_x + solution_x)
        x = self.norm_x(x, batch)

        # propagate() is expected to populate self._edge as a side effect
        # (message/aggregate hooks defined elsewhere read self._batch too).
        self._batch = batch
        self.propagate(edge_index, x=x, size=None)

        edge_embedding = self._edge
        # Clear the stash so stale state cannot leak into the next call.
        self._edge = None

        return edge_embedding
def print_stats():
    """Print one tab-separated statistics row per dataset in DATASETS."""
    for name in DATASETS:
        dataset = load_data(name)
        num_graphs = len(dataset)
        avg_nodes = dataset.data.x.size(0) / num_graphs
        avg_edges = dataset.data.edge_index.size(1) / num_graphs
        print(
            f'{name}\t{num_graphs}\t{avg_nodes}\t{avg_edges}\t'
            f'{dataset.num_features}\t{dataset.num_classes}',
            end='\t')

        # Aggregate per-graph structural properties across the dataset.
        undirected, self_loops, isolated_nodes, onehot = True, False, False, True
        for graph in dataset:
            undirected = undirected and is_undirected(
                graph.edge_index, num_nodes=graph.num_nodes)
            self_loops = self_loops or contains_self_loops(graph.edge_index)
            isolated_nodes = isolated_nodes or contains_isolated_nodes(
                graph.edge_index, num_nodes=graph.num_nodes)
            # One-hot check: every node must have exactly one positive entry.
            onehot = onehot and not (((graph.x > 0).sum(dim=1) != 1).sum() > 0)
        print(f'{undirected}\t{self_loops}\t{isolated_nodes}\t{onehot}')
Beispiel #8
0
 def contains_self_loops(self):
     """Returns :obj:`True`, if the graph contains self-loops.

     (Docstring fix: the previous text said "does not contain", which
     inverted the actual return value of the wrapped utility.)
     """
     return contains_self_loops(self.edge_index)
Beispiel #9
0
def test_contains_self_loops():
    # Contains the self-loop (0, 0).
    looped = torch.tensor([[0, 1, 0], [1, 0, 0]])
    assert contains_self_loops(looped)

    # No (i, i) pair anywhere.
    loop_free = torch.tensor([[0, 1, 1], [1, 0, 2]])
    assert not contains_self_loops(loop_free)
Beispiel #10
0
 def contains_self_loops(self):
     """Return whether :obj:`self.edge_index` contains any self-loop edge."""
     return contains_self_loops(self.edge_index)
Beispiel #11
0
 def read_files(self, verbose=True):
     """Load features, labels, edge index (and optional edge types) from disk.

     Resolves file paths under ``self.config['data_path']`` for the dataset
     named by ``self.config['data_name']`` and stores the loaded tensors on
     ``self``: ``edge_index_data``, ``x_data``, ``y_data``, ``node2id``,
     ``edge_type_data`` (or None), ``split_masks`` and ``vocab_size``.
     When ``verbose``, prints dataset statistics and elapsed time.
     """
     start = time.time()
     if verbose:
         print("="*100 + "\n\t\t\t\t Preparing Data for {}\n".format(self.config['data_name']) + "="*100)
         print("\n\n==>> Loading feature matrix and adj matrix....")

     data_dir = os.path.join(self.config['data_path'], self.config['data_name'])
     # NOTE: the no-op .format(...) calls on placeholder-free filename
     # literals were removed; the resulting paths are unchanged.
     if self.config['data_name'] in ['gossipcop', 'politifact']:
         x_file = os.path.join(data_dir, 'feat_matrix_lr_train_30_5.npz')
         y_file = os.path.join(data_dir, 'all_labels_lr_train_30_5.json')
         adj_name = 'adj_matrix_lr_train_30_5_edge.npy'
         edge_index_file = os.path.join(data_dir, adj_name)
         node2id_file = os.path.join(data_dir, 'node2id_lr_train_30_5.json')
         node_type_file = os.path.join(data_dir, 'node_type_lr_train_30_5.npy')
         split_mask_file = os.path.join(data_dir, 'split_mask_lr_30_5.json')
         if self.config['model_name'] in ['rgcn', 'rgat', 'rsage']:
             edge_type_file = os.path.join(data_dir, 'edge_type_lr_train_30_5_edge.npy')
     else:
         x_file = os.path.join(data_dir, 'feat_matrix_lr_top10_train.npz')
         y_file = os.path.join(data_dir, 'all_labels_lr_top10_train.json')
         adj_name = 'adj_matrix_lr_top10_train_edge.npy'
         edge_index_file = os.path.join(data_dir, adj_name)
         node2id_file = os.path.join(data_dir, 'node2id_lr_top10_train.json')
         node_type_file = os.path.join(data_dir, 'node_type_lr_top10_train.npy')
         split_mask_file = os.path.join(data_dir, 'split_mask_top10.json')
         if self.config['model_name'] in ['rgcn', 'rgat', 'rsage']:
             edge_type_file = os.path.join(data_dir, 'edge_type_lr_top10_edge.npy')

     self.edge_index_data = np.load(edge_index_file)
     # BUG FIX: the original converted the undefined local name
     # `edge_index_data` (NameError at runtime) instead of the attribute.
     self.edge_index_data = torch.from_numpy(self.edge_index_data).long()

     self.x_data = load_npz(x_file)
     self.x_data = torch.from_numpy(self.x_data.toarray())
     self.y_data = json.load(open(y_file, 'r'))
     self.y_data = torch.LongTensor(self.y_data['all_labels'])
     self.node2id = json.load(open(node2id_file, 'r'))
     if self.config['model_name'] in ['rgcn', 'rgat', 'rsage']:
         self.edge_type_data = np.load(edge_type_file)
         self.edge_type_data = torch.from_numpy(self.edge_type_data).long()
     else:
         self.edge_type_data = None

     self.split_masks = json.load(open(split_mask_file, 'r'))

     num_nodes, self.vocab_size = self.x_data.shape
     if self.config['model_name'] != 'HGCN':
         isolated_nodes = contains_isolated_nodes(edge_index=self.edge_index_data)
         self_loops = contains_self_loops(edge_index=self.edge_index_data)

     if verbose:
         print("\n\n" + "-"*50 + "\nDATA STATISTICS:\n" + "-"*50)
         if self.config['model_name'] != 'HGCN':
             print("Contains isolated nodes = ", isolated_nodes)
             print("Contains self loops = ", self_loops)
         print("Vocabulary size = ", self.vocab_size)
         print('No. of nodes in graph = ', num_nodes)
         # BUG FIX: the original printed `new_num_nodes`, a name never
         # defined in this method (NameError when verbose); the line was
         # dropped since no isolated-node removal happens here.
         # NOTE(review): `self.data` is not set in this method — it is
         # assumed to be populated elsewhere before read_files is called.
         print("No. of edges in graph = ", self.data.num_edges)

         print("\nNo.of train instances = ", self.data.train_mask.sum().item())
         print("No.of val instances = ", self.data.val_mask.sum().item())
         print("No.of test instances = ", num_nodes - self.data.train_mask.sum().item() - self.data.val_mask.sum().item())

         end = time.time()
         hours, minutes, seconds = calc_elapsed_time(start, end)
         print("\n"+ "-"*50 + "\nTook  {:0>2} hours: {:0>2} mins: {:05.2f} secs  to Prepare Data\n".format(hours,minutes,seconds))
Beispiel #12
0
    def forward(self, data, return_hidden_feature=False):
        """Run the covalent / non-covalent two-stage propagation model.

        Args:
            data: a torch_geometric batch carrying ``x``, ``edge_attr``,
                ``edge_index`` and ``batch`` attributes.
            return_hidden_feature: when True, also return pooled
                intermediate features.

        Returns:
            ``self.output(pool_x)``, or the tuple ``(avg_covalent_x,
            avg_non_covalent_x, pool_x, fc0_x, fc1_x, output_x)`` when
            hidden features are requested.
        """
        # NOTE(review): .cuda() is called unconditionally here — this
        # variant will fail on CPU-only machines; verify a GPU is required.
        data.x = data.x.cuda()
        data.edge_attr = data.edge_attr.cuda()
        data.edge_index = data.edge_index.cuda()
        data.batch = data.batch.cuda()

        # make sure that we have undirected graph
        if not is_undirected(data.edge_index):
            data.edge_index = to_undirected(data.edge_index)

        # make sure that nodes can propagate messages to themselves
        if not contains_self_loops(data.edge_index):
            data.edge_index, data.edge_attr = add_self_loops(
                data.edge_index, data.edge_attr.view(-1))
        """
        # now select the top 5 closest neighbors to each node


        dense_adj = sparse_to_dense(edge_index=data.edge_index, edge_attr=data.edge_attr)

        #top_k_vals, top_k_idxs = torch.topk(dense_adj, dim=0, k=5, largest=False)

        #dense_adj = torch.zeros_like(dense_adj).scatter(1, top_k_idxs, top_k_vals)
        
        dense_adj[dense_adj == 0] = 10000   # insert artificially large values for 0 valued entries that will throw off NN calculation
        top_k_vals, top_k_idxs = torch.topk(dense_adj, dim=1, k=15, largest=False)
        dense_adj = torch.zeros_like(dense_adj).scatter(1, top_k_idxs, top_k_vals)
        
        data.edge_index, data.edge_attr = dense_to_sparse(dense_adj)
        """

        # covalent_propagation
        # add self loops to enable self propagation
        covalent_edge_index, covalent_edge_attr = self.covalent_neighbor_threshold(
            data.edge_index, data.edge_attr)
        (
            non_covalent_edge_index,
            non_covalent_edge_attr,
        ) = self.non_covalent_neighbor_threshold(data.edge_index,
                                                 data.edge_attr)

        # covalent_propagation and non_covalent_propagation
        covalent_x = self.covalent_propagation(data.x, covalent_edge_index,
                                               covalent_edge_attr)
        non_covalent_x = self.non_covalent_propagation(
            covalent_x, non_covalent_edge_index, non_covalent_edge_attr)

        # zero out the protein features then do ligand only gather...hacky sure but it gets the job done
        # NOTE(review): this is an alias, not a copy — the in-place zeroing
        # below also mutates `non_covalent_x`, which is later averaged into
        # `avg_non_covalent_x`.  Confirm that is intended (a .clone() here
        # would decouple them).
        non_covalent_ligand_only_x = non_covalent_x
        non_covalent_ligand_only_x[data.x[:, 14] == -1] = 0
        pool_x = self.global_add_pool(non_covalent_ligand_only_x, data.batch)

        # fully connected and output layers
        if return_hidden_feature:
            # return prediction and atomistic features (covalent result, non-covalent result, pool result)

            avg_covalent_x, _ = avg_pool_x(data.batch, covalent_x, data.batch)
            avg_non_covalent_x, _ = avg_pool_x(data.batch, non_covalent_x,
                                               data.batch)

            fc0_x, fc1_x, output_x = self.output(pool_x,
                                                 return_hidden_feature=True)

            return avg_covalent_x, avg_non_covalent_x, pool_x, fc0_x, fc1_x, output_x
        else:
            return self.output(pool_x)