Example #1
    def __init__(
        self,
        G: Union[nx.Graph, nx.DiGraph],
        norm_rows: bool = True,
    ):
        self.nodes = np.array(list(G.nodes))

        # Create Node labels
        self.node_idx = np.arange(len(self.nodes), dtype=int)
        self.node_idx_dict = dict(zip(self.node_idx, self.nodes))
        self.node_idx = torch.LongTensor(self.node_idx)
        self.node_idx.requires_grad = False

        # Derive node similarities in whole graph

        # First-order proximity: row-normalized weighted adjacency matrix
        adjacency_matrix = torch.as_tensor(
            nx.adjacency_matrix(G, weight="weight").toarray(),
            dtype=torch.float64)
        self.sim1 = row_norm(adjacency_matrix)
        self.sim1.requires_grad = False

        # Second-order proximity: cosine similarity between tie profiles
        self.sim2 = matrix_cosine(self.sim1)
        self.sim2 = row_norm(self.sim2)
        self.sim2.requires_grad = False
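
These excerpts call two helpers, row_norm and matrix_cosine, that are not shown. A minimal sketch consistent with how they are used here (rows scaled to sum to 1; pairwise cosine similarity between rows, with self-similarity excluded as in Examples #3 and #8) could look as follows; the actual implementations in the source may differ, and evidently also accept NumPy arrays (see Examples #7 and #8):

import torch
from torch import Tensor


def row_norm(matrix: Tensor) -> Tensor:
    # Scale each row so that its entries sum to 1 (all-zero rows stay zero)
    row_sums = matrix.sum(dim=-1, keepdim=True)
    return matrix / torch.where(row_sums == 0, torch.ones_like(row_sums),
                                row_sums)


def matrix_cosine(matrix: Tensor) -> Tensor:
    # Pairwise cosine similarity between all rows, with the diagonal
    # (self-similarity) zeroed out
    normed = torch.nn.functional.normalize(matrix, p=2, dim=-1)
    sim = normed @ normed.T
    return sim - torch.eye(sim.shape[0], device=sim.device)
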
Example #2
def embedding_second_order_proximity(
    positions: Tensor,
    norm_rows: bool = True,
) -> Tensor:
    """
    Derives first the pairwise distances, then compares these distance vectors
    between positions to derive proximities of second order.

    Parameters
    ----------
    positions : Tensor
        Input positions, usually nx2
    norm_rows : bool, optional
        If True, rows will be normed to 1, by default True

    Returns
    -------
    Tensor
        Similarity Matrix
    """
    similarity_matrix = embedding_first_order_proximity(positions,
                                                        norm_rows=norm_rows)
    similarity_matrix = matrix_cosine(similarity_matrix)

    if norm_rows:
        similarity_matrix = row_norm(similarity_matrix)
    return similarity_matrix.to(positions.device)
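
A quick usage sketch with hypothetical positions, assuming the helper sketches above:

import torch

# Four hypothetical 2D positions
positions = torch.tensor([[0.0, 0.0],
                          [0.0, 4.0],
                          [2.0, 2.0],
                          [4.0, 2.0]])

# 4x4 matrix comparing the positions' first-order proximity profiles
sim2 = embedding_second_order_proximity(positions, norm_rows=True)
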
Example #3
def embedding_first_order_proximity(
    positions: Tensor,
    norm_rows: bool = True,
) -> Tensor:
    """
    A simple application of euclidian distance to the positions vector in the embedding space.
    Includes row normalization to get relative distances.

    Parameters
    ----------
    positions : Tensor
        Input positions, usually nx2
    norm_rows : bool, optional
        If True, rows will be normed to 1, by default True

    Returns
    -------
    Tensor
        Similarity Matrix
    """

    assert isinstance(positions, Tensor)

    # Pairwise Euclidean distances between all positions
    similarity_matrix = cdist(positions, positions, p=2)
    if norm_rows:
        similarity_matrix = row_norm(similarity_matrix)
    # Convert distances into similarities
    similarity_matrix = 1 - similarity_matrix

    # Zero out the diagonal (self-similarity)
    return similarity_matrix - torch.eye(similarity_matrix.shape[0]).to(
        positions.device)
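
For intuition, a small hypothetical call: nearby positions receive high first-order proximity, and the diagonal is zero.

import torch

positions = torch.tensor([[0.0, 0.0],
                          [0.1, 0.0],   # close to the first point
                          [5.0, 5.0]])  # far from both

sim1 = embedding_first_order_proximity(positions, norm_rows=True)
# sim1[0, 1] is the largest off-diagonal entry in row 0; sim1[0, 0] == 0
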
Example #4
def whole_graph_rows_to_batch(
    similarity_matrix: Union[Tensor, ndarray],
    indecies: Union[Tensor, ndarray, list],
    norm_rows: bool = True,
) -> Tensor:
    """
    Sorts matrix according to indecies and row-normalizes if desired

    Parameters
    ----------
    similarity_matrix : Union[Tensor,ndarray]
        input
    indecies : Union[Tensor, ndarray, list]
        indecies with order
    norm_rows : bool, optional
        whether to row norm, by default True

    Returns
    -------
    Tensor
        similarity_matrix
    """
    similarity_matrix = similarity_matrix[:, indecies]
    if norm_rows:
        similarity_matrix = row_norm(similarity_matrix)
    return torch.as_tensor(similarity_matrix)
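
A minimal illustration with hypothetical values; the rows are assumed to be in batch order already, and the columns are aligned to match:

import torch

# Hypothetical 3x3 similarity matrix
sim = torch.tensor([[0.0, 0.6, 0.4],
                    [0.3, 0.0, 0.7],
                    [0.5, 0.5, 0.0]])

# Align the columns with the batch ordering [2, 0, 1] and re-normalize
batch = whole_graph_rows_to_batch(sim, indecies=[2, 0, 1], norm_rows=True)
# batch[0] is the re-normalized [0.4, 0.0, 0.6]
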
Example #5
def aggregate_measures(positions: Union[torch.Tensor, np.ndarray],
                       est_similarities: Union[torch.Tensor, np.ndarray],
                       similarities: Union[torch.Tensor, np.ndarray],
                       cut: float = 0.0):
    """
    Computes a dictionary of evaluation measures, comparing the estimated
    similarities (reconstruction space) and the embedding positions
    (embedding space) against the ground-truth similarities.
    """
    # Work on NumPy arrays throughout
    if isinstance(positions, torch.Tensor):
        positions = positions.detach().cpu().numpy()

    if isinstance(est_similarities, torch.Tensor):
        est_similarities = est_similarities.detach().cpu().numpy()

    if isinstance(similarities, torch.Tensor):
        similarities = similarities.detach().cpu().numpy()

    # Cutoff: drop estimated similarities at or below the threshold (in place)
    est_similarities[est_similarities <= cut] = 0

    measure_dict = {}

    ##### Reconstruction Space

    # Precision calculations

    measure_dict["rec_2precision"] = avg_k_precision(
        similarity=similarities, est_similarity=est_similarities, k=2)
    measure_dict["rec_5precision"] = avg_k_precision(
        similarity=similarities, est_similarity=est_similarities, k=5)
    measure_dict["rec_map"] = mean_AP(similarity=similarities,
                                      est_similarity=est_similarities)

    # PageRanks
    G_est = nx.from_numpy_array(est_similarities)
    G_norm = nx.from_numpy_array(similarities)
    pr = nx.pagerank(G_norm)
    pr_index = np.array(list(pr.keys()))
    pr_vals = np.array(list(pr.values()))
    pr_vals = pr_vals[np.argsort(pr_index)]  # Sort by node index

    pr_est = nx.pagerank(G_est)
    pr_est_index = np.array(list(pr_est.keys()))
    pr_est_vals = np.array(list(pr_est.values()))
    pr_est_vals = pr_est_vals[np.argsort(pr_est_index)]  # Sort by node index

    # Compare the PageRank orderings of both graphs
    pr_sort = np.argsort(pr_vals)
    pr_sort_est = np.argsort(pr_est_vals)

    # Number of positions on which the two orderings agree
    measure_dict["rec_pagerank_overlap"] = int(np.sum(pr_sort == pr_sort_est))
    measure_dict["rec_pagerank_l2"] = float(
        torch.norm(torch.as_tensor(pr_vals) - torch.as_tensor(pr_est_vals))
    )  # Use torch here for consistency with the loss measures

    # Reconstruction
    measure_dict["rec_l2"] = float(
        torch.norm(
            torch.as_tensor(est_similarities) -
            torch.as_tensor(similarities)).numpy())

    ##### Embedding Space

    # Distances in embedding space
    pos_distance = torch.cdist(torch.as_tensor(positions),
                               torch.as_tensor(positions))
    pos_distance = row_norm(pos_distance).type(torch.DoubleTensor)

    measure_dict["emb_l2"] = float(
        torch.norm(
            torch.as_tensor(pos_distance) -
            torch.as_tensor(similarities)).numpy())

    measure_dict["emb_2precision"] = avg_k_precision(
        similarity=similarities, est_similarity=pos_distance, k=2)
    measure_dict["emb_5precision"] = avg_k_precision(
        similarity=similarities, est_similarity=pos_distance, k=5)
    measure_dict["emb_map"] = mean_AP(similarity=similarities,
                                      est_similarity=pos_distance)

    # Structural-equivalence distances in the ground-truth similarity space
    se_distance = torch.cdist(torch.as_tensor(similarities),
                              torch.as_tensor(similarities))
    se_distance = row_norm(se_distance).type(torch.DoubleTensor)

    measure_dict["emb_mean_se_cosine"] = float(
        torch.nn.functional.cosine_similarity(pos_distance, se_distance,
                                              dim=-1).mean())
    # Mean L2 distance between corresponding rows of the two distance profiles
    measure_dict["emb_mean_se_l2"] = float(
        torch.diagonal(torch.cdist(se_distance, pos_distance)).mean())

    return measure_dict
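
The precision measures rely on helpers avg_k_precision and mean_AP that are not shown. A hedged sketch of a precision-at-k in the spirit of the calls above (the source implementations may differ, e.g. in tie or diagonal handling): for each node, take the k most similar peers under the estimated similarities and measure what fraction also appear in the top k under the ground truth, averaged over nodes.

import numpy as np


def avg_k_precision(similarity: np.ndarray, est_similarity: np.ndarray,
                    k: int = 2) -> float:
    # For each row, overlap between the top-k columns under the estimated
    # similarities and the top-k columns under the ground truth
    precisions = []
    for true_row, est_row in zip(similarity, est_similarity):
        true_top = set(np.argsort(-true_row)[:k])
        est_top = set(np.argsort(-est_row)[:k])
        precisions.append(len(true_top & est_top) / k)
    return float(np.mean(precisions))
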
Example #6
    def __init__(self,
                 G: Union[nx.Graph, nx.DiGraph],
                 hierarchy_dict: dict = None,
                 norm_rows: bool = True,
                 hierarchy_attention_matrix: Union[torch.Tensor,
                                                   np.ndarray] = None):
        self.nodes = np.array(list(G.nodes))

        # Create Node labels
        self.node_idx = np.arange(len(self.nodes), dtype=int)
        self.node_idx_dict = dict(zip(self.node_idx, self.nodes))
        self.rev_node_idx_dict = {v: k for k, v in self.node_idx_dict.items()}
        self.node_idx = torch.LongTensor(self.node_idx)
        self.node_idx.requires_grad = False

        if hierarchy_dict is None:
            # Try to read hierarchy levels from node attributes in G
            hierarchy_dict = {}
            try:
                for node in self.nodes:
                    hierarchy_dict[self.rev_node_idx_dict[node]] = G.nodes[
                        node]['hierarchy']
            except KeyError:
                message = "No hierarchy dictionary provided and none found in Graph!"
                logging.error(message)
                raise RuntimeError(message)
        else:
            # Re-key the supplied hierarchy dict from node names to indices
            hierarchy_dict_idx = {}
            try:
                for node in hierarchy_dict.keys():
                    hierarchy_dict_idx[
                        self.rev_node_idx_dict[node]] = hierarchy_dict[node]
            except KeyError:
                message = "Could not parse hierarchy dict"
                logging.error(message)
                raise RuntimeError(message)
            hierarchy_dict = hierarchy_dict_idx

        self.hierarchy_dict = hierarchy_dict
        self.hierarchy_vals = np.unique(list(hierarchy_dict.values()))
        self.nr_hierarchies = len(self.hierarchy_vals)
        self.hierarchy_attention_matrix = hierarchy_attention_matrix

        # Derive node similarities in whole graph

        # First-order proximity: row-normalized weighted adjacency matrix
        adjacency_matrix = torch.as_tensor(
            nx.adjacency_matrix(G, weight="weight").toarray(),
            dtype=torch.float64)
        self.sim1 = row_norm(adjacency_matrix)
        self.sim1.requires_grad = False

        # Second-order proximity: cosine similarity between tie profiles
        self.sim2 = matrix_cosine(self.sim1)
        self.sim2 = row_norm(self.sim2)
        self.sim2.requires_grad = False

        # Derive hierarchy attention matrix

        self.hierarchy = torch.zeros(self.sim1.shape)
        self.hierarchy.requires_grad = False

        if self.hierarchy_attention_matrix is None:
            # Default mask: a node attends to peers at the same or a lower
            # hierarchy level
            for node in self.node_idx:
                for peer in self.node_idx:
                    if self.hierarchy_dict[int(node)] >= self.hierarchy_dict[
                            int(peer)]:
                        self.hierarchy[node, peer] = 1
        else:
            # Custom mask: look up the attention weight for each pair of
            # hierarchy levels
            for node in self.node_idx:
                for peer in self.node_idx:
                    self.hierarchy[node,
                                   peer] = self.hierarchy_attention_matrix[
                                       self.hierarchy_dict[int(node)],
                                       self.hierarchy_dict[int(peer)]]
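
The nested loops above are O(n^2) in Python. For larger graphs, the default mask (a node attends to peers at the same or a lower hierarchy level) can be built with one vectorized comparison; a sketch, assuming a hypothetical tensor of per-node hierarchy levels ordered like node_idx:

import torch

# Hypothetical hierarchy levels for five nodes
levels = torch.tensor([0, 1, 1, 2, 0])

# hierarchy[i, j] = 1 where node i's level >= node j's level
hierarchy = (levels.unsqueeze(1) >= levels.unsqueeze(0)).float()
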
Example #7
def nx_first_order_proximity(
    G: Union[nx.Graph, nx.DiGraph],
    node_ids: Union[Tensor, ndarray, list],
    whole_graph_proximity: bool = True,
    to_batch: bool = False,
    norm_rows_in_sample: bool = False,
    norm_rows: bool = True,
) -> Tensor:
    """
    Takes a networkx graph G and generates first-order node proximities.

    Diagonal elements are set to zero.

    Note that this includes non-PyTorch operations!

    Parameters
    ----------
    G : Union[nx.Graph,nx.DiGraph]
        Input graph
    node_ids : Union[Tensor,ndarray,list]
        List of nodes. Must exist in G.
    whole_graph_proximity : bool, optional
        If True, similarities between nodes in node_ids are computed based
        on all alters in the graph (including those not in node_ids).
        If False, similarities are only calculated based on nodes contained
        in node_ids.
        ATTN: Note that if True, ordering of rows reflects G.nodes;
        if False, ordering reflects the node_ids supplied (subnetwork).
        By default True
    to_batch : bool, optional
        If True, will remove the row entries of nodes not in node_ids.
        If norm_rows is True, will also re-norm the rows, by default False
    norm_rows_in_sample : bool, optional
        If True, distances are scaled such that the highest distance is 1.
        This implies that distances depend on the sample provided, by default False
    norm_rows: bool, optional
        If True, distances are scaled for each node, such that sum(a_ij)=1
        This does not take into account the similarity to itself, a_ii, which is always 0.
    Returns
    -------
    Tensor
        Similarity matrix of dimension len(node_ids)^2
    """

    if isinstance(node_ids, list):
        node_ids = np.array(node_ids)
    if isinstance(node_ids, Tensor):
        node_ids = node_ids.numpy()

    if whole_graph_proximity:
        # Use the full weighted adjacency matrix of G
        adjacency_matrix = np.array(
            nx.adjacency_matrix(G, weight="weight").todense())
    else:
        # Build the adjacency matrix of the subgraph induced by node_ids,
        # with rows and columns ordered as in node_ids
        adjacency_matrix = np.zeros([len(node_ids), len(node_ids)])
        node_positions = {node: i for i, node in enumerate(node_ids)}
        G_sub = G.subgraph(node_ids)
        for i, node in enumerate(node_ids):
            for alter, datadict in G_sub[node].items():
                weight = datadict.get("weight", 1)
                adjacency_matrix[i, node_positions[alter]] = weight

    if norm_rows_in_sample:
        adjacency_matrix = adjacency_matrix / np.max(
            adjacency_matrix)  # Norm max similarity within the sample to 1
    if norm_rows and not to_batch:
        adjacency_matrix = row_norm(adjacency_matrix)
    adjacency_matrix = np.nan_to_num(adjacency_matrix, copy=False)
    if whole_graph_proximity:
        selection = np.searchsorted(np.array(G.nodes), node_ids)
        assert (np.array(G.nodes)[selection] == node_ids
                ).all(), "Internal error, subsetting nodes"
        adjacency_matrix = adjacency_matrix[selection, :]
        if to_batch:
            adjacency_matrix = whole_graph_rows_to_batch(adjacency_matrix,
                                                         selection,
                                                         norm_rows=norm_rows)

    return torch.as_tensor(adjacency_matrix)
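
Usage on a small hypothetical weighted graph; with norm_rows=True each row splits its unit mass across the node's neighbors in proportion to edge weight:

import networkx as nx

# 3-node weighted path: 0 --1.0-- 1 --3.0-- 2
G = nx.Graph()
G.add_edge(0, 1, weight=1.0)
G.add_edge(1, 2, weight=3.0)

sim = nx_first_order_proximity(G, node_ids=[0, 1, 2],
                               whole_graph_proximity=True,
                               norm_rows=True)
# Row for node 1 is [0.25, 0.0, 0.75]
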
Example #8
def second_order_proximity(
    adjacency_matrix: Union[Tensor, ndarray],
    indecies: Union[Tensor, ndarray, list] = None,
    whole_graph_proximity: bool = True,
    to_batch: bool = False,
    distance_metric: str = "cosine",
    norm_rows_in_sample: bool = False,
    norm_rows: bool = True,
) -> Tensor:
    """
    Takes an adjacency matrix and generates second-order node proximities, also known
    as structural equivalence relations.
    Nodes are similar, if they share similar ties to alters.

    Diagonal elements are set to zero.

    Note that this includes non-PyTorch operations!

    Parameters
    ----------
    adjacency_matrix: Union[Tensor, ndarray]
        Input adjacency_matrix
    indecies : Union[Tensor, ndarray, list]
        List of node indices to consider in the matrix
    whole_graph_proximity : bool, optional
        If True, similarities between nodes in indecies are computed based
        on all alters in the matrix (including those not in indecies).
        If False, similarities are only calculated based on nodes contained
        in indecies. By default True
    to_batch : bool, optional
        If True, will remove the row entries of nodes not in indecies.
        If norm_rows is True, will also re-norm the rows, by default False
    distance_metric : str, optional
        Any distance metric from scipy.spatial.distance that works
        without parameter, by default 'cosine'
    norm_rows_in_sample : bool, optional
        If True, distances are scaled such that the highest distance is 1.
        This implies that distances depend on the sample provided, by default False
    norm_rows: bool, optional
        If True, distances are scaled for each node, such that sum(a_ij)=1
        This does not take into account the similarity to itself, a_ii, which is always 0.
    Returns
    -------
    Tensor
        Similarity matrix of dimension len(indecies)^2
    """
    if indecies is None:
        indecies = np.arange(0, adjacency_matrix.shape[0])
    else:
        if isinstance(indecies, list):
            indecies = np.array(indecies)
        if isinstance(indecies, Tensor):
            indecies = indecies.numpy()
    if isinstance(adjacency_matrix, Tensor):
        adjacency_matrix = adjacency_matrix.numpy()

    if not whole_graph_proximity:
        adjacency_matrix = adjacency_matrix[indecies, :]
        adjacency_matrix = adjacency_matrix[:, indecies]

    # Pairwise distances between tie profiles (rows), converted to similarities
    similarity_matrix = pdist(adjacency_matrix, metric=distance_metric)
    similarity_matrix = 1 - squareform(similarity_matrix)
    # Zero out the diagonal (self-similarity)
    similarity_matrix = similarity_matrix - np.eye(similarity_matrix.shape[0],
                                                   similarity_matrix.shape[1])
    if norm_rows_in_sample:
        similarity_matrix = similarity_matrix / np.max(
            similarity_matrix)  # Norm max similarity within the sample to 1
    if norm_rows and not to_batch:
        similarity_matrix = row_norm(similarity_matrix)
    similarity_matrix = np.nan_to_num(similarity_matrix, copy=False)
    if whole_graph_proximity:
        similarity_matrix = similarity_matrix[indecies, :]
        if to_batch:
            similarity_matrix = whole_graph_rows_to_batch(similarity_matrix,
                                                          indecies,
                                                          norm_rows=norm_rows)

    return torch.as_tensor(similarity_matrix)
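
A small structural-equivalence check on a hypothetical adjacency matrix: two nodes with identical ties to third parties receive maximal second-order proximity even though they are not tied to each other.

import numpy as np

# Nodes 0 and 1 both tie only to nodes 2 and 3, with identical weights
adjacency = np.array([[0.0, 0.0, 1.0, 1.0],
                      [0.0, 0.0, 1.0, 1.0],
                      [1.0, 1.0, 0.0, 0.0],
                      [1.0, 1.0, 0.0, 0.0]])

sim2 = second_order_proximity(adjacency, norm_rows=True)
# Rows 0 and 1 have identical tie profiles, so sim2[0, 1] == 1.0
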