def __init__(
    self,
    G: Union[nx.Graph, nx.DiGraph],
    norm_rows: bool = True,
):
    self.nodes = np.array(list(G.nodes))  # Create Node labels
    self.node_idx = np.linspace(0,
                                len(self.nodes),
                                len(self.nodes),
                                False,
                                dtype=int)
    self.node_idx_dict = dict(zip(self.node_idx, self.nodes))
    self.node_idx = torch.LongTensor(self.node_idx)
    self.node_idx.requires_grad = False

    # Derive node similarities in whole graph
    adjacency_matrix = torch.as_tensor(np.array(
        nx.adjacency_matrix(G, weight="weight").todense()),
                                       dtype=float)
    self.sim1 = row_norm(adjacency_matrix)
    self.sim1.requires_grad = False
    self.sim2 = matrix_cosine(self.sim1)
    self.sim2 = row_norm(self.sim2)
    self.sim2.requires_grad = False
def embedding_second_order_proximity(
    positions: Tensor,
    norm_rows: bool = True,
) -> Tensor:
    """
    First derives the pairwise distances, then compares these distance
    vectors between positions to derive second-order proximities.

    Parameters
    ----------
    positions : Tensor
        Input positions, usually nx2
    norm_rows : bool, optional
        If True, rows will be normed to 1, by default True

    Returns
    -------
    Tensor
        Similarity matrix
    """
    similarity_matrix = embedding_first_order_proximity(positions,
                                                        norm_rows=norm_rows)
    similarity_matrix = matrix_cosine(similarity_matrix)
    if norm_rows:
        similarity_matrix = row_norm(similarity_matrix)
    return similarity_matrix.to(positions.device)
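# Illustrative usage sketch (not part of the original module): shows how
# second-order proximities of toy 2D positions are derived by comparing their
# first-order distance profiles. The helper name and toy values are
# assumptions for demonstration only; the module's own imports are assumed.
def _example_embedding_second_order_proximity() -> Tensor:
    # Three toy positions in a 2D embedding space
    toy_positions = torch.tensor([[0.0, 0.0], [1.0, 0.0], [0.0, 2.0]])
    # Each row compares one position's distance profile with the others'
    return embedding_second_order_proximity(toy_positions, norm_rows=True)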
def embedding_first_order_proximity(
    positions: Tensor,
    norm_rows: bool = True,
) -> Tensor:
    """
    A simple application of Euclidean distance to the positions vector in the
    embedding space. Includes row normalization to get relative distances.

    Parameters
    ----------
    positions : Tensor
        Input positions, usually nx2
    norm_rows : bool, optional
        If True, rows will be normed to 1, by default True

    Returns
    -------
    Tensor
        Similarity matrix
    """
    assert isinstance(positions, Tensor)
    similarity_matrix = cdist(positions, positions, p=2).to(positions.device)
    if norm_rows:
        similarity_matrix = row_norm(similarity_matrix)
    similarity_matrix = 1 - similarity_matrix
    return similarity_matrix - torch.eye(similarity_matrix.shape[0]).to(
        positions.device)
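# Illustrative usage sketch (not part of the original module): derives
# first-order proximities for three toy positions; the diagonal is zero and
# closer position pairs receive higher values. The helper name and toy values
# are assumptions for demonstration only.
def _example_embedding_first_order_proximity() -> Tensor:
    toy_positions = torch.tensor([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]])
    return embedding_first_order_proximity(toy_positions, norm_rows=True)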
def whole_graph_rows_to_batch(
    similarity_matrix: Union[Tensor, ndarray],
    indecies: Union[Tensor, ndarray, list],
    norm_rows: bool = True,
) -> Tensor:
    """
    Selects and orders the columns of the matrix according to the given
    indices and row-normalizes if desired.

    Parameters
    ----------
    similarity_matrix : Union[Tensor, ndarray]
        Input similarity matrix
    indecies : Union[Tensor, ndarray, list]
        Indices, in the desired order
    norm_rows : bool, optional
        Whether to row-normalize, by default True

    Returns
    -------
    Tensor
        similarity_matrix
    """
    similarity_matrix = similarity_matrix[:, indecies]
    if norm_rows:
        similarity_matrix = row_norm(similarity_matrix)
    return torch.as_tensor(similarity_matrix)
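# Illustrative usage sketch (not part of the original module): keeps only the
# columns for a two-node batch from a 3x3 whole-graph similarity matrix and
# re-normalizes the rows. Names and values are assumptions for demonstration
# only.
def _example_whole_graph_rows_to_batch() -> Tensor:
    whole_graph_sim = torch.tensor([[0.0, 0.6, 0.4],
                                    [0.5, 0.0, 0.5],
                                    [0.3, 0.7, 0.0]])
    batch_indecies = [0, 2]  # keep only columns for nodes 0 and 2
    return whole_graph_rows_to_batch(whole_graph_sim,
                                     batch_indecies,
                                     norm_rows=True)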
def aggregate_measures(positions: Union[torch.Tensor, np.ndarray],
                       est_similarities: Union[torch.Tensor, np.ndarray],
                       similarities: Union[torch.Tensor, np.ndarray],
                       cut: float = 0.0):
    """
    Computes evaluation measures in the reconstruction space and in the
    embedding space, comparing estimated similarities against ground-truth
    similarities, and returns them as a dictionary.
    """
    if isinstance(positions, torch.Tensor):
        positions = positions.numpy()
    if isinstance(est_similarities, torch.Tensor):
        est_similarities = est_similarities.numpy()
    if isinstance(similarities, torch.Tensor):
        similarities = similarities.numpy()

    # Cutoff: drop estimated similarities at or below the threshold
    est_similarities[est_similarities <= cut] = 0

    measure_dict = {}

    ##### Reconstruction Space
    # Precision calculations
    measure_dict["rec_2precision"] = avg_k_precision(
        similarity=similarities, est_similarity=est_similarities, k=2)
    measure_dict["rec_5precision"] = avg_k_precision(
        similarity=similarities, est_similarity=est_similarities, k=5)
    measure_dict["rec_map"] = mean_AP(similarity=similarities,
                                      est_similarity=est_similarities)

    # PageRanks
    G_est = nx.from_numpy_array(est_similarities)
    G_norm = nx.from_numpy_array(similarities)
    pr = nx.pagerank(G_norm)
    pr_index = np.array(list(pr.keys()))
    pr_vals = np.array(list(pr.values()))
    pr_vals = pr_vals[np.argsort(pr_index)]  # Sort by node index
    pr_est = nx.pagerank(G_est)
    pr_est_index = np.array(list(pr_est.keys()))
    pr_est_vals = np.array(list(pr_est.values()))
    pr_est_vals = pr_est_vals[np.argsort(pr_est_index)]  # Sort by node index
    # Now sort by pagerank
    pr_sort = np.argsort(pr_vals)
    pr_sort_est = np.argsort(pr_est_vals)
    measure_dict["rec_pagerank_overlap"] = sum(pr_sort == pr_sort_est)
    measure_dict["rec_pagerank_l2"] = torch.norm(
        torch.as_tensor(pr_vals) - torch.as_tensor(pr_est_vals)).numpy(
        )  # Use torch here for consistency with the loss measures

    # Reconstruction error
    measure_dict["rec_l2"] = float(
        torch.norm(
            torch.as_tensor(est_similarities) -
            torch.as_tensor(similarities)).numpy())

    ##### Embedding Space
    # Distances in embedding space
    pos_distance = torch.cdist(torch.as_tensor(positions),
                               torch.as_tensor(positions))
    pos_distance = row_norm(pos_distance).type(torch.DoubleTensor)
    measure_dict["emb_l2"] = float(
        torch.norm(
            torch.as_tensor(pos_distance) -
            torch.as_tensor(similarities)).numpy())
    measure_dict["emb_2precision"] = avg_k_precision(
        similarity=similarities, est_similarity=pos_distance, k=2)
    measure_dict["emb_5precision"] = avg_k_precision(
        similarity=similarities, est_similarity=pos_distance, k=5)
    measure_dict["emb_map"] = mean_AP(similarity=similarities,
                                      est_similarity=pos_distance)

    # SE Distance: pairwise distances between rows of the similarity matrix
    se_distance = torch.cdist(torch.as_tensor(similarities),
                              torch.as_tensor(similarities))
    se_distance = row_norm(se_distance).type(torch.DoubleTensor)
    measure_dict["emb_mean_se_cosine"] = torch.nn.functional.cosine_similarity(
        pos_distance, se_distance, dim=-1).mean().numpy()
    measure_dict["emb_mean_se_l2"] = torch.trace(
        torch.cdist(se_distance, pos_distance)).mean().numpy()

    return measure_dict
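# Illustrative usage sketch (not part of the original module): evaluates a
# noisy toy reconstruction against a toy ground-truth similarity matrix for
# six nodes with 2D positions. Names and values are assumptions for
# demonstration only; avg_k_precision and mean_AP from this module are
# assumed to accept numpy arrays as used above.
def _example_aggregate_measures() -> dict:
    rng = np.random.default_rng(0)
    toy_positions = rng.random((6, 2))
    toy_similarities = rng.random((6, 6))
    np.fill_diagonal(toy_similarities, 0.0)
    # Perturbed reconstruction; cut=0.0 removes any negative entries
    toy_estimate = toy_similarities + rng.normal(0.0, 0.05, size=(6, 6))
    return aggregate_measures(positions=toy_positions,
                              est_similarities=toy_estimate,
                              similarities=toy_similarities,
                              cut=0.0)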
def __init__(self,
             G: Union[nx.Graph, nx.DiGraph],
             hierarchy_dict: dict = None,
             norm_rows: bool = True,
             hierarchy_attention_matrix: Union[torch.Tensor,
                                               np.ndarray] = None):
    self.nodes = np.array(list(G.nodes))  # Create Node labels
    self.node_idx = np.linspace(0,
                                len(self.nodes),
                                len(self.nodes),
                                False,
                                dtype=int)
    self.node_idx_dict = dict(zip(self.node_idx, self.nodes))
    self.rev_node_idx_dict = {v: k for k, v in self.node_idx_dict.items()}
    self.node_idx = torch.LongTensor(self.node_idx)
    self.node_idx.requires_grad = False

    if hierarchy_dict is None:
        # No dictionary supplied: read hierarchy levels from node attributes
        hierarchy_dict = {}
        try:
            for node in self.nodes:
                hierarchy_dict[self.rev_node_idx_dict[node]] = G.nodes[
                    node]['hierarchy']
        except Exception:
            message = "No hierarchy dictionary provided and none found in Graph!"
            logging.error(message)
            raise RuntimeError(message)
    else:
        # Re-key the supplied dictionary from node labels to node indices
        hierarchy_dict_idx = {}
        try:
            for node in hierarchy_dict.keys():
                hierarchy_dict_idx[
                    self.rev_node_idx_dict[node]] = hierarchy_dict[node]
        except Exception:
            message = "Could not parse hierarchy dict"
            logging.error(message)
            raise RuntimeError(message)
        hierarchy_dict = hierarchy_dict_idx

    self.hierarchy_dict = hierarchy_dict
    self.hierarchy_vals = np.unique(list(hierarchy_dict.values()))
    self.nr_hierarchies = len(self.hierarchy_vals)
    self.hierarchy_attention_matrix = hierarchy_attention_matrix

    # Derive node similarities in whole graph
    adjacency_matrix = torch.as_tensor(np.array(
        nx.adjacency_matrix(G, weight="weight").todense()),
                                       dtype=float)
    self.sim1 = row_norm(adjacency_matrix)
    self.sim1.requires_grad = False
    self.sim2 = matrix_cosine(self.sim1)
    self.sim2 = row_norm(self.sim2)
    self.sim2.requires_grad = False

    # Derive hierarchy attention matrix
    self.hierarchy = torch.zeros(self.sim1.shape)
    self.hierarchy.requires_grad = False
    if self.hierarchy_attention_matrix is None:
        # Default mask: 1 where the node's hierarchy value is greater than
        # or equal to the peer's hierarchy value
        for node in self.node_idx:
            for peer in self.node_idx:
                if self.hierarchy_dict[int(node)] >= self.hierarchy_dict[
                        int(peer)]:
                    self.hierarchy[node, peer] = 1
    else:
        # Custom mask: look up attention weights by hierarchy level pair
        for node in self.node_idx:
            for peer in self.node_idx:
                self.hierarchy[node, peer] = self.hierarchy_attention_matrix[
                    self.hierarchy_dict[int(node)],
                    self.hierarchy_dict[int(peer)]]
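# Illustrative sketch (not part of the original class): reproduces the default
# hierarchy-mask rule from the branch above for a toy three-node hierarchy,
# without needing a graph. The function name, hierarchy values, and node
# indexing are assumptions for demonstration only.
def _example_hierarchy_mask() -> torch.Tensor:
    toy_hierarchy = {0: 0, 1: 1, 2: 1}  # node index -> hierarchy value
    n = len(toy_hierarchy)
    mask = torch.zeros((n, n))
    for node in range(n):
        for peer in range(n):
            # Same rule as the default branch above: 1 where the node's
            # hierarchy value is >= the peer's hierarchy value
            if toy_hierarchy[node] >= toy_hierarchy[peer]:
                mask[node, peer] = 1
    return mask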
def nx_first_order_proximity(
    G: Union[nx.Graph, nx.DiGraph],
    node_ids: Union[Tensor, ndarray, list],
    whole_graph_proximity: bool = True,
    to_batch: bool = False,
    norm_rows_in_sample: bool = False,
    norm_rows: bool = True,
) -> Tensor:
    """
    Takes a networkx graph G and generates first-order node proximities.
    Diagonal elements are set to zero.

    Note that this includes non-PyTorch operations!

    Parameters
    ----------
    G : Union[nx.Graph, nx.DiGraph]
        Input graph
    node_ids : Union[Tensor, ndarray, list]
        List of nodes. Must exist in G.
    whole_graph_proximity : bool, optional
        If True, similarities between nodes in node_ids are computed based on
        all alters in the graph (including those not in node_ids).
        If False, similarities are only calculated based on nodes contained
        in node_ids.
        ATTN: Note that if True, ordering of rows reflects G.nodes;
        if False, ordering reflects the node_ids supplied (subnetwork),
        by default True
    to_batch : bool, optional
        If True, will remove the column entries of nodes not in node_ids.
        If norm_rows is True, will also re-norm the rows, by default False
    norm_rows_in_sample : bool, optional
        If True, distances are scaled such that the highest distance is 1.
        This implies that distances depend on the sample provided,
        by default False
    norm_rows : bool, optional
        If True, distances are scaled for each node, such that sum(a_ij)=1.
        This does not take into account the similarity to itself, a_ii,
        which is always 0, by default True

    Returns
    -------
    Tensor
        Similarity matrix of dimension len(node_ids)^2
    """
    if isinstance(node_ids, list):
        node_ids = np.array(node_ids)
    if isinstance(node_ids, Tensor):
        node_ids = node_ids.numpy()

    if whole_graph_proximity:
        adjacency_matrix = np.zeros([len(G.nodes), len(G.nodes)])
    else:
        adjacency_matrix = np.zeros([len(node_ids), len(node_ids)])

    if whole_graph_proximity:
        adjacency_matrix = np.array(
            nx.adjacency_matrix(G, weight="weight").todense())
    else:
        G_sub = G.subgraph(node_ids)
        for i, node in enumerate(node_ids):
            for j, (alter, datadict) in enumerate(G_sub[node].items()):
                if "weight" in datadict:
                    weight = datadict["weight"]
                else:
                    weight = 1
                adjacency_matrix[i, j] = weight

    if norm_rows_in_sample:
        adjacency_matrix = adjacency_matrix / np.max(
            adjacency_matrix)  # Norm max similarity within the sample to 1
    if norm_rows and not to_batch:
        adjacency_matrix = row_norm(adjacency_matrix)

    adjacency_matrix = np.nan_to_num(adjacency_matrix, copy=False)

    if whole_graph_proximity:
        selection = np.searchsorted(np.array(G.nodes), node_ids)
        assert (np.array(G.nodes)[selection] == node_ids
                ).all(), "Internal error, subsetting nodes"
        adjacency_matrix = adjacency_matrix[selection, :]
        if to_batch:
            adjacency_matrix = whole_graph_rows_to_batch(adjacency_matrix,
                                                         selection,
                                                         norm_rows=norm_rows)

    return torch.as_tensor(adjacency_matrix)
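# Illustrative usage sketch (not part of the original module): builds a small
# weighted path graph and derives first-order proximities for a two-node
# batch. Names and values are assumptions for demonstration only.
def _example_nx_first_order_proximity() -> Tensor:
    toy_graph = nx.Graph()
    toy_graph.add_weighted_edges_from([(0, 1, 1.0), (1, 2, 2.0), (2, 3, 1.0)])
    # whole_graph_proximity=True keeps the row order of G.nodes; to_batch
    # additionally restricts the columns to the requested nodes and re-norms
    return nx_first_order_proximity(toy_graph,
                                    node_ids=[1, 2],
                                    whole_graph_proximity=True,
                                    to_batch=True,
                                    norm_rows=True)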
def second_order_proximity(
    adjacency_matrix: Union[Tensor, ndarray],
    indecies: Union[Tensor, ndarray, list] = None,
    whole_graph_proximity: bool = True,
    to_batch: bool = False,
    distance_metric: str = "cosine",
    norm_rows_in_sample: bool = False,
    norm_rows: bool = True,
) -> Tensor:
    """
    Takes an adjacency matrix and generates second-order node proximities,
    also known as structural equivalence relations. Nodes are similar if
    they share similar ties to alters.
    Diagonal elements are set to zero.

    Note that this includes non-PyTorch operations!

    Parameters
    ----------
    adjacency_matrix : Union[Tensor, ndarray]
        Input adjacency matrix
    indecies : Union[Tensor, ndarray, list]
        List of node indices to consider in the matrix
    whole_graph_proximity : bool, optional
        If True, similarities between nodes in indecies are computed based on
        all alters in the matrix (including those not in indecies).
        If False, similarities are only calculated based on nodes contained
        in indecies, by default True
    to_batch : bool, optional
        If True, will remove the column entries of nodes not in indecies.
        If norm_rows is True, will also re-norm the rows, by default False
    distance_metric : str, optional
        Any distance metric from scipy.spatial.distance that works without
        parameters, by default 'cosine'
    norm_rows_in_sample : bool, optional
        If True, distances are scaled such that the highest distance is 1.
        This implies that distances depend on the sample provided,
        by default False
    norm_rows : bool, optional
        If True, distances are scaled for each node, such that sum(a_ij)=1.
        This does not take into account the similarity to itself, a_ii,
        which is always 0, by default True

    Returns
    -------
    Tensor
        Similarity matrix of dimension len(indecies)^2
    """
    if indecies is None:
        indecies = np.arange(0, adjacency_matrix.shape[0])
    else:
        if isinstance(indecies, list):
            indecies = np.array(indecies)
        if isinstance(indecies, Tensor):
            indecies = indecies.numpy()
    if isinstance(adjacency_matrix, Tensor):
        adjacency_matrix = adjacency_matrix.numpy()

    if not whole_graph_proximity:
        adjacency_matrix = adjacency_matrix[indecies, :]
        adjacency_matrix = adjacency_matrix[:, indecies]

    # Pairwise distances between rows, converted to similarities
    similarity_matrix = pdist(adjacency_matrix, metric=distance_metric)
    similarity_matrix = 1 - squareform(similarity_matrix)
    similarity_matrix = similarity_matrix - np.eye(similarity_matrix.shape[0],
                                                   similarity_matrix.shape[1])

    if norm_rows_in_sample:
        similarity_matrix = similarity_matrix / np.max(
            similarity_matrix)  # Norm max similarity within the sample to 1
    if norm_rows and not to_batch:
        similarity_matrix = row_norm(similarity_matrix)

    similarity_matrix = np.nan_to_num(similarity_matrix, copy=False)

    if whole_graph_proximity:
        similarity_matrix = similarity_matrix[indecies, :]
        if to_batch:
            similarity_matrix = whole_graph_rows_to_batch(similarity_matrix,
                                                          indecies,
                                                          norm_rows=norm_rows)

    return torch.as_tensor(similarity_matrix)
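# Illustrative usage sketch (not part of the original module): computes
# structural-equivalence similarities from a toy adjacency matrix. Nodes 0 and
# 1 have identical tie profiles and therefore maximal second-order proximity.
# Names and values are assumptions for demonstration only.
def _example_second_order_proximity() -> Tensor:
    toy_adjacency = np.array([[0.0, 0.0, 1.0, 1.0],
                              [0.0, 0.0, 1.0, 1.0],
                              [1.0, 1.0, 0.0, 0.0],
                              [1.0, 1.0, 0.0, 0.0]])
    # Rows for nodes 0 and 1, compared against all alters in the matrix
    return second_order_proximity(toy_adjacency,
                                  indecies=[0, 1],
                                  whole_graph_proximity=True,
                                  norm_rows=True)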