Example #1
0
 def test_cluster_weak(self):
     """
     Exercises cluster_weak as if a memory effect had been detected.
     cluster_hard itself is already covered through the regular
     cluster_graph tests.
     """
     graph = deepcopy(g)
     # map each node to its row/column position in the adjacency matrix
     adj_index = {node: i for i, node in enumerate(graph.nodes)}
     rev_index = {i: node for node, i in adj_index.items()}
     scoremat, memory, diffs = diffusion(graph=graph,
                                         limit=limit,
                                         iterations=iterations,
                                         verbose=verbose)
     hard_assignment = cluster_hard(graph=graph,
                                    adj_index=adj_index,
                                    rev_index=rev_index,
                                    scoremat=scoremat,
                                    max_clusters=max_clusters,
                                    min_clusters=min_clusters,
                                    min_cluster_size=min_cluster_size,
                                    verbose=verbose)
     flatcluster = _cluster_vector(hard_assignment, adj_index)
     weak_assignment = cluster_weak(graph=graph,
                                    diffs=diffs,
                                    cluster=flatcluster,
                                    edgescale=0.5,
                                    adj_index=adj_index,
                                    rev_index=rev_index,
                                    verbose=verbose)
     # The toy model contains only opposing-sign nodes; those are judged
     # too harsh and removed, so no weak assignments should remain.
     self.assertEqual(len(weak_assignment), 0)
Example #2
0
 def test_diffuse_graph(self):
     """
     Checks that the diffusion process produces a non-trivial matrix:
     a mean of exactly zero would indicate no diffusion took place.
     """
     scoremat = diffusion(g,
                          iterations=iterations,
                          limit=limit,
                          verbose=verbose)[0]
     self.assertNotEqual(np.mean(scoremat), 0)
Example #3
0
def central_edge(graph, percentile, permutations, error, verbose):
    """
    The min / max values that are the result of the diffusion process
    are used as a centrality measure and define positive as well as negative hub associations.

    If the permutation number is set to a value above 0, the centrality values are tested against permuted graphs.

    The fraction of positive edges and negative edges is based on the ratio between
    positive and negative weights in the network.

    Hence, a network with 90 positive weights and 10 negative weights will have 90% positive hubs returned.

    Parameters
    ----------
    :param graph: NetworkX graph of a microbial association network.
    :param percentile: Determines percentile of hub species to return.
    :param permutations: Number of permutations to carry out. If 0, no permutation test is done.
    :param error: Fraction of edges to rewire for reliability metric.
    :param verbose: Verbosity level of function
    :return: NetworkX graph with hub ID (and, when permuted, reliability score) as edge properties.
    """
    scoremat = diffusion(graph, limit=2, iterations=3, norm=False, verbose=verbose)[0]
    # extreme diffusion scores mark the candidate hub edges
    negthresh = np.percentile(scoremat, percentile)
    posthresh = np.percentile(scoremat, 100 - percentile)
    neghubs = list(map(tuple, np.argwhere(scoremat <= negthresh)))
    poshubs = list(map(tuple, np.argwhere(scoremat >= posthresh)))
    # the node list is fixed for the rest of the function; build it and the
    # matrix-index lookup once instead of re-listing nodes in every loop pass
    nodelist = list(graph.nodes)
    adj_index = {node: i for i, node in enumerate(nodelist)}
    score = None
    if permutations > 0:
        # score is None if rewiring failed inside perm_edges
        score = perm_edges(graph, percentile=percentile, permutations=permutations,
                           pos=poshubs, neg=neghubs, error=error)
    # need to make sure graph is undirected
    graph = nx.to_undirected(graph)
    # initialize empty dictionaries to store edge ID and reliability scores
    edge_vals = dict()
    edge_scores = dict()
    # need to convert matrix index to node ID; positive hubs are written
    # second so they win on any overlap, matching the original ordering
    for hubs, label in ((neghubs, 'negative hub'), (poshubs, 'positive hub')):
        for edge in hubs:
            node1 = nodelist[edge[0]]
            node2 = nodelist[edge[1]]
            edge_vals[(node1, node2)] = label
            if score is not None:
                edge_scores[(node1, node2)] = score[(adj_index[node1], adj_index[node2])]
    nx.set_edge_attributes(graph, values=edge_vals, name='hub')
    if score is not None:
        nx.set_edge_attributes(graph, values=edge_scores, name='reliability score')
    # BUG FIX: the original function annotated the graph but never returned it,
    # despite the docstring promising a graph return value
    return graph
Example #4
0
 def test_sparsity_score(self):
     """
     Checks whether correct sparsity scores are calculated.
     Because this network has 3 negative edges separating
     2 clusters, the score should be -3 + the penalty of 2000.
     """
     scoremat = diffusion(g, limit, iterations, verbose)[0]
     clusters = KMeans(2).fit_predict(scoremat)
     # map nodes to matrix positions, then invert for the score function
     adj_index = {node: i for i, node in enumerate(g.nodes)}
     rev_index = {i: node for node, i in adj_index.items()}
     sparsity = sparsity_score(g, clusters, rev_index)
     self.assertEqual(int(sparsity), 1)
Example #5
0
def perm_edges(graph, permutations, percentile, pos, neg, error):
    """
    Calls the rewire_graph function;
    returns reliability scores of edge centrality scores.
    Scores close to 1 imply that the scores are robust to perturbation.
    Reliability scores as proposed by:
    Frantz, T. L., & Carley, K. M. (2017).
    Reporting a network’s most-central actor with a confidence level.
    Computational and Mathematical Organization Theory, 23(2), 301-312.

    Parameters
    ----------
    :param graph: NetworkX graph of a microbial association network.
    :param permutations: Number of permutations to carry out. If 0, no bootstrapping is done.
    :param percentile: Determines percentile of hub species to return.
    :param pos: List of edges in the upper percentile. (e.g. positive hubs)
    :param neg: List of edges in the lower percentile. (e.g. negative hubs)
    :param error: Fraction of edges to rewire for reliability metric.
    :return: Dict mapping edges to reliability scores, or None if no
             bootstrapping was done or graph rewiring failed.
    """
    # BUG FIX: without this guard, permutations == 0 with non-empty pos/neg
    # raised ZeroDivisionError in the final division below
    if permutations <= 0:
        return None
    perms = list()
    for i in range(permutations):
        permutation, swapfail = rewire_graph(graph, error)
        if swapfail:
            # rewiring could not perturb the graph; scores are undefined
            return None
        adj = diffusion(graph=permutation, limit=2, iterations=3, norm=False, verbose=False)[0]
        perms.append(adj)
        logger.info('Permutation %s', i)
    # count, per hub edge, how many permuted graphs reproduce its extremity
    posmatches = {hub: 0 for hub in pos}
    negmatches = {hub: 0 for hub in neg}
    # build the lookup sets once instead of once per permutation
    posset = set(pos)
    negset = set(neg)
    for perm in perms:
        negthresh = np.percentile(perm, percentile)
        posthresh = np.percentile(perm, 100 - percentile)
        permneg = map(tuple, np.argwhere(perm <= negthresh))
        permpos = map(tuple, np.argwhere(perm >= posthresh))
        for match in posset.intersection(permpos):
            posmatches[match] += 1
        for match in negset.intersection(permneg):
            negmatches[match] += 1
    reliability = {**posmatches, **negmatches}
    # reliability equals the fraction of permutations that reproduce the hub
    return {k: (v / permutations) for k, v in reliability.items()}
Example #6
0
def cluster_graph(graph, limit, max_clusters, min_clusters, min_cluster_size,
                  iterations, subset, ratio, edgescale, permutations, verbose):
    """
    Takes a networkx graph and carries out network clustering.
    The returned graph contains cluster assignments and weak assignments.
    If weight is available, this is considered during the diffusion process.

    Parameters
    ----------
    :param graph: Weighted, undirected networkx graph.
    :param limit: Percentage in error decrease until matrix is considered converged.
    :param max_clusters: Maximum number of clusters to evaluate in K-means clustering.
    :param min_clusters: Minimum number of clusters to evaluate in K-means clustering.
    :param min_cluster_size: Minimum cluster size as fraction of network size
    :param iterations: If algorithm does not converge, it stops here.
    :param subset: Fraction of edges used in subsetting procedure
    :param ratio: Ratio of scores that need to be positive or negative for a stable edge
    :param edgescale: Mean edge weight for node removal
    :param permutations: Number of permutations for partial iterations
    :param verbose: Verbosity level of function
    :return: NetworkX graph with cluster annotations, and the score matrix.
    """
    adj_index = {node: i for i, node in enumerate(graph.nodes)}
    rev_index = {v: k for k, v in adj_index.items()}
    # next part is to define scoring matrix
    balanced = [False]
    scoremat, memory, diffs = diffusion(graph=graph,
                                        limit=limit,
                                        iterations=iterations,
                                        verbose=verbose)
    if not nx.is_directed(graph):
        # one boolean per Harary component
        balanced = harary_components(graph, verbose=verbose).values()
        # partial diffusion results in unclosed graphs for directed graphs,
        # and can therefore not be used here.
        # BUG FIX: a non-empty dict view is always truthy, so the original
        # `if balanced:` fired for every undirected graph; the network is
        # balanced only if every Harary component is balanced.
        if all(balanced):
            logger.info(
                "This is a balanced network, "
                "so you may be able to get good results with the Kernighan-Lin algorithm."
            )
        if verbose:
            logger.info("Carrying out diffusion on partial graphs. ")
        # ratio from 0.7 to 0.9 appears to give good results on 3 clusters
        scoremat, partials = partial_diffusion(graph=graph,
                                               iterations=iterations,
                                               limit=limit,
                                               subset=subset,
                                               ratio=ratio,
                                               permutations=permutations,
                                               verbose=verbose)
    # select optimal cluster by sparsity score
    bestcluster = cluster_hard(graph=graph,
                               adj_index=adj_index,
                               rev_index=rev_index,
                               scoremat=scoremat,
                               max_clusters=max_clusters,
                               min_clusters=min_clusters,
                               min_cluster_size=min_cluster_size,
                               verbose=verbose)
    flatcluster = _cluster_vector(bestcluster, adj_index)
    if not all(balanced):
        # unbalanced networks may contain nodes that cannot be assigned
        # cleanly to a cluster; mark those as 'weak'
        weak_nodes = cluster_weak(graph,
                                  diffs=diffs,
                                  cluster=flatcluster,
                                  edgescale=edgescale,
                                  adj_index=adj_index,
                                  rev_index=rev_index,
                                  verbose=verbose)
        weak_dict = {node: ('weak' if adj_index[node] in weak_nodes else 'strong')
                     for node in graph.nodes}
        # set attributes once the assignment dict is complete, instead of
        # re-calling set_node_attributes on every loop pass as before
        nx.set_node_attributes(graph, values=weak_dict, name='assignment')
    nx.set_node_attributes(graph, values=bestcluster, name='cluster')
    return graph, scoremat