def evaluate_conductance(graph: nx.Graph, subgraphs, tau):
    """
    :param graph: the graph being evaluated
    :param subgraphs: K cluster of Subsets of the main graph
    :param tau: tuning parameter, tau = 0 = vanilla conductance
    :return: core_cut, vanilla_conductance
    """

    vanilla_conductances = []
    core_cuts = []
    for _, nodes in subgraphs.items():
        subgraph = graph.subgraph(nodes).copy()
        subgraph_complement = set(graph) - set(subgraph)
        cut = nx.cut_size(graph, subgraph, subgraph_complement)
        volume_subgraph = nx.volume(graph, subgraph)
        volume_subgraph_complement = nx.volume(graph, subgraph_complement)
        volume_div = min(volume_subgraph, volume_subgraph_complement)
        vanilla_conductances.append((cut / volume_div))
        core_cuts.append((cut + ((tau / len(graph)) * len(subgraph) * len(subgraph_complement))) / (
                volume_div + (tau * len(subgraph))))
    vanilla_conductance = min(vanilla_conductances)
    core_cut = min(core_cuts)
    logging.debug('Vanilla graph conductance: %f', vanilla_conductance)
    logging.debug('CoreCut graph conductance: %f', core_cut)

    return core_cut, vanilla_conductance
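# A minimal usage sketch (the barbell graph, cluster dict, and tau value here
# are invented for illustration, not from the original code):
import logging
import networkx as nx

logging.basicConfig(level=logging.DEBUG)
G = nx.barbell_graph(5, 0)  # two 5-cliques joined by one bridge edge
clusters = {0: set(range(5)), 1: set(range(5, 10))}
core_cut, vanilla = evaluate_conductance(G, clusters, tau=0.5)
print(vanilla, core_cut)  # 1/21 ~ 0.048 and 2.25/23.5 ~ 0.096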
def self_entropy(G, T, alpha):
    """
    self_entropy is a function that estimates the entropy of a node (with id alpha) in a codetree T of graph G
    param G: the given graph
    param T: a codetree of given graph G
    param alpha: the id of a node in T
    return: the self entropy of the node that with id alpha
    """
    if T.get_node(alpha).is_root():
        print("Error paramator in function self_entropy: alpha is illegel.")
        return 0

    parent_id = T.get_node(alpha).bpointer
    leaf_ids_of_node = []
    leaf_ids_of_parent = []
    for node in T.leaves(alpha):
        leaf_ids_of_node.append(node.identifier)
    for node in T.leaves(parent_id):
        leaf_ids_of_parent.append(node.identifier)

    g_node = nx.cut_size(
        G, leaf_ids_of_node,
        list(set(nx.nodes(G)).difference(set(leaf_ids_of_node))))
    v_G = nx.volume(G, nx.nodes(G))
    v_node = nx.volume(G, leaf_ids_of_node)
    v_parent = nx.volume(G, leaf_ids_of_parent)

    entropy = -g_node / v_G * math.log2(v_node / v_parent)
    return entropy
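# A minimal sketch of a codetree for self_entropy; it assumes T is a
# treelib.Tree, which matches the get_node/bpointer/leaves calls above.
import networkx as nx
from treelib import Tree

G = nx.path_graph(4)
T = Tree()
T.create_node("root", "root")
T.create_node("c0", "c0", parent="root")   # community {0, 1}
T.create_node("c1", "c1", parent="root")   # community {2, 3}
for v in (0, 1):
    T.create_node(str(v), v, parent="c0")
for v in (2, 3):
    T.create_node(str(v), v, parent="c1")

print(self_entropy(G, T, "c0"))  # -(1/6) * log2(3/6) ~ 0.167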
def normalized_cut_size(G, c1, c2, weight=None):
    """Returns the normalized cut size between two containers of nodes
    c1 and c2.

    The normalized cut size is defined as the cut size times the sum
    of the reciprocals of the volumes of the two cuts [1].

    Parameters
    ----------
    G : NetworkX Graph
    c1, c2 : container
      containers of nodes
    weight : keyword, optional default=None
      keyword for weight on edges

    Returns
    -------
    normalized_cut_size : float

    See Also
    --------
    cut_size
    volume
    conductance
    expansion

    References
    ----------
    .. [1] David Gleich. 'Hierarchical Directed Spectral Graph Partitioning'. Website
          report. http://www.stanford.edu/~dgleich/publications/directed-spectral.pdf
    """
    # nx.volume takes the graph as its first argument
    return nx.cut_size(G, c1, c2, weight=weight) * (1. / nx.volume(G, c1, weight=weight) +
                                                    1. / nx.volume(G, c2, weight=weight))
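# Quick illustrative check on a 4-cycle: the cut {0,1}|{2,3} has 2 edges and
# each side has volume 4, so the normalized cut size is 2 * (1/4 + 1/4) = 1.0.
import networkx as nx

G = nx.cycle_graph(4)
print(normalized_cut_size(G, {0, 1}, {2, 3}))  # 1.0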
Example #4
def prefilter_true_variant(G):
    all_nodes = list(G.nodes())
    # ratio of weighted to unweighted volume = average incident edge weight
    volumes = np.array([
        nx.volume(G, {w}, "weight") / nx.volume(G, {w}) for w in all_nodes
    ])
    median_volume = np.median(volumes)
    # np.where needs an array, not a plain list, for the comparison
    predicted_true_variants = np.where(volumes < median_volume)[0]
    return predicted_true_variants
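# Illustrative call on a small weighted triangle (weights invented); it returns
# the indices of nodes whose average incident edge weight is below the median.
import networkx as nx
import numpy as np

G = nx.Graph()
G.add_weighted_edges_from([(0, 1, 0.2), (1, 2, 0.9), (2, 0, 0.5)])
print(prefilter_true_variant(G))  # [0]: node 0 has the lowest mean weight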
def compute_vanilla_sc(G, resf, debug=False):
    D = np.array(
        [1 / math.sqrt(val) for val in [G.degree()[i] for i in G.nodes()]])
    L = nx.normalized_laplacian_matrix(G)

    start_time = time.time()
    # the smallest eigenvalues are wanted here; eigsh defaults to the largest in magnitude
    vals, vecs = sp_linalg.eigsh(L, k=8, which='SM')

    print("vals", vals)
    elapsed = time.time() - start_time
    print("train vanilla time", elapsed, file=resf)
    dict['van_running_time'] = elapsed  # module-level results dict (shadows the builtin `dict`)

    vecs = vecs[:, 1]  # second eigenvector
    y_vecs = D * vecs

    yns = []
    for i, n in enumerate(G.nodes()):
        yns.append([y_vecs[i], n])
    yns = sorted(yns, key=lambda tup: tup[0])

    # sweep over prefixes of the nodes ordered by the second eigenvector
    total_seq = [el[1] for el in yns]
    min_conduct = 1

    for i in tqdm(range(len(y_vecs) - 1),
                  mininterval=10,
                  leave=False,
                  desc='  - (Vanilla)   '):
        seq = total_seq[:(i + 1)]
        rest_seq = total_seq[(i + 1):]

        if nx.volume(G, seq) != 0 and nx.volume(G, rest_seq) != 0:
            conduct = nx.algorithms.conductance(G=G, S=seq)
            if conduct < min_conduct:
                min_cardinal = min(len(seq), len(rest_seq))
                min_conduct = conduct
                min_seq = seq.copy()
        else:
            print("Volume is 0")
            print("seq", seq)

    print("train vanilla min_conduct", min_conduct, file=resf)
    print("train vanilla min_cardinal", min_cardinal, file=resf)

    dict['van_balance'] = min_cardinal

    return min_conduct, min_seq
def calc_sum_conductance(column_name):
    # relies on module-level globals: df_with_clusters_classified, G, nodes_G
    sum_conductance = 0
    unique_cluster_names = list(set(df_with_clusters_classified[column_name]))
    for cluster_name in unique_cluster_names:
        S = set(df_with_clusters_classified[
            df_with_clusters_classified[column_name] == cluster_name]
                ['node ID'].values)
        try:
            conductance = (nx.cut_size(G, S) /
                           min(nx.volume(G, S), nx.volume(G, nodes_G - S)))
        except ZeroDivisionError:
            continue
        sum_conductance += conductance
    return sum_conductance
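# Hypothetical setup for calc_sum_conductance; G, nodes_G and
# df_with_clusters_classified are module-level globals in the original,
# and the 'kmeans' column name is invented for illustration.
import networkx as nx
import pandas as pd

G = nx.barbell_graph(3, 0)
nodes_G = set(G)
df_with_clusters_classified = pd.DataFrame({
    'node ID': list(G),
    'kmeans': [0, 0, 0, 1, 1, 1],
})
print(calc_sum_conductance('kmeans'))  # 1/7 + 1/7 ~ 0.286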
    def getCommuteDistace(G):
        """
        Returns the matrix of commute distances
        """
        verts = list(G.nodes)
        n = len(verts)
        vol = nx.volume(G, verts)

        # use NetworkX to get the Laplacian
        L = nx.laplacian_matrix(G)
        L = L.todense()
        # Gamma = L + (1/n) * ones is invertible, and the constant shift
        # cancels in the i,i + j,j - 2*i,j combination below
        Gamma = L + (1 / n) * np.ones([n, n])
        CM = np.zeros([n, n])

        # get Moore-Penrose pseudoinverse
        Gamma_pinv = np.linalg.pinv(Gamma, rcond=1e-4)
        for i in range(n):
            for j in range(i + 1, n):
                CM[i, j] = vol * (Gamma_pinv[i, i] + Gamma_pinv[j, j] - 2 * Gamma_pinv[i, j])
                CM[j, i] = CM[i, j]
        return CM
def score_conductance(nodes, graph):
    if len(nodes) < 4:
        return 0

    # optimisation: nothing to score on an empty graph
    if len(graph.edges) == 0:
        return 0

    nodes_in_graph = nodes & set(graph.nodes())
    if len(nodes_in_graph) < 4:
        return 0

    total_degree = nx.volume(graph, nodes_in_graph)
    threshold = np.sqrt(len(nodes_in_graph))
    if total_degree / len(nodes_in_graph) < threshold:
        return 0

    subgraph = nx.subgraph(graph, nodes_in_graph)
    avg_deg = np.average([val for (node, val) in subgraph.degree()])

    if avg_deg < np.sqrt(len(nodes_in_graph)):
        return 0

    try:
        # inverse_conductance takes (G, S)
        return inverse_conductance(graph, nodes_in_graph)
    except ZeroDivisionError:
        return 0
def inverse_conductance(G, S):
    weight = "weight"
    T = set(G) - set(S)
    if len(T) == 0:  # all nodes in the community: bad conductance (avoid /0)
        return 0
    num_cut_edges = nx.cut_size(G, S, T, weight=weight)
    volume_S = nx.volume(G, S, weight=weight)
    volume_T = nx.volume(G, T, weight=weight)
    # If only a few nodes lie outside the community, penalise the score
    # (trivial solution); with many nodes outside, the score stays good.
    # Adding len(T) also avoids division by zero.
    volume_T = volume_T + len(T)

    return 1 - num_cut_edges / min(volume_T, volume_S)
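# Illustrative check on a barbell graph (sets invented): a tight community
# scores near 1, a set straddling both cliques scores near 0.
import networkx as nx

G = nx.barbell_graph(5, 0)
print(inverse_conductance(G, set(range(5))))  # 1 - 1/21 ~ 0.95
print(inverse_conductance(G, {0, 5}))         # 1 - 9/9 = 0.0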
Example #10
    def getAmp(G):
        """
        Returns the matrix of amplified commute distance
        """
        verts = list(G.nodes)
        n = len(verts)

        vol = nx.volume(G,verts)

        # use NetworkX to get Laplacian
        L = nx.laplacian_matrix(G)
        L = L.todense()
        C_AMP = np.zeros([n,n])

        #get Moore-Penrose pseudo inverse
        L_MP_pseudo_inv = np.linalg.pinv(L, rcond=1e-5, hermitian=True)
        for i in tqdm(range(n - 1)):
            e_i = np.zeros([n, 1])
            e_i[i, 0] = 1
            for j in range(i + 1, n):
                e_j = np.zeros([n, 1])
                e_j[j, 0] = 1
                a = (e_i - e_j)
                b = L_MP_pseudo_inv @ a
                c_ij = (vol * (np.transpose(a) @ b))[0, 0]
                # amplified commute distance corrects the degree-driven
                # behaviour of the raw commute distance
                d_i, d_j = G.degree(verts[i]), G.degree(verts[j])
                c_ij_amp = (c_ij / vol) - (1 / d_i) - (1 / d_j) + (2 / (d_i * d_j))
                C_AMP[i, j] = c_ij_amp
                C_AMP[j, i] = c_ij_amp

        return C_AMP
def calc_sum_ncut(column_name):
    # relies on the same module-level globals as calc_sum_conductance above
    sum_ncut = 0
    unique_cluster_names = list(set(df_with_clusters_classified[column_name]))
    for cluster_name in unique_cluster_names:
        S = set(df_with_clusters_classified[
            df_with_clusters_classified[column_name] == cluster_name]
                ['node ID'].values)
        try:
            ncut = (nx.cut_size(G, S) / nx.volume(G, S))
        except ZeroDivisionError:
            continue
        sum_ncut += ncut
    return sum_ncut
def get_corecut(G, S, tau, n):
    # CoreCut objective: (cut(S) + (tau/n)*|S|*|S^c|) / (vol(S) + tau*|S|)
    vol = nx.volume(G, S)
    cut = nx.cut_size(G, S)
    s_size = len(S)
    sc_size = n - s_size
    up = cut + (tau / n) * s_size * sc_size
    down = vol + tau * s_size
    if down == 0:
        print("cut: {cut} ,   up: {up},      vol: {vol},      down: {down} ".
              format(cut=cut, up=up, vol=vol, down=down))
        return 1
    else:
        return up / down
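# Illustrative calls (graph and tau values invented): tau=0 reduces CoreCut
# to plain cut/volume, while tau>0 penalises small sets.
import networkx as nx

G = nx.barbell_graph(5, 0)
print(get_corecut(G, set(range(5)), tau=0, n=len(G)))    # 1/21 ~ 0.048
print(get_corecut(G, set(range(5)), tau=2.0, n=len(G)))  # (1+5)/(21+10) ~ 0.194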
def shannon_entropy(G):
    """
    shannon_entropy estimates the 1-dimensional structural entropy (i.e. Shannon entropy) of graph G
    param G: graph
    return: a tuple (entropy, None); the None is a placeholder for the structure.
    note: defined by Angsheng Li and Yicheng Pan in [1], definition 1.
    [1] Angsheng Li, Yicheng Pan: Structural Information and Dynamical Complexity of Networks. IEEE Trans. Information Theory 62(6): 3290-3339 (2016)
    """
    entropy = 0
    vol = nx.volume(G, nx.nodes(G))
    for node in list(nx.nodes(G)):
        p = nx.degree(G, node) / vol
        entropy += -p * math.log2(p)
    return entropy, None
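# Sanity check (illustrative): in a regular graph every node has p = 1/n,
# so the structural entropy is log2(n); a 4-cycle gives exactly 2 bits.
import networkx as nx

G = nx.cycle_graph(4)
entropy, _ = shannon_entropy(G)
print(entropy)  # 2.0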
Example #14
def compute_cond(G, normalized):
    # Sweep cut: scan vertices in decreasing score order, maintaining the
    # volumes of both sides and the number of crossing edges incrementally.
    indices = sorted(enumerate(normalized), key=lambda x: x[1], reverse=True)
    B = set(G.nodes)
    A = set()
    min_cond = sys.maxsize
    min_set = None
    volA = 0
    volB = nx.volume(G, G)
    cross = 0
    prev = None
    for vertex, val in indices:
        A.add(vertex)
        B.remove(vertex)
        if len(B) == 0:
            break
        d = nx.degree(G, vertex)
        volA += d
        volB -= d
        vol = min(volA, volB)
        for u in G.neighbors(vertex):
            if u in A:
                cross -= 1
            else:
                cross += 1
        # evaluate the cut only when the sweep crosses a new score value
        if prev is not None and prev != val and vol > 0:
            cond = cross / vol
            if cond < min_cond:
                min_cond = cond
                min_set = A.copy()
        prev = val
    return (min_cond, min_set)
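# Illustrative sweep over invented per-node scores; compute_cond expects the
# score at index v to belong to node v, which holds for barbell_graph's
# integer labels.
import sys
import networkx as nx

G = nx.barbell_graph(5, 0)
scores = [0.9, 0.8, 0.8, 0.7, 0.6, 0.2, 0.1, 0.1, 0.05, 0.0]
min_cond, min_set = compute_cond(G, scores)
print(min_cond, min_set)  # 1/21 and the first clique {0, 1, 2, 3, 4}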
Example #15
def lovaszSimonovits(myp, G, data):
    # Lovász-Simonovits sweep: order the support of the probability vector myp
    # by degree-normalised probability, then plot probability mass against
    # volume for each prefix (sweep set).
    supp = torch.nonzero(myp).squeeze().tolist()
    degs = data.adj[supp, :].sum(-1)
    sortedsupp = torch.argsort(myp[supp] / degs, descending=True).squeeze().tolist()
    support = [supp[i] for i in sortedsupp]
    vols = []
    sweepset = []
    probmass = []
    for i in support:
        sweepset += [i]
        vols += [nx.volume(G, sweepset)]
        probmass += [myp[sweepset].sum()]
    plt.plot(vols, probmass)
    return probmass, vols
Example #16
def sweep(myp, G, data):
    supp = torch.nonzero(myp).squeeze().tolist()
    degs = data.adj[supp, :].sum(-1)
    sortedsupp = torch.argsort(myp[supp] / degs, descending=True).squeeze().tolist()
    support = [supp[i] for i in sortedsupp]
    sweepset = []
    bestconductance = 1000
    bestvolume = 0
    bestset = []

    for i in support:
        sweepset += [i]
        volume = nx.volume(G, sweepset)
        conductance = nx.conductance(G, sweepset)
        if conductance < bestconductance:
            bestconductance = conductance
            bestvolume = volume
            # copy, otherwise bestset would keep growing with sweepset
            bestset = sweepset.copy()

    return bestset, bestconductance, bestvolume
def compute_regularised_sc(G, resf, debug=False):
    degrees = [val for (node, val) in G.degree()]
    sum_deg = sum(degrees)
    n = len(degrees)
    tau = sum_deg / n

    A = (nx.to_scipy_sparse_matrix(G)).astype(float)

    indices = [i for i in range(n)]
    row = np.array(indices)
    col = row
    data = [1 / math.sqrt(d + tau) for d in degrees]
    D = csr_matrix((data, (row, col)), shape=(n, n))  # D_tau^(-1/2): inverse sqrt of regularised degrees

    id_data = np.ones(n)
    I = csr_matrix((id_data, (row, col)), shape=(n, n))  #identity matrix

    L = I - (D @ A) @ D

    start_time = time.time()
    # the smallest eigenvalues are wanted here; eigsh defaults to the largest in magnitude
    vals, vecs = sp_linalg.eigsh(L, k=6, which='SM')
    elapsed = time.time() - start_time
    print("train regularised time", elapsed, file=resf)

    dict['reg_running_time'] = elapsed  # module-level results dict (shadows the builtin `dict`)

    vecs = vecs[:, 1]  # second eigenvector
    y_vecs = D * vecs

    yns = []
    for i, node in enumerate(G.nodes()):
        yns.append([y_vecs[i], node])
    yns = sorted(yns, key=lambda tup: tup[0])

    total_seq = [el[1] for el in yns]
    min_corecut = 1

    for i in tqdm(range(len(y_vecs) - 1),
                  mininterval=3,
                  leave=False,
                  desc='  - (Regularised)   '):
        seq = total_seq[:(i + 1)]
        rest_seq = total_seq[(i + 1):]

        if (nx.volume(G, seq) < nx.volume(G, rest_seq)):
            corecut = get_corecut(G=G, S=seq, tau=tau, n=n)
            if corecut < min_corecut:
                min_cardinal = min(len(seq), len(rest_seq))
                min_corecut = corecut
                min_seq = seq.copy()
        else:
            corecut = get_corecut(G=G, S=rest_seq, tau=tau, n=n)
            if corecut < min_corecut:
                min_cardinal = min(len(seq), len(rest_seq))
                min_corecut = corecut
                min_seq = rest_seq.copy()

    print("train regularised min_corecut", min_corecut, file=resf)
    print("train regularised min_cardinal", min_cardinal, file=resf)

    dict['reg_balance'] = min_cardinal

    return min_corecut, min_seq
Example #18
 def test_multidigraph(self):
     edges = [(0, 1), (1, 2), (2, 3), (3, 0)]
     G = nx.MultiDiGraph(edges * 2)
     assert_equal(nx.volume(G, set([0, 1])), 4)
Example #19
    print("Scores for Cut Distance: ", cutDistance[a])
    print("\n")

    # Modularity Score
    print("Modularity: ", ModScore)
    print("\n")
    modularityEnsemble.update({cutDistance[a]: ModScore})

    # Conductance Score
    sumOfCond = []

    for i in range(len(clusterL)):
        if clusterL[i] == []:
            # catch Division by zero
            continue
        if nx.volume(original_graph, clusterL[i]) == 0:
            continue
        # calculate Cond for current Cluster in list of Clusters
        currentCond = conductance(original_graph, clusterL[i])
        sumOfCond.append(currentCond)
        print("Conductance for: ", clusterL[i], " = ", currentCond)

    overallCond = min(sumOfCond)

    print("Overall Conductance: ", overallCond)
    print("\n")
    conductanceEnsemble.update({cutDistance[a]: overallCond})

    # edge betweenness centrality Score
    print("Edge Betweenness Centrality Score: ", averageEdge)
    print("Edge Betweenness Centrality Score: ", totalEdge)
Example #20
 def test_multidigraph(self):
     edges = [(0, 1), (1, 2), (2, 3), (3, 0)]
     G = nx.MultiDiGraph(edges * 2)
     assert nx.volume(G, {0, 1}) == 4
Example #21
 def test_barbell(self):
     G = nx.barbell_graph(3, 0)
     assert nx.volume(G, {0, 1, 2}) == 7
     assert nx.volume(G, {3, 4, 5}) == 7
Example #22
 def test_digraph(self):
     G = nx.DiGraph([(0, 1), (1, 2), (2, 3), (3, 0)])
     assert nx.volume(G, {0, 1}) == 2
Example #23
 def test_multigraph(self):
     edges = list(nx.cycle_graph(4).edges())
     G = nx.MultiGraph(edges * 2)
     assert nx.volume(G, {0, 1}) == 8
Example #24
 def test_graph(self):
     G = nx.cycle_graph(4)
     assert_equal(nx.volume(G, set([0, 1])), 4)
Example #25
 def test_graph(self):
     G = nx.cycle_graph(4)
     assert_equal(nx.volume(G, {0, 1}), 4)
Example #26
 def test_digraph(self):
     G = nx.DiGraph([(0, 1), (1, 2), (2, 3), (3, 0)])
     assert_equal(nx.volume(G, {0, 1}), 2)
Example #27
 def test_multigraph(self):
     edges = list(nx.cycle_graph(4).edges())
     G = nx.MultiGraph(edges * 2)
     assert_equal(nx.volume(G, {0, 1}), 8)
Example #28
 def test_multidigraph(self):
     edges = [(0, 1), (1, 2), (2, 3), (3, 0)]
     G = nx.MultiDiGraph(edges * 2)
     assert_equal(nx.volume(G, {0, 1}), 4)
Example #29
 def test_multigraph(self):
     edges = list(nx.cycle_graph(4).edges())
     G = nx.MultiGraph(edges * 2)
     assert_equal(nx.volume(G, set([0, 1])), 8)
Example #30
 def test_graph(self):
     G = nx.cycle_graph(4)
     assert nx.volume(G, {0, 1}) == 4
    print("mynew modscore: ", myNewModScore)
    print("\n")
    print("My Modularity: ", myModScore)
    print("\n")

    modularityEnsemble.update({cutDistance[a]: ModScore})
    modY.append(ModScore)

    # Conductance Score
    sumOfCond = []

    for i in range(len(clusterL)):
        if clusterL[i] == []:
            # catch Division by zero
            continue
        if nx.volume(original_graph, clusterL[i]) == 0:
            continue
        # calculate Cond for current Cluster in list of Clusters
        currentDist = cutDistance[a]
        cutCounter = 0
        for c in Z[:, 2]:
            if currentDist < c:
                cutCounter = cutCounter + 1
        currentCond = cutCounter / nx.volume(original_graph, clusterL[i])
        # currentCond = conductance(original_graph,clusterL[i])
        sumOfCond.append(currentCond)
        print("Conductance for: ", clusterL[i], " = ", currentCond,
              "cut-size: ", cutCounter)

    overallCond = min(sumOfCond)
Example #32
    print("Precision@{}: {}".format(k, len(right_set)/k))
    print("Recall@{}: {}".format(k, len(right_set)/len(labelList)))    
    if outPath:
        f_r = open('degree_top{}'.format(k), 'w')
        for t in top_k_dict.items():
            f_r.write(t[0]+'\t'+str(t[1])+'\n')
        f_r.close()




if __name__ == "__main__":
    args = parse_args()
    g = Graph()
    start_time = time.time()
    if args.graph_format == 'adjlist':
        g.read_adjlist(filename=args.input)
    elif args.graph_format == 'edgelist':
        g.read_edgelist(filename=args.input, weighted=args.weighted,
                        directed=args.directed)
    cluster = read_cluster(args.cluster)
    print("File read done, elapsed time {}s".format(time.time()-start_time))
    print("Node Size: {}".format(g.G.number_of_nodes()))
    print("Edge Size: {}".format(g.G.number_of_edges()))
    
    start_time = time.time()
    print("S size {}, volume {}".format(len(cluster[0]), nx.volume(g.G, cluster[0], weight='weight')))
    print("T size {}, volume {}".format(len(cluster[1]), nx.volume(g.G, cluster[1], weight='weight')))    
    cond = nx.conductance(g.G, cluster[0], cluster[1], weight='weight')
    print("Conductance {}, elapsed time {}s".format(cond, time.time()-start_time))
Example #34
 def test_digraph(self):
     G = nx.DiGraph([(0, 1), (1, 2), (2, 3), (3, 0)])
     assert_equal(nx.volume(G, set([0, 1])), 2)