Esempio n. 1
0
    def __call__(self, G, r):
        """Sample random sub-graphs of ``G`` and return the best BFS tree rooted at ``r``.

        Every round draws a random +-1 projection ``Q``, projects the
        pseudoinverse of the normalized Laplacian with it, and keeps each edge
        with probability ``Y / resistance``, where ``resistance`` is the
        squared distance between the projected columns of its endpoints
        (scheme inspired by https://arxiv.org/pdf/0803.0929.pdf). The sampled
        graph is turned into a BFS tree rooted at ``r`` and scored with
        ``conductance``; the highest-scoring tree wins.

        Reads ``self.range_a`` (the number of rounds is ``10 * self.range_a``)
        and ``self.repeats`` (controls how quickly the divisor ``a`` grows).

        Args:
            G: graph supporting iteration over nodes, ``len`` and ``edges()``.
            r: root node of the returned tree.

        Returns:
            The sampled BFS tree with the highest conductance, or ``G`` itself
            when ``G`` has no edges or no round produced a tree containing
            ``r`` with a positive conductance.
        """
        # https://arxiv.org/pdf/0803.0929.pdf
        a = 0
        maxT = G
        maxConductance = 0
        n = len(G)
        vertex2id = {u: i for i, u in enumerate(G)}
        # Resolve edge endpoints to matrix indices once; reused in every round.
        edges = [(u, v, vertex2id[u], vertex2id[v]) for u, v in G.edges()]
        if not edges:
            # Nothing can be sampled (and the normalisation below would divide
            # by zero), so fall back to the same value returned when no round
            # succeeds.
            return maxT

        # Symmetric 0/1 adjacency matrix, ignoring self-loops.
        A = np.zeros((n, n))
        for _, _, i, j in edges:
            if i != j:
                A[i, j] = 1
                A[j, i] = 1
        # The small constant keeps isolated nodes from dividing by zero.
        invD = np.diag((A.sum(axis=0) + 1.E-12)**(-1))
        L = np.eye(n) - invD**0.5 @ A @ invD**0.5

        # Pseudoinverse from the spectral decomposition, skipping (near-)zero
        # eigenvalues. Starts as a matrix so it stays well-formed even when no
        # eigenvalue passes the threshold.
        pseudoinverse = np.zeros((n, n))
        eigenvalues, eigenvectors = np.linalg.eigh(L)
        # eigh returns the eigenvectors as COLUMNS, hence the transpose.
        for eigenvalue, eigenvector in zip(eigenvalues, eigenvectors.T):
            if eigenvalue > 1.E-12:
                pseudoinverse += np.outer(eigenvector,
                                          eigenvector) / eigenvalue

        epsilon = 0.1
        beta = 1  # 1+-epsilon factor approximation with probability 1-n**-beta (where n is the number of nodes)
        k = int((4 + 2 * beta) * np.log(n) /
                (epsilon**2 / 2 - epsilon**3 / 3))
        for repeat in range(10 * self.range_a):
            if repeat % self.repeats == 0:
                a += 1
            Q = np.random.binomial(1, 0.5, (k, n)) * 2 - 1
            Q = Q / (float(k)**0.5)
            # Shape (k, n): column i is the projected embedding of vertex i.
            Z = Q @ pseudoinverse

            if random() < 1. / self.repeats:
                a += 1
            # One resistance estimate per edge, computed once and reused for
            # both the normalisation and the sampling below.
            resistances = [
                np.linalg.norm(Z[:, i] - Z[:, j])**2 for _, _, i, j in edges
            ]
            Y = float(n) / sum(resistances) / a
            T = nx.DiGraph()
            for (u, v, _, _), resistance in zip(edges, resistances):
                if random() < Y / resistance:
                    T.add_edge(u, v)
            if r not in T:
                # The root was not sampled; this round cannot yield a tree.
                continue
            T = nx.traversal.bfs_tree(T, r)
            cond = conductance(G, T)
            if cond > maxConductance:
                maxConductance = cond
                maxT = T
        return maxT
Esempio n. 2
0
def core(G, r, trace_method, eps=1.E-6, starting_a=-1, deflation_strategy=1):
    """Iterate ``trace_method`` with a growing threshold and return the last accepted trace.

    Each round calls ``trace_method(G, r, a)`` with
    ``a = <conductance of the previously accepted trace> + 1 - eps`` (the
    first round uses ``starting_a`` in place of the conductance). The loop
    stops as soon as the method returns ``None``, returns something of length
    at most 1, or ``a`` fails to exceed the last accepted threshold.

    Args:
        G: input graph, forwarded to ``trace_method`` and ``conductance``.
        r: root node, forwarded to ``trace_method``.
        trace_method: callable ``(G, r, a) -> trace or None``.
        eps: slack subtracted from the next threshold.
        starting_a: initial value standing in for the conductance and for the
            last accepted threshold.
        deflation_strategy: not used by this function; kept so existing
            callers passing it keep working.

    Returns:
        The last accepted trace, or a ``nx.DiGraph`` containing only ``r``
        when no trace was ever accepted.
    """
    tr_cond = starting_a
    tr = None
    found_a = starting_a
    while True:
        a = tr_cond + 1 - eps
        next_tr = trace_method(G, r, a)
        # Validate before scoring: a rejected trace (possibly None) must not
        # be handed to conductance(), and its score would be discarded anyway.
        if next_tr is None or len(next_tr) <= 1 or a <= found_a:
            break
        tr_cond = conductance(G, next_tr)
        found_a = a
        tr = next_tr
    if tr is None:
        tr = nx.DiGraph()
        tr.add_node(r)
    return tr
Esempio n. 3
0
def sparsifier(G, r):
    """Return the best BFS tree rooted at ``r`` over degree-based random edge samples.

    For each ``Y`` in ``np.arange(0, 1, 0.01)`` every edge ``(u, v)`` of ``G``
    is kept with probability ``Y * max_deg / min(deg(u), deg(v))`` (values
    above 1 always keep the edge). The sampled graph is reduced to a BFS tree
    rooted at ``r`` and scored with ``conductance``.

    Args:
        G: input graph (must support ``degree`` and ``edges()``).
        r: root node.

    Returns:
        The tree with the highest positive conductance, or
        ``trivial_graph(r)`` when no sample produced one.
    """
    # https://arxiv.org/pdf/0808.4134.pdf
    max_cond, max_tree = 0, None
    # default=0 keeps an empty graph from raising; it then has no edges to
    # sample and the trivial graph is returned.
    max_deg = max((G.degree(v) for v in G), default=0)
    for Y in np.arange(0, 1, 0.01):
        keep_scale = Y * max_deg
        subgraph = nx.DiGraph([
            (u, v) for u, v in G.edges()
            if random() < keep_scale / min(G.degree(u), G.degree(v))
        ])
        if r not in subgraph:
            # Same guard as eigenreductor: without the root there is no tree
            # to build. This always happens for Y == 0, which keeps no edge.
            continue
        tree = bfs_tree(subgraph, r)
        cond = conductance(G, tree)
        if cond > max_cond:
            max_cond, max_tree = cond, tree
    return trivial_graph(r) if max_tree is None else max_tree
Esempio n. 4
0
def eigenreductor(G, r):
    """Search for a high-conductance BFS tree by progressively zeroing Laplacian eigenvalues.

    The eigenvalues of ``laplacian(G, vertex2id)`` are ranked from largest to
    smallest. Each round zeroes either the largest or the smallest remaining
    ranked eigenvalue (chosen by comparing two ratios), rebuilds an
    approximate matrix rescaled to the original eigenvalue sum, and samples
    every edge ``(u, v)`` of ``G`` with probability ``abs(approx[u, v])``.
    The sample is reduced to a BFS tree rooted at ``r`` and scored with
    ``conductance``.

    Returns the best-scoring tree, or ``trivial_graph(r)`` if none scored
    above zero.
    """
    index_of = {node: pos for pos, node in enumerate(G)}
    spectrum, basis = np.linalg.eigh(laplacian(G, index_of))
    count = len(spectrum)
    # Positions of the eigenvalues, largest eigenvalue first.
    ranking = sorted(range(count), key=lambda pos: -spectrum[pos])
    initial_total = spectrum.sum()

    best_cond = 0
    best_tree = None
    head = 0          # rank of the largest eigenvalue still eligible
    tail = count - 1  # rank of the smallest eigenvalue still eligible
    for _ in range(count - 1):
        head_pos = ranking[head]
        tail_pos = ranking[tail]
        ratio_if_head_dropped = spectrum[tail_pos] / (
            spectrum.sum() - spectrum[head_pos])
        ratio_if_tail_dropped = spectrum[ranking[tail - 1]] / (
            spectrum.sum() - spectrum[tail_pos])
        if ratio_if_head_dropped > ratio_if_tail_dropped:
            dropped_pos = head_pos
            head += 1
        else:
            dropped_pos = tail_pos
            tail -= 1
        # Remember the value so it can be restored if this round is rejected.
        dropped_value = spectrum[dropped_pos]
        spectrum[dropped_pos] = 0

        rescaled = spectrum * initial_total / spectrum.sum()
        approx = basis @ np.diag(rescaled) @ basis.transpose()

        kept_edges = [
            (u, v) for u, v in G.edges()
            if random() < abs(approx[index_of[u], index_of[v]])
        ]
        sampled = nx.DiGraph(kept_edges)
        if r not in sampled:
            # Root missing: put the eigenvalue back (the head/tail ranks stay
            # advanced) and move on to the next round.
            spectrum[dropped_pos] = dropped_value
            continue

        candidate = bfs_tree(sampled, r)
        score = conductance(G, candidate)
        if score > best_cond:
            best_cond = score
            best_tree = candidate

    if best_tree is None:
        return trivial_graph(r)
    return best_tree
Esempio n. 5
0
                    #"elod_core": lambda G,r: conductance.trace.core(G, r, conductance.trace.maxELOD),
                    #"greedy_clever": lambda G,r: conductance.spectral.greedy(G, conductance.trace.core(G, r, conductance.trace.cleverRPCST)),
                    #"greedy_recursive": lambda G,r: conductance.spectral.greedy_recursive(G, [r], [lambda a,b: a-(1+sigma)/10.*alpha*b for sigma in range(10)])
                    }

# datasets = {"ACM", "ANT", "DBLP", "Log4J", "Pubmed", "Squirrel"}
# Experiment driver: for every dataset, pick a random root among the nodes
# with out-degree >= 2, run every method in `implementation` from that root,
# record the conductance of the returned subgraph and print one summary row
# (mean conductance and Friedman rank per method).
datasets = {"ANT1.1": "data/ant_1.1_features.csv", }
for dataset in datasets:
    G = import_graph(datasets[dataset], directed=False).to_directed()
    results = {method: list() for method in implementation}
    nodes = [v for v in G if G.out_degree[v]>=2]
    if not nodes:
        # No eligible root: every result list would stay empty and the
        # averages in the summary line would divide by zero, so skip the
        # dataset instead.
        continue
    for i in range(1):  # 200 to get Nemenyi statistical significance for sure
        r = nodes[int(random() * len(nodes))]

        for method in implementation:
            subgraph = implementation[method](G, r)
            results[method].append(measures.conductance(G, subgraph))

    ranks, crit = friedman_ranks(results)
    # Critical value and ranks are rounded to one decimal, means to the
    # nearest integer, all by the int(0.5 + x) trick.
    print(dataset,
          "(" + str(int(0.5 + crit * 10) / 10.0) + ")",
          "&",
          " & ".join(
              str(int(0.5 + sum(results[method]) / len(results[method])))
              + " (" + str(int(0.5 + ranks[method] * 10) / 10.0) + ")"
              for method in ranks))