def __call__(self, G, r):
    """Search for a high-conductance BFS tree of G rooted at r using
    spectral sparsification (Spielman-Srivastava effective resistances,
    https://arxiv.org/pdf/0803.0929.pdf).

    Edges are sampled with probability proportional to an estimate of
    their effective resistance, obtained from a Johnson-Lindenstrauss
    projection of the normalized-Laplacian pseudoinverse.  The best tree
    found over the randomized repeats (by `conductance(G, T)`) is
    returned; if no sampled subgraph ever contains r, G itself is
    returned unchanged.

    Parameters:
        G: networkx graph (iterable of nodes, with .edges()).
        r: root node for the BFS tree.

    Returns:
        A networkx DiGraph tree rooted at r, or G if no tree beat
        conductance 0.

    NOTE(review): relies on self.range_a / self.repeats set elsewhere on
    the instance — presumably repeat-count knobs; confirm against the
    enclosing class.
    """
    a = 0
    maxT = G
    maxConductance = 0
    # Dense adjacency matrix of G (self-loops dropped).
    vertex2id = {u: i for i, u in enumerate(G)}
    A = np.zeros((len(vertex2id), len(vertex2id)))
    for u, v in G.edges():
        if u != v:
            A[vertex2id[u]][vertex2id[v]] = 1
            A[vertex2id[v]][vertex2id[u]] = 1
    # Normalized Laplacian L = I - D^-1/2 A D^-1/2; the 1e-12 guards
    # against division by zero for isolated nodes.  invD is diagonal, so
    # the elementwise power ** 0.5 equals the matrix square root here.
    invD = np.diag((A.sum(axis=0) + 1.E-12) ** (-1))
    L = np.eye(len(vertex2id)) - invD ** 0.5 @ A @ invD ** 0.5
    # Moore-Penrose pseudoinverse of L via its nonzero eigenpairs.
    pseudoinverse = 0
    eigenvalues, eigenvectors = np.linalg.eigh(L)
    # BUG FIX: np.linalg.eigh returns eigenvectors as COLUMNS of the
    # matrix; iterating the matrix directly pairs eigenvalue i with ROW i,
    # which is not an eigenvector.  Transpose so each outer product uses
    # the actual eigenvector.
    for eigenvalue, eigenvector in zip(eigenvalues, eigenvectors.T):
        if eigenvalue > 1.E-12:
            pseudoinverse += np.outer(eigenvector, eigenvector) / eigenvalue
    epsilon = 0.1
    # 1+-epsilon factor approximation with probability 1-n**-beta
    # (where n is the number of nodes).
    beta = 1
    k = int((4 + 2 * beta) * np.log(len(G)) /
            (epsilon ** 2 / 2 - epsilon ** 3 / 3))
    for repeat in range(10 * self.range_a):
        # `a` grows both deterministically (every self.repeats rounds) and
        # stochastically below; it scales down the sampling threshold Y.
        if repeat % self.repeats == 0:
            a += 1
        # Random +-1/sqrt(k) projection (Johnson-Lindenstrauss sketch).
        Q = np.random.binomial(1, 0.5, (k, len(G))) * 2 - 1
        Q = Q / (float(k) ** 0.5)
        Z = Q @ pseudoinverse
        if random() < 1. / self.repeats:
            a += 1
        T = nx.DiGraph()
        # ||Z_u - Z_v||^2 approximates the effective resistance of (u,v);
        # Y normalizes so the expected number of kept edges is ~n/a.
        Y = float(len(G)) / sum(
            np.linalg.norm(Z[vertex2id[u], :] - Z[vertex2id[v], :]) ** 2
            for u, v in G.edges()) / a
        for u, v in G.edges():
            resistance = np.linalg.norm(
                Z[vertex2id[u], :] - Z[vertex2id[v], :]) ** 2
            if random() < Y / resistance:
                T.add_edge(u, v)
        if r not in T:
            # Root was not sampled: fall back to the trivial single-node
            # tree and try the next repeat.
            T = nx.DiGraph()
            T.add_node(r)
            continue
        else:
            T = nx.traversal.bfs_tree(T, r)
        cond = conductance(G, T)
        if cond > maxConductance:
            maxConductance = cond
            maxT = T
    #if maxT != G:
    #    return SOTASparsifier()(maxT, r)
    return maxT
def core(G, r, trace_method, eps=1.E-6, starting_a=-1, deflation_strategy=1):
    """Repeatedly invoke `trace_method` with a threshold `a` derived from
    the conductance of the previous result, keeping the last tree for
    which the threshold was still strictly increasing.

    Parameters:
        G: networkx graph.
        r: root node; guaranteed to be in the returned graph.
        trace_method: callable (G, r, a) -> tree-like graph or None.
        eps: slack subtracted from the next threshold so that a fixed
            point (a == found_a) terminates the loop.
        starting_a: initial threshold seed; the first call uses
            a = starting_a + 1 - eps.
        deflation_strategy: only referenced in the commented-out bound
            print below — currently unused by live code.

    Returns:
        The last accepted tree, or a trivial single-node DiGraph on r if
        no call produced a usable tree.
    """
    tr_cond = starting_a
    tr = None
    found_a = starting_a
    while True:
        # Next threshold: one above the conductance achieved last round,
        # minus eps so a repeated value triggers the a <= found_a exit.
        a = tr_cond + 1 - eps
        next_tr = trace_method(G, r, a)
        tr_cond = conductance(G, next_tr)
        # Stop on: no tree, degenerate (<=1 node) tree, or no progress.
        if next_tr is None or len(next_tr) <= 1 or a <= found_a:
            break
        # Accept this round's result only after it passed the checks.
        found_a = a
        tr = next_tr
    if tr is None:
        # Fall back to the trivial tree containing only the root.
        tr = nx.DiGraph()
        tr.add_node(r)
    #print('Bounds', len(tr), 0.5*(sum(max(G.out_degree(v)-a*deflation_strategy,0) for v in G if v not in tr))/len(tr))
    return tr
def sparsifier(G, r):
    """Degree-based graph sparsification (https://arxiv.org/pdf/0808.4134.pdf).

    Sweeps a sampling intensity Y over [0, 1); for each Y, keeps edge
    (u, v) with probability Y * max_deg / min(deg(u), deg(v)) — low-degree
    endpoints make an edge more likely to survive — then extracts a BFS
    tree rooted at r and scores it with `conductance(G, tree)`.

    Parameters:
        G: networkx graph.
        r: root node for the BFS tree.

    Returns:
        The best-conductance tree found, or `trivial_graph(r)` if no
        sampled subgraph ever contained r.
    """
    max_cond, max_tree = 0, None
    max_deg = max(G.degree(v) for v in G)
    #print('max deg', max_deg)
    for Y in np.arange(0, 1, 0.01):
        for _ in range(1):
            subgraph = nx.DiGraph([
                (u, v) for u, v in G.edges()
                if random() < Y * max_deg / min(G.degree(u), G.degree(v))
            ])
            # FIX: bfs_tree raises when the source node is absent from the
            # sampled subgraph (common at small Y).  Skip such samples —
            # the sibling eigenreductor already guards the same way.
            if r not in subgraph:
                continue
            #tree = subgraph
            tree = bfs_tree(subgraph, r)
            cond = conductance(G, tree)
            if cond > max_cond:
                max_cond, max_tree = cond, tree
    return trivial_graph(r) if max_tree is None else max_tree
def eigenreductor(G, r):
    """Search for a high-conductance BFS tree by progressively zeroing
    eigenvalues of the graph Laplacian and sampling edges from the
    resulting low-rank Laplacian approximation.

    Each round removes either the currently-largest or currently-smallest
    remaining eigenvalue (whichever ratio test below prefers), rebuilds an
    approximate Laplacian rescaled to preserve the original trace, samples
    a subgraph with edge probability |Lapprox[u, v]|, and scores the BFS
    tree rooted at r.

    Parameters:
        G: networkx graph.
        r: root node for the BFS tree.

    Returns:
        The best-conductance tree found, or `trivial_graph(r)` if no
        sampled subgraph ever contained r.
    """
    vertex2id = {u: i for i, u in enumerate(G)}
    eigenvalues, eigenvectors = np.linalg.eigh(laplacian(G, vertex2id))
    # Indices sorted by descending eigenvalue; `largest`/`smallest` walk
    # this order inward from both ends as eigenvalues are zeroed.
    eigorder = sorted(list(range(len(eigenvalues))),
                      key=lambda i: -eigenvalues[i])
    max_cond, max_tree = 0, None
    original_sum = eigenvalues.sum()
    largest = 0
    smallest = len(eigenvalues) - 1
    for i in range(len(eigenvalues) - 1):
        # Heuristic ratio test deciding which end of the spectrum to drop
        # next.  NOTE: eigenvalues.sum() shrinks as entries are zeroed in
        # place, so both ratios change every round.
        if eigenvalues[eigorder[smallest]] / (eigenvalues.sum() - eigenvalues[
                eigorder[largest]]) > eigenvalues[eigorder[smallest - 1]] / (
                    eigenvalues.sum() - eigenvalues[eigorder[smallest]]):
            # Remember (index, value) so the zeroing can be rolled back
            # below if the sampled subgraph misses the root.
            changed = (eigorder[largest], eigenvalues[eigorder[largest]])
            eigenvalues[eigorder[largest]] = 0
            largest += 1
        else:
            changed = (eigorder[smallest], eigenvalues[eigorder[smallest]])
            eigenvalues[eigorder[smallest]] = 0
            smallest -= 1
        # Low-rank Laplacian, rescaled so its trace matches the original.
        Lapprox = eigenvectors @ np.diag(
            eigenvalues * original_sum /
            eigenvalues.sum()) @ eigenvectors.transpose()
        #for Y in np.arange(0, 1, 0.1):
        # Keep edge (u, v) with probability |Lapprox[u, v]|.
        subgraph = nx.DiGraph([
            (u, v) for u, v in G.edges()
            if random() < abs(Lapprox[vertex2id[u], vertex2id[v]])
        ])
        if r not in subgraph:
            # Root lost: undo this round's eigenvalue removal and retry.
            eigenvalues[changed[0]] = changed[1]
            continue
        tree = bfs_tree(subgraph, r)
        cond = conductance(G, tree)
        if cond > max_cond:
            max_cond, max_tree = cond, tree
    return trivial_graph(r) if max_tree is None else max_tree
#"elod_core": lambda G,r: conductance.trace.core(G, r, conductance.trace.maxELOD), #"greedy_clever": lambda G,r: conductance.spectral.greedy(G, conductance.trace.core(G, r, conductance.trace.cleverRPCST)), #"greedy_recursive": lambda G,r: conductance.spectral.greedy_recursive(G, [r], [lambda a,b: a-(1+sigma)/10.*alpha*b for sigma in range(10)]) } # = {"ACM", "ANT", "DBLP", "Log4J", "Pubmed`", "Squirrel"} datasets = {"ANT1.1": "data/ant_1.1_features.csv", } for dataset in datasets: G = import_graph(datasets[dataset], directed=False).to_directed() results = {method: list() for method in implementation} nodes = [v for v in G if G.out_degree[v]>=2] for i in range(1):# 200 to get Nemenyi statistical significance for sure #print("\n-----------------------") if len(nodes)==0: continue r = nodes[(int)(random()*len(nodes))] for method in implementation: subgraph = implementation[method](G, r) results[method].append(measures.conductance(G, subgraph)) #print(method, len(subgraph)) #for method in implementation: # print(method, ' = ', [round(v) for v in results[method]], '; %', sum(results[method])/len(results[method])) ranks, crit = friedman_ranks(results) print(dataset, "("+str(int(0.5+crit*10)/10.0)+")", "&", " & ".join(str(int(0.5+sum(results[method])/len(results[method])))+" ("+str(int(0.5+ranks[method]*10)/10.0)+")" for method in ranks) )