def test_ncp_l1reg_big():
    """Smoke-test the l1-regularized NCP on the neuro-fmri-01 edge list.

    The test passes when nothing raises; no assertion is made on the result.
    """
    graph = localgraphclustering.GraphLocal()
    graph.read_graph(
        "notebooks/datasets/neuro-fmri-01.edges", "edgelist", " ")

    # The largest component is only printed; the NCP below is still built
    # from the full graph.
    component = graph.largest_component()
    print(component.adjacency_matrix.data)

    df = localgraphclustering.NCPData(graph).l1reg(ratio=0.5)
def test_ncp_grid():
    """Check the approxPageRank NCP of a 10x10 grid graph.

    Every value in the ``output_sizeeff`` column of the NCP data frame
    must be strictly positive.
    """
    import networkx as nx

    grid = nx.grid_graph(dim=[10, 10])
    graph = lgc.GraphLocal().from_networkx(grid)
    frame = lgc.NCPData(graph).approxPageRank().as_data_frame()
    smallest = min(frame["output_sizeeff"])
    assert smallest > 0
def test_ncp_l1reg_big():
    """Smoke-test the l1-regularized NCP (4 threads) on neuro-fmri-01.

    The edge list is read with ``header=True``.  The test passes when
    nothing raises; no assertion is made on the result.
    """
    graph = lgc.GraphLocal()
    graph.read_graph(
        "notebooks/datasets/neuro-fmri-01.edges", "edgelist", " ",
        header=True)

    # The largest component is only printed; the NCP below is still built
    # from the full graph.
    component = graph.largest_component()
    print(component.adjacency_matrix.data)

    df = lgc.NCPData(graph).l1reg(ratio=0.5, nthreads=4)
def test_ncp_clique():
    """Check the approxPageRank NCP of the complete graph on 10 nodes.

    Every value in the ``output_sizeeff`` column of the NCP data frame
    must be strictly positive.
    """
    import networkx as nx

    clique = nx.complete_graph(10)
    graph = lgc.GraphLocal().from_networkx(clique)
    frame = lgc.NCPData(graph).approxPageRank().as_data_frame()
    smallest = min(frame["output_sizeeff"])
    assert smallest > 0
def test_ncp_crd_big():
    """Smoke-test the CRD-based NCP and its three plots on neuro-fmri-01.

    The test passes when nothing raises; no assertion is made on the result.
    """
    graph = localgraphclustering.GraphLocal()
    graph.read_graph(
        "notebooks/datasets/neuro-fmri-01.edges", "edgelist", " ")

    df = localgraphclustering.NCPData(graph).crd(
        ratio=0.5, w=10, U=10, h=1000)
    plots = localgraphclustering.ncpplots.NCPPlots(df)

    # Conductance vs size, conductance vs volume, isoperimetry vs size.
    plots.cond_by_size()
    plots.cond_by_vol()
    plots.isop_by_size()
def test_from_networkx():
    """Convert a random geometric networkx graph and check the result.

    Builds a 1000-node random geometric graph from seeded uniform positions,
    converts it with ``GraphLocal.from_networkx`` and asserts that the
    adjacency matrix equals its transpose and that the graph is not reported
    as disconnected.
    """
    import math
    # Imported locally, like the sibling networkx tests, so the test does not
    # depend on a module-level ``nx`` being present.
    import networkx as nx
    import numpy as np

    N = 1000
    rad = 3 / math.sqrt(N)

    # Fixed seed: the positions (x first, then y) are reproducible.
    np.random.seed(0)
    pos_x = np.random.rand(N)
    pos_y = np.random.rand(N)
    pos = {i: (pos_x[i], pos_y[i]) for i in range(N)}

    G = nx.generators.random_geometric_graph(N, rad, pos=pos)
    g = lgc.GraphLocal().from_networkx(G)

    # No entry may differ between the matrix and its transpose.
    assert (g.adjacency_matrix != g.adjacency_matrix.T).sum() == 0
    # On our test system, this returns true, could need to be adjusted
    # due to the random positions.
    assert g.is_disconnected() == False
def single_test(test_method, epsilon=1e-4, graph_name='JohnsHopkins'):
    """Run one approximate-PageRank call and print what it returned.

    Args:
        test_method: value passed as ``method`` to ``lgc.approximate_PageRank``.
        epsilon: value passed as ``epsilon``.
        graph_name: stem of the ``.graphml`` file to load.

    ``alpha``, ``rho`` and ``ref_node`` are read from module scope.
    """
    # The module-level settings are only read here, so a plain global lookup
    # is enough.
    graph_path = f"../../LocalGraphClustering-1/notebooks/datasets/{graph_name}.graphml"
    g = lgc.GraphLocal(graph_path, "graphml")

    nodes, p = lgc.approximate_PageRank(
        g,
        ref_node,
        alpha=alpha,
        rho=rho,
        epsilon=epsilon,
        method=test_method,
    )

    print(test_method)
    print(f'\n\nnumber of nodes: {len(nodes)}\nnodes:\n{nodes}\n\np:\n{p}\n')
def test_ncp_localmin():
    """Smoke-test ``add_localmin_samples`` on two graphs.

    First on the example graph (whose NCP data frame is printed), then on a
    two-node graph built with ``list_to_gl``.  Passes when nothing raises.
    """

    def _localmin_ncp(graph):
        # NCP object whose default method is ACL spectral clustering, sampled
        # with ratio=1.
        ncp = lgc.NCPData(graph)
        ncp.default_method = lgc.partialfunc(
            lgc.spectral_clustering, alpha=0.01, rho=1.0e-4, method="acl")
        ncp.add_localmin_samples(ratio=1)
        return ncp

    print(_localmin_ncp(load_example_graph()).as_data_frame())

    tiny = lgc.GraphLocal()
    tiny.list_to_gl([0,1],[1,0],[1,1])
    _localmin_ncp(tiny)
def test_time(test_method, epsilons, graph_name):
    """Time ``lgc.approximate_PageRank`` once per epsilon value.

    Args:
        test_method: value passed as ``method`` to ``approximate_PageRank``.
        epsilons: iterable of ``epsilon`` values, timed one call each.
        graph_name: stem of the ``.graphml`` file to load.

    Returns:
        A list with one elapsed time per epsilon, in milliseconds, in the
        same order as ``epsilons``.

    ``alpha``, ``rho`` and ``ref_node`` are read from module scope.
    """
    g = lgc.GraphLocal(
        f"../../LocalGraphClustering/notebooks/datasets/{graph_name}.graphml",
        "graphml")

    times = []
    for eps in epsilons:
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring short durations.
        started = time.perf_counter()
        lgc.approximate_PageRank(g,
                                 ref_node,
                                 alpha=alpha,
                                 rho=rho,
                                 epsilon=eps,
                                 method=test_method)
        times.append((time.perf_counter() - started) * 1000)
    return times
def compare_result(graph_name):
    """Print the node sets found by the 'l1reg' and 'l1reg-rand' methods.

    Both calls use the same graph, ``epsilon=1e-6`` and the module-level
    ``alpha``, ``rho`` and ``ref_node``.

    Args:
        graph_name: stem of the ``.graphml`` file to load.
    """
    eps = 1e-6
    g = lgc.GraphLocal(
        f"../../LocalGraphClustering/notebooks/datasets/{graph_name}.graphml",
        "graphml")

    def _nodes_for(method):
        # Only the first returned item is used; the second is dropped.
        found, _ = lgc.approximate_PageRank(
            g, ref_node, alpha=alpha, rho=rho, epsilon=eps, method=method)
        return found

    nodes = _nodes_for('l1reg')
    print(f'results from non-random method:\n{nodes}')

    nodes = _nodes_for('l1reg-rand')
    print(f'results from random method:\n{nodes}')
def load_example_graph():
    """Return a ``GraphLocal`` built from the bundled dolphins edge file."""
    dolphins_path = "localgraphclustering/tests/data/dolphins.edges"
    return localgraphclustering.GraphLocal(dolphins_path, separator=" ")
def read_minnesota():
    """Load the Minnesota edge list and return the resulting graph.

    Returns:
        The ``lgc.GraphLocal`` built from
        ``notebooks/datasets/minnesota.edgelist``.  Previously the graph was
        built and then dropped, so the function always returned ``None``.
    """
    g = lgc.GraphLocal('notebooks/datasets/minnesota.edgelist', 'edgelist', ' ')
    return g
def test_load():
    """Load the dolphins edge file and check it is not reported disconnected."""
    dolphins_path = "localgraphclustering/tests/data/dolphins.edges"
    graph = lgc.GraphLocal(dolphins_path, separator=" ")
    disconnected = graph.is_disconnected()
    assert disconnected == False
def test_ncp_crd_big():
    """Smoke-test the CRD-based NCP (4 threads) on the Minnesota edge list.

    The result is wrapped in ``NCPPlots`` but nothing is drawn.  The test
    passes when nothing raises.
    """
    graph = lgc.GraphLocal()
    graph.read_graph(
        "notebooks/datasets/minnesota.edgelist", "edgelist",
        remove_whitespace=True)

    df = lgc.NCPData(graph).crd(ratio=0.5, w=10, U=10, h=1000, nthreads=4)
    ncp_plots = lgc.ncpplots.NCPPlots(df)
import localgraphclustering as lgc

# Output locations and file names.
outputPath = "../output/"
randFile = "time-rand.txt"
normFile = "time-norm.txt"
randqFile = "q-rand.txt"
normqFile = "q-norm.txt"

# Dataset and seed node.
DatasetName = "JohnsHopkins"
ref_node = [3]

# Parameter sweeps, 50 values each.
# NOTE(review): `np` is not imported in the visible part of this file;
# presumably it is imported above this chunk. Confirm.
alphas = np.linspace(0.1, 0.5, 50)
rhos = np.logspace(-4, -7, 50)
epsilons = np.logspace(-2, -6, 50)

print("loading graph...")
g = lgc.GraphLocal("../../LocalGraphClustering/notebooks/datasets/{0}.edgelist".format(DatasetName))


def measureTimeEpsilon(methodName):
    """Call approximate_PageRank once per value in ``epsilons``.

    Each value is printed before its call.  No timing is recorded in the
    visible code.
    """
    for eps_value in epsilons:
        print("eps: ", eps_value)
        lgc.approximate_PageRank(g, ref_node, epsilon=eps_value, method=methodName)


def measureTimeAlpha(methodName):
    """Call approximate_PageRank once per value in ``alphas``.

    Each value is printed before its call.  No timing is recorded in the
    visible code.
    """
    for alpha_value in alphas:
        print("alpha: ", alpha_value)
        lgc.approximate_PageRank(g, ref_node, alpha=alpha_value, method=methodName)


def measureTimeRho(methodName):
    """Call approximate_PageRank once per value in ``rhos``.

    Each value is printed before its call.  No timing is recorded in the
    visible code.
    """
    for rho_value in rhos:
        print("rho: ", rho_value)
        lgc.approximate_PageRank(g, ref_node, rho=rho_value, method=methodName)
import numpy as np
import localgraphclustering as lgc
import matplotlib.pyplot as plt
from pagerank.bounded_accelerated_proximal_gradient_descent import BoundedAcceleratedProximalGradientDescent

if __name__ == "__main__":
    # Solver parameters.
    alpha = 0.15
    epsilon = 1e-6
    rho = 1e-4
    seed_nodes = [0]

    # Input graph.
    graph_name = '../data/ppi_mips.graphml'
    graph_type = 'graphml'
    graph = lgc.GraphLocal(graph_name, graph_type)

    # Build the solver, cap it at 100 iterations and run it.
    solver = BoundedAcceleratedProximalGradientDescent(
        graph,
        seed_nodes=seed_nodes,
        alpha=alpha,
        epsilon=epsilon,
        rho=rho,
    )
    solver.set_max_iter(100)
    solver.solve()
def load_graph(self, fname, ftype='edgelist', separator='\t'):
    """Build a ``GraphLocal`` from a file and store it on ``self.g``.

    Args:
        fname: first positional argument passed to ``lgc.GraphLocal``.
        ftype: second positional argument (defaults to ``'edgelist'``).
        separator: third positional argument (defaults to a tab).
    """
    loaded = lgc.GraphLocal(fname, ftype, separator)
    self.g = loaded
def lgc_data(name):
    """Load one of the named example graphs as a ``GraphLocal``.

    Dataset files are resolved relative to this file's directory, under
    ``../../notebooks/datasets``.

    Args:
        name: one of the dataset names listed in the table below.

    Returns:
        The ``lgc.GraphLocal`` built from the matching dataset file.

    Raises:
        Exception: with message "Unknown graph name" when ``name`` matches
            none of the known datasets.
    """
    lgc_path = os.path.dirname(os.path.realpath(__file__))

    # Argument shapes shared by several datasets.
    space_edgelist = ('edgelist', ' ')
    tab_edgelist = ('edgelist', '\t')
    graphml = ('graphml',)
    plain = {}
    with_header = {'header': True}

    # (name, file, positional args after the path, keyword args), kept in the
    # original comparison order.
    catalog = (
        ("senate", "senate.edgelist", space_edgelist, plain),
        ("Erdos02", "Erdos02-cc.edgelist", space_edgelist, plain),
        ("dolphins", "dolphins.smat", space_edgelist, plain),
        ("JohnsHopkins", "JohnsHopkins.edgelist", tab_edgelist, plain),
        ("Colgate88", "Colgate88_reduced.graphml", graphml, plain),
        ("usroads", "usroads-cc.edgelist", space_edgelist, plain),
        ("ppi_mips", "ppi_mips.graphml", graphml, plain),
        ("ASTRAL", "ASTRAL-small-sized-mammoth-sims-geq-2.graphml",
         graphml, plain),
        ("sfld",
         "sfld_brown_et_al_amidohydrolases_protein_similarities_for_beh.graphml",
         graphml, plain),
        ("find_V", "find_V.graphml", graphml, plain),
        ("ppi-h**o", "ppi-h**o.edgelist", space_edgelist, with_header),
        ("neuro-fmri-01", "neuro-fmri-01.edges", space_edgelist, with_header),
        ("ca-GrQc", "ca-GrQc-cc.csv", space_edgelist, with_header),
        ("disconnected", "disconnected.smat", space_edgelist, plain),
    )

    # Linear scan with ``==`` so matching follows the same rules as the
    # chain of comparisons it replaces.
    for known, filename, positional, keywords in catalog:
        if name == known:
            path = os.path.join(lgc_path, "../../notebooks/datasets", filename)
            return lgc.GraphLocal(path, *positional, **keywords)

    raise Exception("Unknown graph name")
si,ei = g.adjacency_matrix.indptr[node],g.adjacency_matrix.indptr[node+1] neighs = g.adjacency_matrix.indices[si:ei] for i in range(len(neighs)): if visited[neighs[i]] == 0: visited[neighs[i]] = 1 seeds.append(neighs[i]) Q.put(neighs[i]) return seeds import sys sys.path.append("../../../LocalGraphClustering/") import localgraphclustering as lgc G = lgc.GraphLocal("../../dataset/lawlor-spectra-k32.edgelist","edgelist") import pandas as pd from sklearn.neighbors import NearestNeighbors df = pd.read_table("../../dataset/lawlor-spectra-k32.coords",header=None) coords = df[[0,1]].values coords[:,1] *= 4 coords[:,0] *= 10 x,y = -1*coords[:,0],-1*coords[:,1] S = np.nonzero((x>0.002))[0] records_flow = local_embedding(G,S,x,y,ntrials=500,delta=1.0,nprocs=120) wptr = open("records_flow_002_1_node_3.p","wb") pickle.dump(records_flow,wptr)
import time
import matplotlib.pyplot as plt
import sys, os

data_path = os.getcwd()
try:
    import localgraphclustering as lgc
except ImportError:
    # when the package is not installed, import the local version instead.
    # the notebook must be placed in the original "notebooks/" folder
    # Only ImportError triggers the fallback, so Ctrl-C and unrelated
    # failures are no longer swallowed by a bare except.
    sys.path.append("../")
    import localgraphclustering as lgc

# Example graph, resolved relative to the current working directory.
g = lgc.GraphLocal(os.path.join(data_path, 'data/ppi_mips.graphml'), 'graphml')


def proximal_gradient_descent(A, dn_sqrt, d_sqrt, Q, ref_node, rho, alpha, eps, stepsize=1.):
    # Number of nodes in the graph
    n = dn_sqrt.shape[0]
# NOTE(review): the statements down to the second `.write(...)` call look like
# the tail of `run_improve`, whose `def` line is outside this chunk. Confirm,
# and re-indent them under that header when merging.
plt.show()

# Files are opened in `with` blocks so each handle is flushed and closed as
# soon as its pickle is written.
with open('results/' + method + "-ncp-" + gname + '.pickle', 'wb') as ncp_pickle:
    pickle.dump(ncp, ncp_pickle)
ncp.write('results/' + method + "-ncp-csv-" + gname, writepython=False)

with open('results/' + method + "-ncp2-" + gname + '.pickle', 'wb') as ncp2_pickle:
    pickle.dump(ncp2, ncp2_pickle)
ncp2.write('results/' + method + "-ncp2-csv-" + gname, writepython=False)

# Graphs to process: name -> path, or (path, separator) tuple.
mygraphs = {
    #'email-Enron':'/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/email-Enron.edgelist',
    #'pokec':'/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/soc-pokec-relationships.edgelist',
    'ppi': '/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/ppi_mips.graphml',
    'sfld': '/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/sfld_brown_et_al_amidohydrolases_protein_similarities_for_beh.graphml'
}

for (gname, gfile) in mygraphs.items():
    print(gname, gfile)
    sep = ' '
    if isinstance(gfile, tuple):
        sep = gfile[1]
        gfile = gfile[0]
    # `sep` is now passed through instead of a literal " ". For plain-string
    # entries it is the same single space; for tuple entries the declared
    # separator is finally used.
    g = lgc.GraphLocal(os.path.join("..", "data", gfile), 'graphml', sep)
    g.discard_weights()
    run_improve(g, gname=gname, method="mqi", methodname="MQI", delta=100, timeout=100000000)
def read_minnesota():
    """Load the Minnesota edge list and return the resulting graph.

    Returns:
        The ``localgraphclustering.GraphLocal`` built from
        ``notebooks/datasets/minnesota.edgelist``.  Previously the graph was
        built and then dropped, so the function always returned ``None``.
    """
    g = localgraphclustering.GraphLocal(
        'notebooks/datasets/minnesota.edgelist', 'edgelist', ' ')
    return g
# NOTE(review): the statements down to the second pickle dump look like the
# tail of `run_improve`, whose `def` line is outside this chunk. Confirm, and
# re-indent them under that header when merging.
ncp = lgc.NCPData(g, store_output_clusters=True)
ncp.approxPageRank(ratio=ratio, nthreads=nthreads, localmins=False, neighborhoods=False, random_neighborhoods=False)
sets = [st["output_cluster"] for st in ncp.results]

print("Make an NCP object for Improve Algo")
ncp2 = lgc.NCPData(g)
print("Going into improve mode")
output = ncp2.refine(sets, method=method, methodname=methodname, nthreads=nthreads, timeout=timeout, **{"delta": delta})

fig = lgc.NCPPlots(ncp2).mqi_input_output_cond_plot()[0]
fig.axes[0].set_title(gname + " " + methodname+"-NCP")
# NOTE(review): `figsize` is passed to savefig unchanged from the original;
# check that the installed matplotlib accepts it.
fig.savefig("figures/" + method + "-ncp-"+gname+".pdf", bbox_inches="tight", figsize=(100,100))
plt.show()

# `delta` is numeric (the driver below passes 0.6), so it must be converted
# before string concatenation; the bare `+ delta` raised TypeError.
# Files are opened in `with` blocks so each handle is closed after writing.
result_stem = 'results/' + method + "delta" + str(delta)
with open(result_stem + "-ncp-" + gname + '.pickle', 'wb') as ncp_pickle:
    pickle.dump(ncp, ncp_pickle)
with open(result_stem + "-ncp2-" + gname + '.pickle', 'wb') as ncp2_pickle:
    pickle.dump(ncp2, ncp2_pickle)

# Graphs to process: name -> path, or (path, separator) tuple.
mygraphs = {'email-Enron':'/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/email-Enron.edgelist',
            'pokec':'/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/soc-pokec-relationships.edgelist',
            'livejournal':'/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/soc-LiveJournal1.edgelist'
            }

# NOTE(review): the clock starts once, before the loop, so each printed value
# is time since the start of the whole run, not per graph. Confirm this is
# intended.
start = time.time()
for (gname, gfile) in mygraphs.items():
    print(gname, gfile)
    sep = ' '
    if isinstance(gfile, tuple):
        sep = gfile[1]
        gfile = gfile[0]
    # `sep` is now passed through instead of a literal " ". For plain-string
    # entries it is the same single space; for tuple entries the declared
    # separator is finally used.
    g = lgc.GraphLocal(os.path.join("..", "data", gfile), 'edgelist', sep)
    g.discard_weights()
    run_improve(g, gname=gname, method="sl", methodname="SimpleLocal", delta=0.6, timeout=100000000)
    end = time.time()
    print("Elapsed time for ", gname , " is ", end - start)
# Import matplotlib
import matplotlib.pyplot as plt
import sys, traceback
import os
# Make the notebooks folder importable before `helper` is imported.
sys.path.insert(0, os.path.join("..", "LocalGraphClustering", "notebooks"))
import helper
import pickle
import csv

print("Running senate")

# Read graph. This also supports gml and graphml format.
# NOTE(review): `lgc` is not imported in the visible part of this script;
# presumably it is brought into scope above this chunk. Confirm.
g = lgc.GraphLocal('./datasets/senate.graphml', 'graphml')
g.discard_weights()

# Compute the approxPageRank NCP and wrap it for plotting.
ncp_instance = lgc.NCPData(g)
ncp_instance.approxPageRank(ratio=0.8, timeout=5000000, nthreads=24)
ncp_plots = lgc.NCPPlots(ncp_instance, method_name="acl")

#plot conductance vs size, then conductance vs volume
for _plot_name, _png_path in (
        ("cond_by_size", 'figures/cond_card_senate.png'),
        ("cond_by_vol", 'figures/cond_vol_senate.png')):
    fig, ax, min_tuples = getattr(ncp_plots, _plot_name)()
    plt.savefig(_png_path, bbox_inches='tight')
    plt.show()

#plot isoperimetry vs size