def test_ncp_l1reg_big():
    """Smoke-test the ``l1reg`` NCP run on the neuro-fmri-01 edge list.

    No assertion is made on the result; the test passes as long as
    nothing raises.
    """
    graph = localgraphclustering.GraphLocal()
    graph.read_graph(
        "notebooks/datasets/neuro-fmri-01.edges", "edgelist", " ")
    # The largest component is only printed; the NCP run below still
    # operates on the full graph.
    component = graph.largest_component()
    print(component.adjacency_matrix.data)
    localgraphclustering.NCPData(graph).l1reg(ratio=0.5)
# Example #2
# 0
def test_ncp_grid():
    """Check the approxPageRank NCP on a 10x10 grid graph.

    Every ``output_sizeeff`` value in the resulting data frame must be
    strictly positive.
    """
    import networkx as nx
    grid = nx.grid_graph(dim=[10, 10])
    graph = lgc.GraphLocal().from_networkx(grid)
    frame = lgc.NCPData(graph).approxPageRank().as_data_frame()
    smallest = min(frame["output_sizeeff"])
    assert smallest > 0
def test_ncp_l1reg_big():
    """Smoke-test the ``l1reg`` NCP run (4 threads) on neuro-fmri-01.

    The edge list is read with ``header=True``.  No assertion is made on
    the result; the test passes as long as nothing raises.
    """
    graph = lgc.GraphLocal()
    graph.read_graph(
        "notebooks/datasets/neuro-fmri-01.edges", "edgelist", " ",
        header=True)
    # The largest component is only printed; the NCP run below still
    # operates on the full graph.
    component = graph.largest_component()
    print(component.adjacency_matrix.data)
    lgc.NCPData(graph).l1reg(ratio=0.5, nthreads=4)
def test_ncp_clique():
    """Check the approxPageRank NCP on the complete graph with 10 nodes.

    Every ``output_sizeeff`` value in the resulting data frame must be
    strictly positive.
    """
    import networkx as nx
    clique = nx.complete_graph(10)
    graph = lgc.GraphLocal().from_networkx(clique)
    frame = lgc.NCPData(graph).approxPageRank().as_data_frame()
    smallest = min(frame["output_sizeeff"])
    assert smallest > 0
def test_ncp_crd_big():
    """Smoke-test the ``crd`` NCP run and the NCP plots on neuro-fmri-01.

    No assertion is made; the test passes as long as nothing raises.
    """
    graph = localgraphclustering.GraphLocal()
    graph.read_graph(
        "notebooks/datasets/neuro-fmri-01.edges", "edgelist", " ")
    frame = localgraphclustering.NCPData(graph).crd(
        ratio=0.5, w=10, U=10, h=1000)
    plots = localgraphclustering.ncpplots.NCPPlots(frame)
    # Drawn in this order: conductance vs size, conductance vs volume,
    # isoperimetry vs size.
    for plot_name in ("cond_by_size", "cond_by_vol", "isop_by_size"):
        getattr(plots, plot_name)()
# Example #6
# 0
def test_from_networkx():
    """Convert a seeded random geometric graph and check the result.

    The converted adjacency matrix must equal its transpose, and
    ``is_disconnected()`` must report ``False``.
    """
    import math
    import numpy as np
    n_nodes = 1000
    radius = 3/math.sqrt(n_nodes)
    np.random.seed(0)
    xs = np.random.rand(n_nodes)
    ys = np.random.rand(n_nodes)
    # Node index -> (x, y) position.
    positions = dict(enumerate(zip(xs, ys)))
    geometric = nx.generators.random_geometric_graph(
        n_nodes, radius, pos=positions)
    g = lgc.GraphLocal().from_networkx(geometric)
    mismatches = (g.adjacency_matrix != g.adjacency_matrix.T).sum()
    assert mismatches == 0
    # This holds on the original authors' test system; because the
    # positions are random it may need adjusting elsewhere.
    assert g.is_disconnected() == False
# Example #7
# 0
def single_test(test_method, epsilon=1e-4, graph_name='JohnsHopkins'):
    """Run approximate PageRank once with ``test_method`` and print it.

    ``alpha``, ``rho`` and ``ref_node`` are read from module globals.

    Args:
        test_method: value forwarded as ``method``.
        epsilon: value forwarded as ``epsilon``.
        graph_name: base name of the ``.graphml`` dataset to load.
    """
    global alpha, rho, ref_node
    dataset = (
        f"../../LocalGraphClustering-1/notebooks/datasets/{graph_name}.graphml")
    graph = lgc.GraphLocal(dataset, "graphml")
    nodes, p = lgc.approximate_PageRank(
        graph, ref_node,
        alpha=alpha, rho=rho, epsilon=epsilon, method=test_method)
    print(test_method)
    print(f'\n\nnumber of nodes: {len(nodes)}\nnodes:\n{nodes}\n\np:\n{p}\n')
def test_ncp_localmin():
    """Smoke-test ``add_localmin_samples`` on two graphs.

    Runs first on the example graph (printing the resulting data frame),
    then on a two-node graph built with ``list_to_gl``.  No assertion is
    made; the test passes as long as nothing raises.
    """
    def sample_localmins(graph):
        # Build an NCPData whose default method is ACL spectral
        # clustering, then collect the local-minimum samples.
        collector = lgc.NCPData(graph)
        collector.default_method = lgc.partialfunc(
            lgc.spectral_clustering, alpha=0.01, rho=1.0e-4, method="acl")
        collector.add_localmin_samples(ratio=1)
        return collector

    print(sample_localmins(load_example_graph()).as_data_frame())

    two_node = lgc.GraphLocal()
    two_node.list_to_gl([0,1],[1,0],[1,1])
    sample_localmins(two_node)
# Example #9
# 0
def test_time(test_method, epsilons, graph_name):
    """
    Time ``lgc.approximate_PageRank`` with ``test_method`` for each epsilon.

    ``alpha``, ``rho`` and ``ref_node`` are read from module globals. The
    graph is loaded once, before any timing starts.

    Args:
        test_method: value forwarded as ``method``.
        epsilons: iterable of epsilon values to time, one run each.
        graph_name: base name of the ``.graphml`` dataset to load.

    Returns:
        A list with one elapsed time in milliseconds per epsilon, in
        iteration order.

    Note:
        The original used ``time.clock()``, which was removed in Python
        3.8. ``time.perf_counter()`` is used instead; it measures
        wall-clock time, whereas ``time.clock()`` measured processor time
        on Unix.
    """
    global alpha
    global rho
    global ref_node

    times = []
    g = lgc.GraphLocal(
        f"../../LocalGraphClustering/notebooks/datasets/{graph_name}.graphml",
        "graphml")
    for eps in epsilons:
        t = time.perf_counter()
        lgc.approximate_PageRank(g,
                                 ref_node,
                                 alpha=alpha,
                                 rho=rho,
                                 epsilon=eps,
                                 method=test_method)
        # Convert seconds to milliseconds.
        times.append((time.perf_counter() - t) * 1000)
    return times
# Example #10
# 0
def compare_result(graph_name):
    """Print the node sets found by the 'l1reg' and 'l1reg-rand' methods.

    Loads ``{graph_name}.graphml`` once and runs
    ``lgc.approximate_PageRank`` once per method with epsilon fixed at
    1e-6.  ``alpha``, ``rho`` and ``ref_node`` are read from module
    globals.
    """
    global alpha, rho, ref_node

    tolerance = 1e-6
    graph = lgc.GraphLocal(
        f"../../LocalGraphClustering/notebooks/datasets/{graph_name}.graphml",
        "graphml")
    # (method, heading) pairs, in the order they are run and reported.
    runs = (
        ('l1reg', 'results from non-random method:'),
        ('l1reg-rand', 'results from random method:'),
    )
    for method, heading in runs:
        nodes, _ = lgc.approximate_PageRank(
            graph, ref_node,
            alpha=alpha, rho=rho, epsilon=tolerance, method=method)
        print(f'{heading}\n{nodes}')
def load_example_graph():
    """Return the dolphins edge list from the test data as a GraphLocal."""
    edge_file = "localgraphclustering/tests/data/dolphins.edges"
    return localgraphclustering.GraphLocal(edge_file, separator=" ")
def read_minnesota():
    """Load ``minnesota.edgelist`` (space-separated) into a GraphLocal.

    NOTE(review): the graph is only bound to a local and nothing is
    returned, so callers receive ``None`` -- confirm whether a ``return``
    is missing.
    """
    edge_file = 'notebooks/datasets/minnesota.edgelist'
    g = lgc.GraphLocal(edge_file, 'edgelist', ' ')
# Example #13
# 0
def test_load():
    """Load the dolphins edge list and check ``is_disconnected()`` is False."""
    edge_file = "localgraphclustering/tests/data/dolphins.edges"
    graph = lgc.GraphLocal(edge_file, separator=" ")
    disconnected = graph.is_disconnected()
    assert disconnected == False
def test_ncp_crd_big():
    """Smoke-test the ``crd`` NCP run (4 threads) on the minnesota edge list.

    The result is wrapped in ``NCPPlots`` but nothing is drawn or
    asserted; the test passes as long as nothing raises.
    """
    graph = lgc.GraphLocal()
    graph.read_graph(
        "notebooks/datasets/minnesota.edgelist", "edgelist",
        remove_whitespace=True)
    frame = lgc.NCPData(graph).crd(
        ratio=0.5, w=10, U=10, h=1000, nthreads=4)
    lgc.ncpplots.NCPPlots(frame)
import localgraphclustering as lgc

# Output directory and result file names.
# NOTE(review): none of these are read by the functions visible below --
# presumably they are consumed elsewhere; confirm.
outputPath = "../output/"
randFile = "time-rand.txt"
normFile = "time-norm.txt"
randqFile = "q-rand.txt"
normqFile = "q-norm.txt"
# Base name of the edge list loaded into `g` below.
DatasetName = "JohnsHopkins"

# Reference node list passed to every approximate_PageRank call.
ref_node = [3]
# Parameter sweeps, 50 values each: alphas evenly spaced over [0.1, 0.5],
# rhos log-spaced from 1e-4 down to 1e-7, epsilons log-spaced from 1e-2
# down to 1e-6.
# NOTE(review): `np` is used here but no numpy import is visible before
# this point -- confirm it is imported earlier.
alphas = np.linspace(0.1, 0.5, 50)
rhos = np.logspace(-4, -7, 50)
epsilons = np.logspace(-2, -6, 50)

# The graph is loaded once at import time and shared by the measure*
# functions through the module-level name `g`.
print("loading graph...")
g = lgc.GraphLocal("../../LocalGraphClustering/notebooks/datasets/{0}.edgelist".format(DatasetName))

def measureTimeEpsilon(methodName):
    """Run approximate PageRank once per value in the global ``epsilons``.

    Each value is printed before its run.  The graph ``g`` and
    ``ref_node`` come from module globals.  No timing is recorded here;
    presumably the caller times the whole sweep.
    """
    for tolerance in epsilons:
        print("eps: ", tolerance)
        lgc.approximate_PageRank(
            g, ref_node, epsilon=tolerance, method=methodName)

def measureTimeAlpha(methodName):
    """Run approximate PageRank once per value in the global ``alphas``.

    Each value is printed before its run.  The graph ``g`` and
    ``ref_node`` come from module globals.  No timing is recorded here;
    presumably the caller times the whole sweep.
    """
    for alpha_value in alphas:
        print("alpha: ", alpha_value)
        lgc.approximate_PageRank(
            g, ref_node, alpha=alpha_value, method=methodName)

def measureTimeRho(methodName):
    """Run approximate PageRank once per value in the global ``rhos``.

    Each value is printed before its run.  The graph ``g`` and
    ``ref_node`` come from module globals.  No timing is recorded here;
    presumably the caller times the whole sweep.
    """
    for rho_value in rhos:
        print("rho: ", rho_value)
        lgc.approximate_PageRank(
            g, ref_node, rho=rho_value, method=methodName)
import numpy as np
import localgraphclustering as lgc
import matplotlib.pyplot as plt

from pagerank.bounded_accelerated_proximal_gradient_descent import BoundedAcceleratedProximalGradientDescent

# Script entry point: load one graph and run the bounded accelerated
# proximal gradient descent solver on it.
if __name__ == "__main__":
    # Input graph file and the format it is read as.
    graph_name = '../data/ppi_mips.graphml'
    graph_type = 'graphml'

    # Solver parameters; forwarded unchanged to the constructor below.
    alpha = 0.15
    epsilon = 1e-6
    rho = 1e-4
    graph = lgc.GraphLocal(graph_name, graph_type)
    # Seed node list handed to the solver.
    seed_nodes = [0]

    solver = BoundedAcceleratedProximalGradientDescent(graph,
                                                       seed_nodes=seed_nodes,
                                                       alpha=alpha,
                                                       epsilon=epsilon,
                                                       rho=rho)
    # Presumably caps the solver at 100 iterations -- confirm against
    # the solver class.  The result of solve() is discarded here.
    solver.set_max_iter(100)
    solver.solve()
 def load_graph(self, fname, ftype='edgelist', separator='\t'):
     """Read ``fname`` into a GraphLocal and store it on ``self.g``.

     Args:
         fname: path of the graph file.
         ftype: file type forwarded to ``lgc.GraphLocal``.
         separator: separator forwarded to ``lgc.GraphLocal``.
     """
     loaded = lgc.GraphLocal(fname, ftype, separator)
     self.g = loaded
# Example #18
# 0
def lgc_data(name):
    """Load one of the known example datasets as an ``lgc.GraphLocal``.

    Dataset files are resolved relative to the directory containing this
    file, under ``../../notebooks/datasets``.

    Args:
        name: dataset key, e.g. ``"senate"`` or ``"JohnsHopkins"``.

    Returns:
        The ``lgc.GraphLocal`` built from the matching dataset file.

    Raises:
        Exception: if ``name`` matches none of the known keys.
    """
    lgc_path = os.path.dirname(os.path.realpath(__file__))

    space_edgelist = ('edgelist', ' ')
    graphml = ('graphml',)
    with_header = {'header': True}
    no_options = {}
    # (key, file name, extra positional args, extra keyword args).
    # Entries are compared against ``name`` in this order.
    # NOTE(review): the "ppi-h**o" key and file name look masked by
    # whatever extracted this snippet -- confirm the real dataset name.
    catalogue = (
        ("senate", "senate.edgelist", space_edgelist, no_options),
        ("Erdos02", "Erdos02-cc.edgelist", space_edgelist, no_options),
        ("dolphins", "dolphins.smat", space_edgelist, no_options),
        ("JohnsHopkins", "JohnsHopkins.edgelist", ('edgelist', '\t'),
         no_options),
        ("Colgate88", "Colgate88_reduced.graphml", graphml, no_options),
        ("usroads", "usroads-cc.edgelist", space_edgelist, no_options),
        ("ppi_mips", "ppi_mips.graphml", graphml, no_options),
        ("ASTRAL", "ASTRAL-small-sized-mammoth-sims-geq-2.graphml",
         graphml, no_options),
        ("sfld",
         "sfld_brown_et_al_amidohydrolases_protein_similarities_for_beh.graphml",
         graphml, no_options),
        ("find_V", "find_V.graphml", graphml, no_options),
        ("ppi-h**o", "ppi-h**o.edgelist", space_edgelist, with_header),
        ("neuro-fmri-01", "neuro-fmri-01.edges", space_edgelist,
         with_header),
        ("ca-GrQc", "ca-GrQc-cc.csv", space_edgelist, with_header),
        ("disconnected", "disconnected.smat", space_edgelist, no_options),
    )

    for known_name, file_name, loader_args, loader_kwargs in catalogue:
        if name == known_name:
            dataset = os.path.join(
                lgc_path, "../../notebooks/datasets", file_name)
            return lgc.GraphLocal(dataset, *loader_args, **loader_kwargs)
    raise Exception("Unknown graph name")
            si,ei = g.adjacency_matrix.indptr[node],g.adjacency_matrix.indptr[node+1]
            neighs = g.adjacency_matrix.indices[si:ei]
            for i in range(len(neighs)):
                if visited[neighs[i]] == 0:
                    visited[neighs[i]] = 1
                    seeds.append(neighs[i])
                    Q.put(neighs[i])
    return seeds



import sys
sys.path.append("../../../LocalGraphClustering/")
import localgraphclustering as lgc

# Graph whose nodes are embedded below.
G = lgc.GraphLocal("../../dataset/lawlor-spectra-k32.edgelist","edgelist")

import pandas as pd
# NOTE(review): NearestNeighbors is not used by the statements visible
# here -- presumably needed elsewhere; confirm.
from sklearn.neighbors import NearestNeighbors

# Read the coordinate file, take columns 0 and 1, rescale them
# (column 0 by 10, column 1 by 4) and negate both to get x and y.
df = pd.read_table("../../dataset/lawlor-spectra-k32.coords",header=None)
coords = df[[0,1]].values
coords[:,1] *= 4
coords[:,0] *= 10
x,y = -1*coords[:,0],-1*coords[:,1]

# Indices of the nodes whose x value exceeds 0.002.
S = np.nonzero((x>0.002))[0]
records_flow = local_embedding(G,S,x,y,ntrials=500,delta=1.0,nprocs=120)

# Write the result inside a context manager so the file is flushed and
# closed even if pickling fails; the original never closed the handle.
with open("records_flow_002_1_node_3.p","wb") as wptr:
    pickle.dump(records_flow,wptr)
# Example #20
# 0
import time
import matplotlib.pyplot as plt

import sys, os

# Dataset paths are resolved relative to the current working directory.
data_path = os.getcwd()

try:
    import localgraphclustering as lgc
except ImportError:
    # Only a failed import triggers the fallback; the original bare
    # ``except:`` also swallowed unrelated errors such as
    # KeyboardInterrupt.
    # when the package is not installed, import the local version instead.
    # the notebook must be placed in the original "notebooks/" folder
    sys.path.append("../")
    import localgraphclustering as lgc

# Graph used by the code that follows in this script.
g = lgc.GraphLocal(os.path.join(data_path, 'data/ppi_mips.graphml'), 'graphml')


def proximal_gradient_descent(A,
                              dn_sqrt,
                              d_sqrt,
                              Q,
                              ref_node,
                              rho,
                              alpha,
                              eps,
                              stepsize=1.):

    # Number of nodes in the graph
    n = dn_sqrt.shape[0]
    plt.show()
    pickle.dump(ncp,
                open('results/' + method + "-ncp-" + gname + '.pickle', 'wb'))
    ncp.write('results/' + method + "-ncp-csv-" + gname, writepython=False)
    pickle.dump(ncp2,
                open('results/' + method + "-ncp2-" + gname + '.pickle', 'wb'))
    ncp2.write('results/' + method + "-ncp2-csv-" + gname, writepython=False)


# Graphs to process: display name -> graphml file path.  A value may also
# be a (path, separator) tuple.
mygraphs = {
    # 'email-Enron': '/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/email-Enron.edgelist',
    # 'pokec': '/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/soc-pokec-relationships.edgelist',
    'ppi': '/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/ppi_mips.graphml',
    'sfld': '/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/sfld_brown_et_al_amidohydrolases_protein_similarities_for_beh.graphml',
}

# Run the MQI improvement on every graph listed above.
for gname, gfile in mygraphs.items():
    print(gname, gfile)
    if isinstance(gfile, tuple):
        gfile, sep = gfile[0], gfile[1]
    else:
        sep = ' '
    # NOTE(review): `sep` is computed above, but a tab is what is passed
    # to GraphLocal here -- confirm which separator is intended.
    g = lgc.GraphLocal(os.path.join("..", "data", gfile), 'graphml', "\t")
    g.discard_weights()
    run_improve(
        g,
        gname=gname,
        method="mqi",
        methodname="MQI",
        delta=100,
        timeout=100000000,
    )
def read_minnesota():
    g = localgraphclustering.GraphLocal('notebooks/datasets/minnesota.edgelist','edgelist',' ')
    ncp = lgc.NCPData(g,store_output_clusters=True)
    ncp.approxPageRank(ratio=ratio,nthreads=nthreads,localmins=False,neighborhoods=False,random_neighborhoods=False)
    sets = [st["output_cluster"] for st in ncp.results]
    print("Make an NCP object for Improve Algo")
    ncp2 = lgc.NCPData(g)
    print("Going into improve mode")
    output = ncp2.refine(sets, method=method, methodname=methodname, nthreads=nthreads, timeout=timeout, **{"delta": delta})
    fig = lgc.NCPPlots(ncp2).mqi_input_output_cond_plot()[0]
    fig.axes[0].set_title(gname + " " + methodname+"-NCP")
    fig.savefig("figures/" + method + "-ncp-"+gname+".pdf", bbox_inches="tight", figsize=(100,100))
    plt.show()
    pickle.dump(ncp, open('results/' + method + "delta" + delta + "-ncp-" + gname + '.pickle', 'wb'))
    pickle.dump(ncp2, open('results/' + method + "delta" + delta + "-ncp2-" + gname + '.pickle', 'wb'))

# Graphs to process: display name -> edge list file path.  A value may
# also be a (path, separator) tuple.
mygraphs = {
    'email-Enron': '/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/email-Enron.edgelist',
    'pokec': '/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/soc-pokec-relationships.edgelist',
    'livejournal': '/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/soc-LiveJournal1.edgelist',
}

# NOTE(review): `start` is taken once, before the loop, so the elapsed
# time printed for each graph is cumulative -- confirm this is intended.
start = time.time()
for gname, gfile in mygraphs.items():
    print(gname, gfile)
    if isinstance(gfile, tuple):
        gfile, sep = gfile[0], gfile[1]
    else:
        sep = ' '
    # NOTE(review): `sep` is computed above, but a tab is what is passed
    # to GraphLocal here -- confirm which separator is intended.
    g = lgc.GraphLocal(os.path.join("..", "data", gfile), 'edgelist', "\t")
    g.discard_weights()
    run_improve(
        g,
        gname=gname,
        method="sl",
        methodname="SimpleLocal",
        delta=0.6,
        timeout=100000000,
    )
    end = time.time()
    print("Elapsed time for ", gname , " is ", end - start)
# Example #24
# 0
# Import matplotlib
import matplotlib.pyplot as plt

import sys, traceback
import os

sys.path.insert(0, os.path.join("..", "LocalGraphClustering", "notebooks"))
import helper
import pickle
import csv

# Compute an approximate-PageRank NCP for the senate graph and save two
# plots of it as PNG files.
# NOTE(review): `lgc` is used below but no import of it is visible in
# this snippet -- confirm it is imported elsewhere (e.g. via `helper`).
print("Running senate")

# Read graph. This also supports gml and graphml format.
g = lgc.GraphLocal('./datasets/senate.graphml', 'graphml')
g.discard_weights()

# Build the NCP data for the graph.
ncp_instance = lgc.NCPData(g)
ncp_instance.approxPageRank(ratio=0.8, timeout=5000000, nthreads=24)

# Each plot call below rebinds fig, ax and min_tuples; the figure is
# saved before plt.show() is called.
ncp_plots = lgc.NCPPlots(ncp_instance, method_name="acl")
#plot conductance vs size
fig, ax, min_tuples = ncp_plots.cond_by_size()
plt.savefig('figures/cond_card_senate.png', bbox_inches='tight')
plt.show()
#plot conductance vs volume
fig, ax, min_tuples = ncp_plots.cond_by_vol()
plt.savefig('figures/cond_vol_senate.png', bbox_inches='tight')
plt.show()
#plot isoperimetry vs size