Example #1
0
def test_cliques():
    """ Smoke test: runs both clique finders on the karate club graph.

    The results are discarded; the calls only have to complete.
    """
    karate = CD.karate_club_graph()
    CD.mod_cliques(karate)
    CD.lin_cliques(karate)
def test_cliques():
    """ Exercises CD.mod_cliques and then CD.lin_cliques on the karate club
    graph.  Nothing is returned or checked.
    """
    club_graph = CD.karate_club_graph()

    CD.mod_cliques(club_graph)

    CD.lin_cliques(club_graph)
Example #3
0
def vis_coauthor_communities(graph, source, i, prefix, options, radius,
                             overlap):
    """ Finds the communities produced by different methods for the astro
    citation network
    """
    interest = CD.get_ball(graph, options[source][i], radius)
    print "Displaying and computing for a subset of ", len(interest), " nodes."
    sgraph = nx.subgraph(graph, interest)

    cleaned = {}
    for key in options.keys():
        """ for generating sub community structure
        """
        if key == source:
            # split the overarching with the substructure
            cleaned[source] = [options[source][i]]
            options['Parallel Subcommunities'] = options[source][:i]
            options['Parallel Subcommunities'].extend(options[source][i + 1:])
            key = 'Parallel Subcommunities'

        filtered = [filter(lambda n: n in interest, c) for c in options[key]]
        filtered = filter(lambda c: len(c) > 0, filtered)
        cleaned[key] = filtered
        cleaned[key] = CD.clean_of_duplicate_c(cleaned[key], overlap=overlap)

    compare_methods(sgraph, prefix, options=cleaned)
def gen_cond_analysis():
    """ Draws the community size and overlap figures for the condensed
    matter coauthor network.

    Returns
    -------
    coptions : whatever CD.all_detection_methods returned for the network
    """
    cgraph = CD.coauthor_cond()
    metis_path = ("CommunityDetection/implementation/data/"
                  "CollaborationNetworks/metis/condensed_metis")
    coptions = CD.all_detection_methods(
        cgraph,
        param=[1., 1., 5./16966., 4, 10, .8, 300],
        path=metis_path)

    # community sizes, one list per method, in the order of the labels below
    size_keys = ['Linearity Communities', 'Parallel Communities',
                 'Modularity Communities', 'Metis Communities']
    sizes = [[len(c) for c in coptions[key]] for key in size_keys]
    gen_csize_fig([0, 40, 80], [0, .03], sizes,
                  ['Linearity', 'Parallel', 'Modularity', 'Metis'],
                  'cond_', ['r', 'k', 'b', 'g'])

    compared = [coptions['Linearity Communities'],
                coptions['Parallel Communities']]
    gen_overlap_fig(compared, [0, 15, 20], [0, 250, 500],
                    ['Linear', 'Parallel'], 'cond_linear_parallel',
                    ['r', 'k'])
    return coptions
def find_football_communities():
    """ Runs compare_methods on the football network with a fixed layout.

    Every known community is laid out on its own small circle, and the
    small circles are centred on points of one larger circle.
    """
    fgraph = CD.football_graph()
    known = CD.football_known_c()
    # rotate the communities stored at positions 7, 8 and 9
    known[7], known[8], known[9] = known[8], known[9], known[7]

    ring = nx.Graph()
    ring.add_nodes_from(range(12))
    centers = nx.circular_layout(ring, scale = 10)

    pos = {}
    subgraphs = [nx.subgraph(fgraph, c) for c in known]
    for index, g in enumerate(subgraphs):
        (off_x, off_y) = centers[index]
        offset = np.array([off_x, off_y])
        for n, place in nx.circular_layout(g, scale=2.).iteritems():
            pos[n] = place + offset

    colors = {76:1, 11:2, 7:3, 102:4, 104:5, 47:6, 98:7,
              96:8, 23:9, 94:10, 27:0}
    compare_methods(fgraph,
                    'football_',
                    param=[1., 1., 5./115., 4, 0, .7, 20],
                    known=known,
                    pos=pos,
                    color_map=colors,
                    data_path="FootballGames/football_metis")
def vis_coauthor_communities(graph, source, i, prefix, options, radius, overlap):
    """ Finds the communities produced by different methods for the astro
    citation network
    """    
    interest = CD.get_ball(graph, options[source][i], radius)
    print "Displaying and computing for a subset of ", len(interest), " nodes."
    sgraph = nx.subgraph(graph, interest)
    
    cleaned = {}
    for key in options.keys():
        """ for generating sub community structure
        """
        if key == source:
            # split the overarching with the substructure
            cleaned[source] = [options[source][i]]
            options['Parallel Subcommunities'] = options[source][:i]
            options['Parallel Subcommunities'].extend(options[source][i+1:])
            key = 'Parallel Subcommunities'
        
        filtered = [filter(lambda n: n in interest, c) for c in options[key]]
        filtered = filter(lambda c: len(c) > 0, filtered)
        cleaned[key] = filtered
        cleaned[key] = CD.clean_of_duplicate_c(cleaned[key], overlap=overlap)

    compare_methods(sgraph, prefix, options=cleaned)
Example #7
0
def gen_relativity_analysis():
    """ Generates all figures in ch5 for the relativity coauthor network
    To get subcommunities, uncomment code in vis_coauthor_communities

    The per-community visualisation loop below iterates over an empty list,
    so it is currently switched off.

    Returns
    -------
    roptions : whatever CD.all_detection_methods returned for the network
    """
    rgraph = CD.coauthor_relativity()
    rpath = ("CommunityDetection/implementation/data/CollaborationNetworks/"
             "metis/relativity_metis")
    roptions = CD.all_detection_methods(
        rgraph, param=[1., 5., 5. / 2694., 3, 10, .8, 200], path=rpath)

    # keys[i] and c_id[i] together name one community to visualise
    keys = ['Modularity Communities',
            'Linearity Communities',
            'Parallel Communities',
            'Modularity Communities',
            'Modularity Communities']
    c_id = [24, 108, 319, 10, 3]

    for i in []:  #range(len(c_id)):
        label = 'relativity_' + str(c_id[i]) + '_'
        vis_coauthor_communities(rgraph, keys[i], c_id[i], label, roptions,
                                 0, .75)

    size_keys = ['Linearity Communities', 'Parallel Communities',
                 'Metis Communities', 'Modularity Communities']
    sizes = [[len(c) for c in roptions[key]] for key in size_keys]
    gen_csize_fig([0, 100, 200], [0, .05, .1], sizes,
                  ['Linearity', 'Parallel', 'Metis', 'Modularity'],
                  'relativity_', ['r', 'k', 'b', 'g'])

    compared = [roptions['Linearity Communities'],
                roptions['Parallel Communities']]
    gen_overlap_fig(compared, [0, 15], [0, 200], ['Linear', 'Parallel'],
                    'relativity_linear_parallel', ['r', 'k'])
    return roptions
def main():
    """Run the whole analysis as a fixed sequence of steps.

    Calls the data_preprocess functions first, then the word_cloud
    functions, and finally community_detection.output_gexf_file.  Takes no
    arguments and returns nothing.
    """
    # NOTE(review): each step presumably reads what the previous one wrote,
    # so the order is likely significant -- confirm before reordering.
    data_preprocess.split_data('./data/emails.csv')
    data_preprocess.convert_email()
    data_preprocess.build_corpus()
    data_preprocess.build_email_corpus_by_selected_person()
    word_cloud.create_word_cloud()
    # NOTE(review): 200 is presumably the number of words to report -- confirm
    word_cloud.output_word_frequencies(200)
    community_detection.output_gexf_file(graph_type="unweighted",
                                         min_degree=100,
                                         max_degree=200)
Example #9
0
def local_seed_communities(graph, core, r, d, method):
    """Builds candidate seed communities from the neighbourhood of a core.

    Parameters
    ---------
    graph : the overall graph
    core : a really dense subset of nodes to work from
    r : the radius out to consider
    d : the minimum number of core nodes a neighbour of the core must be
        adjacent to in order to be kept
    method : 'mod' splits with CD.modularity_run; any other value splits
             with CD.linearity_run

    Returns
    -------
    possibleseeds : a list of lists.  Every community found in the ball of
    radius r (with the core removed) is extended with the core, and the core
    itself is always the last entry.

    Judgements
    ----------
    All judgement calls are passed in through r and d
    Additional judgement made for running linearity on abc parameters
    """
    ball = CD.get_ball(graph, core, r)
    subgraph = graph.subgraph(ball)

    # for every neighbour of a core node, count the core nodes it touches
    links_to_core = {}
    for member in core:
        for neighbor in subgraph.neighbors(member):
            links_to_core[neighbor] = links_to_core.get(neighbor, 0) + 1

    # drop the neighbours that are too weakly tied to the core
    for neighbor in links_to_core:
        if links_to_core[neighbor] < d:
            subgraph.remove_node(neighbor)

    subgraph.remove_nodes_from(core)

    if subgraph.number_of_edges() == 0:
        # nothing left to split: the core is the only candidate
        return [core]

    if method == 'mod':
        split = CD.modularity_run(subgraph)
    else:
        split = CD.linearity_run(subgraph, 1., .01, .01)

    possibleseeds = CD.part_to_sets(split)
    for community in possibleseeds:
        community.extend(core)
    possibleseeds.append(core)

    return possibleseeds
                
                
def find_karate_communities():
    """ Runs compare_methods on the karate club network, passing in the
    known communities for comparison.
    """
    kgraph = CD.karate_club_graph()
    known = CD.karate_known_c()
    param = [1., 5., 3.5/34., 3, 0, .55, 34]
    colors = {27:0, 1:2, 17:3, 25:4}
    compare_methods(kgraph, 'karate_', param=param, known=known,
                    color_map=colors,
                    data_path="KarateClub/karate_metis")
Example #11
0
def gen_path_single(graph,
                    seed,
                    name,
                    ax,
                    metric,
                    comp,
                    ylim=[0, .3],
                    legend=False,
                    width=0.01,
                    param=None):
    """ Generates and plots the I E path and manages the space
    Parameters
    ----------
    graph : a networkx graph
    seed : a list of nodes to start from
    name : the title of the graph
    ax : the subplot to put everything
    metric : the single community metric to optimize
    comp : the comparison function either CD.compare_min or max
    legend : whether or not to show the legend
    width : the width of the arrow head
    """
    # plot metric optimized path
    (I_path, E_path, order) = CD.path_I_E(graph,
                                          seed[:],
                                          metric,
                                          comp,
                                          param=param)
    print "Last point for ", name, " I ", I_path[-1], " E ", E_path[-1]
    CD.plot_path(I_path[:], E_path[:], ax, 'r', name, width)

    # plot corner cases
    ax.plot(1, 0, 'kD', label='Ideal', markersize=10)
    (graph_I, graph_E) = CD.I_E(graph, graph.nodes())
    ax.plot(graph_I, 0, 'mD', label='Entire Graph', markersize=10)

    # set labels etc
    ax.set_title(name, fontsize=24)
    plt.xticks([0, .3, .7, 1], ['0', '0.3', '0.7', '1'])
    plt.yticks(ylim, [str(y) for y in ylim])
    ax.set_xlim(-0.01, 1.01)
    ax.set_ylim(ylim[0] - width / 2., ylim[1] + width / 2.)
    ax.set_xlabel(r'$I(C)$', fontsize=24)
    ax.set_ylabel(r'$E(C)$', fontsize=24)
    if legend:
        handles, labels = ax.get_legend_handles_labels()
        ax.legend(handles, labels, loc=2)

    plt.show()

    # save the figure
    plt.savefig(name + ".eps")
    plt.savefig(name + ".pdf")
Example #12
0
def local_seed_communities(graph, core, r, d, method):
    """A helper function that does the heart of pulling out local communities.

    Parameters
    ---------
    graph : the overall graph
    core : a really dense subset of nodes to work from
    r : the radius out to consider
    d : the minimum degree connectivity to the core for a node adjacent to
        the core to be kept
    method : 'mod' to split with CD.modularity_run, anything else to split
             with CD.linearity_run

    Returns
    -------
    possibleseeds : a list of lists, where each list is the core unioned with
    one community found in the fractured ball of radius r around the core;
    the core on its own is appended as the final entry.

    Judgements
    ----------
    All judgement calls are passed in through r and d
    Additional judgement made for running linearity on abc parameters
    """
    subgraph = graph.subgraph(CD.get_ball(graph, core, r))

    # tally how many core nodes each neighbour of the core is attached to
    core_links = {}
    for n in core:
        for m in subgraph.neighbors(n):
            core_links[m] = core_links.get(m, 0) + 1

    # filter out nodes that are not well enough connected to the core
    weak = [m for m in core_links if core_links[m] < d]
    for m in weak:
        subgraph.remove_node(m)

    subgraph.remove_nodes_from(core)

    possibleseeds = []
    if subgraph.number_of_edges() != 0:
        split = (CD.modularity_run(subgraph) if method == 'mod'
                 else CD.linearity_run(subgraph, 1., .01, .01))

        possibleseeds = CD.part_to_sets(split)
        for seed in possibleseeds:
            seed.extend(core)

    possibleseeds.append(core)
    return possibleseeds
Example #13
0
def gen_path_set(graph,
                 I_path,
                 E_path,
                 S_path,
                 name,
                 ylim=[0, 1],
                 legend=False,
                 width=0.01,
                 fig=False,
                 color='r'):
    """ Given an I E S path plots
    Parameters
    ----------
    graph : a networkx graph, used to place the 'Entire Graph' marker
    I_path : a list of I(S) values
    E_path : a list of E(S) values
    S_path : a list of |S| values (not used by this function)
    name : the title and saving file name
    ylim : the two y tick positions, also used for the y limits (only read,
           never modified)
    legend : whether or not to show legend (not likely)
    width : the width of the arrow head
    fig : the figure to draw on; a new one is created when this is falsy
    color : the color passed to CD.plot_path

    Side effects
    ------------
    Saves name + ".eps" and name + ".pdf" from fig, then calls plt.show().
    """
    if not fig:
        fig = plt.figure()

    ax = fig.add_subplot(111)

    CD.plot_path(I_path[:], E_path[:], ax, color, name, width)

    # plot corner cases
    ax.plot(1, 0, 'kD', label='Ideal', markersize=10)
    (graph_I, _) = CD.I_E(graph, graph.nodes())
    ax.plot(graph_I, 0, 'mD', label='Entire Graph', markersize=10)

    # set the dimensions and labels on ax itself rather than on whatever
    # axes pyplot currently considers active
    ax.set_title(name, fontsize=24)
    ax.set_xticks([0, .3, .7, 1])
    ax.set_xticklabels(['0', '0.3', '0.7', '1'])
    ax.set_yticks(ylim)
    ax.set_yticklabels([str(y) for y in ylim])
    ax.set_xlim(-.01, 1.01)
    ax.set_ylim(ylim[0] - width / 2., ylim[1] + width / 2.)
    ax.set_xlabel(r'$I(S)$', fontsize=24)
    ax.set_ylabel(r'$E(S)$', fontsize=24)
    if legend:
        handles, labels = ax.get_legend_handles_labels()
        ax.legend(handles, labels, loc=2)

    # save the figure before showing it: once a blocking show() returns the
    # figure may already be gone and the saved files would be blank
    fig.savefig(name + ".eps")
    fig.savefig(name + ".pdf")

    plt.show()
def test_bar():
    """Tests the data structure Bar
    """

    print "Testing the Bar Structure"

    print "Testing initialization"
    fgraph = CD.football_graph()
    b = CD.Bar(fgraph, 1226, 1., 1., 1.)
    check_bar(b, 0, 1226, 115, 0)

    print "Testing possible swap"

    print "JTODO: Testing"
def test_linearity():
    """ Runs CD.linearity_run on the karate, football and physics citation
    graphs with several parameter settings and passes every resulting
    partition to check_partition.

    NOTE(review): the second argument of check_partition is presumably the
    expected number of communities -- confirm against check_partition.
    """
    
    print "Testing linearity on Karate for Cliques: "
    
    kgraph = CD.karate_club_graph()    
    part = CD.linearity_run(kgraph, 0.75, 1., .01)
    check_partition(part, 19)
    
    print "Testing linearity on football for Complete Cover: "
    
    fgraph = CD.football_graph()
    # the same football graph is run twice with different parameters
    part = CD.linearity_run(fgraph, 0., 1., 0.)
    check_partition(part, 1)
    print "    Now for real analysis"
    part = CD.linearity_run(fgraph, 1., 1., .01)
    check_partition(part, 13)
    
    
    print "Testing linearity on physics archive for Connected Components: "
    pgraph = CD.physics_citations()  
    print "    Loaded Graph"  
    # the citation graph is loaded once and reused for both runs below
    part = CD.linearity_run(pgraph, 0., 0., 0.0001)
    check_partition(part, 143)
    print "Testing linearity on physics archive for Cliques: "
    part = CD.linearity_run(pgraph, 1., 0., 0.0000001)
    check_partition(part, 13112)
Example #16
0
def draw_metric_change_vectors(metric, degree, fig=None, c=None):
    """ Draws the vector of possible changes in the metric by adding nodes

    Parameters
    ----------
    metric : a function called as metric(graph, community)
    degree : degree + 1 outside nodes are tried, one at a time
    fig : the figure to draw on; a new one with a single subplot is created
          when this is None
    c : the starting community as a list of nodes (copied, never modified);
        when None, 20 nodes are drawn at random, with replacement

    Method
    ------
    Each tried node is placed at the (internal density, external density)
    of the community with that node added, and "center" at the densities of
    the community itself.  Node sizes grow with the rank of the metric
    value, smallest value first.
    """

    # create a subset of the football graph to work with
    graph = CD.football_graph()
    nodes = graph.nodes()
    if c is None:
        c = [nodes[random.randint(0, len(nodes) - 1)] for i in range(20)]
    else:
        c = c[:]

    ext_nodes = list(set(nodes) - set(c))

    if fig is None:
        fig = plt.figure()
        fig.add_subplot(111)

    def densities(members):
        return (CD.m_internal_density(graph, members),
                CD.m_external_density(graph, members))

    change_graph = nx.DiGraph()
    change_graph.add_node("center")
    pos = {"center": densities(c)}
    value = {"center": metric(graph, c)}

    for i in range(degree + 1):
        n = ext_nodes[i]
        # evaluate the community with n added, leaving c itself untouched
        candidate = c + [n]
        pos[n] = densities(candidate)
        value[n] = metric(graph, candidate)
        change_graph.add_edge("center", n)

    # rank every node by its metric value, smallest first
    rank = sorted(value.items(), key=lambda item: item[1])
    order = dict((node, place) for place, (node, _) in enumerate(rank))

    # sizes start at 20 and grow by 15 per rank
    sizes = [20 + 15 * order[n] for n in change_graph]
    nx.draw(change_graph,
            pos,
            node_color='b',
            node_size=sizes,
            with_labels=False)
def lin_cliques(graph):
    """Given a graph, finds the linearity partitions.  Then returns the cliques
       within each partition.

    Parameters
    ----------
    graph : the graph handed to CD.linearity_run

    Returns
    -------
    cliques : a list of (nodes, sub cliques) pairs, one per subgraph returned
              by CD.part_to_subgraphs

    Side effects
    ------------
    Pickles (graph, part, cliques) to 'cliques_within_lin_communities.pkl'
    in the current directory.
    """
    part = CD.linearity_run(graph, .5, .5, .001)
    cliques = [(subgraph.nodes(), CD.find_sub_cliques(subgraph))
               for subgraph in CD.part_to_subgraphs(graph, part)]

    # the file is opened only once there is something to write, so a failed
    # run no longer leaves an empty file behind, and the with-block closes
    # the handle even when pickling raises
    with open('cliques_within_lin_communities.pkl', 'wb') as pf:
        pickle.dump((graph, part, cliques), pf)

    return cliques
def physics_citations():
    """Builds the graph of papers citing other papers.

    Requires
    --------
    SNAP formulated data file: www.snap.stanford.edu/data/index.html

    Returns
    -------
    G : a NetworkX graph, where each node is a paper and edges indicate
        a citation within the paper to another paper.  The graph has been
        passed through CD.clean_fragments with an argument of 15.
    """
    citations = load_graph(PHYSICS_CITATIONS, format_1)
    CD.clean_fragments(citations, 15)
    return citations
def lin_cliques(graph):
    """Given a graph, finds the linearity partitions.  Then returns the cliques
       within each partition.

    Returns
    -------
    cliques : a list with one (nodes, sub cliques) tuple for every subgraph
              produced by CD.part_to_subgraphs

    Side effects
    ------------
    Writes the pickled tuple (graph, part, cliques) to
    'cliques_within_lin_communities.pkl' in the current directory.
    """
    part = CD.linearity_run(graph, .5, .5, .001)

    cliques = []
    for subgraph in CD.part_to_subgraphs(graph, part):
        cliques.append((subgraph.nodes(), CD.find_sub_cliques(subgraph)))

    # opening the file after the computation means an exception above does
    # not truncate an earlier result; the with-block guarantees the close
    with open('cliques_within_lin_communities.pkl', 'wb') as pf:
        pickle.dump((graph, part, cliques), pf)

    return cliques
def test_ball(subgraph, core, realcom):
    """Given a subgraph, a core, and the real communities.  Find what running
    community detection on the ball of radius 1 returns.

    The core nodes are removed from subgraph in place before detection.

    Returns
    -------
    realistically : one list per detected set, holding realcom[n] for every
                    node n of that set
    """
    subgraph.remove_nodes_from(core)
    detected = CD.part_to_sets(CD.modularity_run(subgraph))
    return [[realcom[n] for n in s] for s in detected]
def generate_votes(wgraph, communities, user_voting_record, elections):
    """ Generates Votes that would have happened if everyone in a community vote
    Paramemters
    -----------
    communities : a list of lists
    user_voting_record : a dictionary of user votes
    elections : the ids of elections to generate
    """
    e_voting_record = {}
    c_voting_record = [CD.community_to_elections(c, user_voting_record)
                       for c in communities]
    n_communities = {}
    for n in wgraph.nodes():
        n_communities[n] = filter(lambda i: n in communities[i],
                                  range(len(communities)))
    
    predicted = 0
    count = 0
    for e in elections:
        count += 1
        if count%100 == 0:
            print "Predicted", count, "elections"
            
        e_voting_record[e] = {-1:0, 0:0, 1:0}
        for n in wgraph.nodes():
            if e in user_voting_record[n]:
                e_voting_record[e][user_voting_record[n][e]] += 1
            else:
                n_vote = predict_vote(e, n_communities[n], c_voting_record)
                if n_vote != None:
                    predicted += 1
                    e_voting_record[e][n_vote] += 1
                    
    return e_voting_record, predicted
def clean_coauthor_network(graph, threshold):
    """ Given a collaboration network, removes low degree nodes. And adds the
    bridges those low degree nodes created between higher degree nodes.
    
    Parameters
    ----------
    graph : a networkx graph
    threshold : any node of degree less than threshold will be removed

    Method
    ------
    Uses the bridges.  A node is a bridge if it connects two node of degree
    threshold or higher that are not otherwise connected.  Removes the node and
    adds an edge between the higher degree nodes to maintain the graph
    structure.
    """
    
    toremove = filter(lambda n: graph.degree(n) < threshold, graph.nodes_iter())
    
    bridges = CD.find_bridges(graph, threshold)
    
    print "Cleaned graph by removing ", len(toremove), " nodes."
    print "Cleaned graph by adding ", len(bridges), " bridges."
    
    graph.remove_nodes_from(toremove)
    graph.add_edges_from(bridges, weight=1.)
def all_cliques(graph, count):
    """ Now, given a large graph, sample the cliques and test for homogeneity
    Parameters
    ----------
    graph : a networkx graph
    count : the maximum number of cliques to pull from the generator

    Returns
    -------
    found : a list of (communities, clique) pairs, where communities holds
            the community of every clique member

    Method
    ------
    * creates a mapping from nodes to communities
    * uses networkx to generate several cliques and maps the clique members to
      communities, if the clique has more than 2 members
      NOTE(review): this docstring used to say "at least 4 members" while
      the code has always tested len(clump) > 2 -- confirm which is meant.
    * pickles (graph, part, found) to 'cliques_within_the_graph.pkl'
    """
    import itertools

    part = CD.modularity_run(graph)

    # islice stops quietly when the generator runs dry, so no exception
    # handling is needed and real errors are no longer swallowed
    found = []
    for clump in itertools.islice(nx.find_cliques(graph), max(count, 0)):
        if len(clump) > 2:
            found.append(([part[n] for n in clump], clump))

    # the with-block closes the file even when pickling raises
    with open('cliques_within_the_graph.pkl', 'wb') as pf:
        pickle.dump((graph, part, found), pf)
    return found
def test_ball(subgraph, core, realcom):
    """Given a subgraph, a core, and the real communities.  Find what running
    community detection on the ball of radius 1 returns.

    Note that subgraph loses its core nodes as a side effect.

    Returns
    -------
    realistically : a list of lists; each inner list maps the nodes of one
                    detected set through realcom
    """
    subgraph.remove_nodes_from(core)

    partition = CD.modularity_run(subgraph)

    realistically = []
    for members in CD.part_to_sets(partition):
        labels = [realcom[node] for node in members]
        realistically.append(labels)

    return realistically
Example #25
0
def learn_parameters(G):
    """Grid-search the CommunityDetector parameters and print the ten best.

    Every combination of tr, h, d and p from the lists below is run `repeat`
    times on G; the mean of compute_accuracy(False) over those runs is the
    score of the combination.  The ten highest scoring combinations are
    pretty-printed in ascending order of score.  Nothing is returned.
    """
    import itertools

    tr_list = [0, 5, 10]
    h_list = [1, 10, 20, 50, 100]
    d_list = [0, 5, 10]
    p_list = [100, 10000, 10000000000000000]

    repeat = 10
    results = []
    # product iterates in the same order as four nested loops would
    for tr, h, d, p in itertools.product(tr_list, h_list, d_list, p_list):
        scores = []
        for _ in range(repeat):
            cdr = cd.CommunityDetector(G, tr, h, d, p)
            cdr.run()
            scores.append(cdr.compute_accuracy(False))
        results.append((sum(scores) / float(repeat), {
            "tr": tr,
            "h": h,
            "d": d,
            "p": p
        }))

    # sort on the score alone: comparing whole tuples falls through to
    # comparing the parameter dicts when two scores tie, which raises
    # TypeError on Python 3
    sorted_results = sorted(results, key=lambda entry: entry[0])[-10:]
    pprint.pprint(sorted_results)
def all_cliques(graph, count):
    """ Now, given a large graph, sample the cliques and test for homogeneity
    Parameters
    ----------
    graph : a networkx graph
    count : the maximum number of cliques to draw

    Returns
    -------
    found : a list of (communities, clique) pairs; communities lists the
            community of each member of the clique

    Method
    ------
    * creates a mapping from nodes to communities
    * uses networkx to generate several cliques and maps the clique members to
      communities, if the clique has more than 2 members
      NOTE(review): this docstring used to say "at least 4 members" while
      the code has always tested len(clump) > 2 -- confirm which is meant.
    * pickles (graph, part, found) to 'cliques_within_the_graph.pkl'
    """
    part = CD.modularity_run(graph)
    cgen = nx.find_cliques(graph)
    found = []

    for i in xrange(count):
        # only the end of the generator ends the sampling early; any other
        # error is allowed to surface instead of being silently swallowed
        try:
            clump = next(cgen)
        except StopIteration:
            break
        if len(clump) > 2:
            found.append(([part[n] for n in clump], clump))

    # a single write, with the file closed even if pickling raises
    with open('cliques_within_the_graph.pkl', 'wb') as pf:
        pickle.dump((graph, part, found), pf)
    return found
def clean_coauthor_network(graph, threshold):
    """ Given a collaboration network, removes low degree nodes. And adds the
    bridges those low degree nodes created between higher degree nodes.
    
    Parameters
    ----------
    graph : a networkx graph
    threshold : any node of degree less than threshold will be removed

    Method
    ------
    Uses the bridges.  A node is a bridge if it connects two node of degree
    threshold or higher that are not otherwise connected.  Removes the node and
    adds an edge between the higher degree nodes to maintain the graph
    structure.
    """

    toremove = filter(lambda n: graph.degree(n) < threshold,
                      graph.nodes_iter())

    bridges = CD.find_bridges(graph, threshold)

    print "Cleaned graph by removing ", len(toremove), " nodes."
    print "Cleaned graph by adding ", len(bridges), " bridges."

    graph.remove_nodes_from(toremove)
    graph.add_edges_from(bridges, weight=1.)
Example #28
0
def generate_votes(wgraph, communities, user_voting_record, elections):
    """ Generates Votes that would have happened if everyone in a community vote
    Paramemters
    -----------
    communities : a list of lists
    user_voting_record : a dictionary of user votes
    elections : the ids of elections to generate
    """
    e_voting_record = {}
    c_voting_record = [
        CD.community_to_elections(c, user_voting_record) for c in communities
    ]
    n_communities = {}
    for n in wgraph.nodes():
        n_communities[n] = filter(lambda i: n in communities[i],
                                  range(len(communities)))

    predicted = 0
    count = 0
    for e in elections:
        count += 1
        if count % 100 == 0:
            print "Predicted", count, "elections"

        e_voting_record[e] = {-1: 0, 0: 0, 1: 0}
        for n in wgraph.nodes():
            if e in user_voting_record[n]:
                e_voting_record[e][user_voting_record[n][e]] += 1
            else:
                n_vote = predict_vote(e, n_communities[n], c_voting_record)
                if n_vote != None:
                    predicted += 1
                    e_voting_record[e][n_vote] += 1

    return e_voting_record, predicted
Example #29
0
def find_karate_communities():
    """ Hands the karate club network and its known communities to
    compare_methods.
    """
    kgraph = CD.karate_club_graph()
    known = CD.karate_known_c()
    color_map = {27: 0, 1: 2, 17: 3, 25: 4}
    compare_methods(kgraph,
                    'karate_',
                    param=[1., 5., 3.5 / 34., 3, 0, .55, 34],
                    known=known,
                    color_map=color_map,
                    data_path="KarateClub/karate_metis")
Example #30
0
def find_football_communities():
    """ Finds the communities produced for the football network, uses compare
    methods to graph

    The layout puts each known community on a circle of scale 2 whose centre
    sits on a circle of scale 10 with twelve positions.
    """
    fgraph = CD.football_graph()
    known = CD.football_known_c()
    # cycle the entries at positions 7, 8 and 9
    known[7], known[8], known[9] = known[8], known[9], known[7]

    center_g = nx.Graph()
    center_g.add_nodes_from(range(12))
    centers = nx.circular_layout(center_g, scale=10)

    pos = {}
    subgraphs = [nx.subgraph(fgraph, c) for c in known]
    for slot, g in enumerate(subgraphs):
        (off_x, off_y) = centers[slot]
        shift = np.array([off_x, off_y])
        pos_local = nx.circular_layout(g, scale=2.)
        for n, place in pos_local.iteritems():
            pos[n] = place + shift

    color_map = {76: 1, 11: 2, 7: 3, 102: 4, 104: 5, 47: 6,
                 98: 7, 96: 8, 23: 9, 94: 10, 27: 0}
    compare_methods(fgraph,
                    'football_',
                    param=[1., 1., 5. / 115., 4, 0, .7, 20],
                    known=known,
                    pos=pos,
                    color_map=color_map,
                    data_path="FootballGames/football_metis")
def find_dolphin_communities():
    """ Runs compare_methods on the dolphin network.
    """
    dolphins = CD.dolphins()
    param = [1., 5., 3.5/62., 3, 0, .55, 62]
    compare_methods(dolphins, 'dolphins_', param=param,
                    data_path="Dolphins/dolphins")
Example #32
0
def find_dolphin_communities():
    """ Hands the dolphin network to compare_methods so the detection
    methods can be compared on it.
    """
    settings = [1., 5., 3.5 / 62., 3, 0, .55, 62]
    dgraph = CD.dolphins()
    compare_methods(dgraph,
                    'dolphins_',
                    param=settings,
                    data_path="Dolphins/dolphins")
def vis_local_com(communities, n, graph, sub_graph):
    """Visualize the local community structure.

    Parameters
    ----------
    communities : a list of lists, where internal lists are the communities
                  (converted with CD.sets_to_part), or a mapping from node
                  to the communities the node belongs to
    n : the central node around which the communities are centered
    graph : the local subgraph around n to plot
    sub_graph : a second graph, drawn next to the first

    Returns
    -------
    node_colors : one entry per node of graph, in iteration order: the rank
                  (as a float, 0. for the most popular) of the most popular
                  community the node belongs to, or -2 when the node is in
                  no community shared by more than one node of graph
    """
    if isinstance(communities, list):
        communities = CD.sets_to_part(communities)

    memberships = []
    for m in graph:
        memberships.extend(communities[m])

    # let's see how popular these communities are.
    pop_com_names = [(c, memberships.count(c)) for c in set(memberships)]

    # keep the communities seen more than once, most popular first
    popular = sorted((p for p in pop_com_names if p[1] > 1),
                     key=lambda p: p[1], reverse=True)
    community_names = [c for c, _ in popular]
    c_colors = dict((c, float(rank))
                    for rank, c in enumerate(community_names))

    node_colors = []
    for m in graph:
        # community_names is in popularity order, so the first hit is the
        # most popular community of m; a node with no memberships at all
        # now gets -2 instead of raising IndexError
        color = -2
        for c in community_names:
            if c in communities[m]:
                color = c_colors[c]
                break
        node_colors.append(color)

    pos = nx.spring_layout(graph, pos={n:np.array([0., 0.])})

    fig = plt.figure()
    fig.add_subplot(121)
    nx.draw(graph, pos, node_color=node_colors, alpha=0.4)
    fig.add_subplot(122)
    nx.draw(sub_graph, alpha=0.4)

    plt.show()

    return node_colors
def vis_local_com(communities, n, graph, sub_graph):
    """Visualize the local community structure.

    Parameters
    ----------
    communities : a list of lists, where internal lists are the communities
                  (converted with CD.sets_to_part), or a mapping from node
                  to the communities the node belongs to
    n : the central node around which the communities are centered
    graph : the local subgraph around n to plot
    sub_graph : a second graph, drawn in the right hand subplot

    Returns
    -------
    node_colors : for each node of graph, the float rank of its most popular
                  community (0. is the most popular), or -2 if none of its
                  communities is shared by more than one node of graph
    """
    if isinstance(communities, list):
        communities = CD.sets_to_part(communities)

    seen = []
    for m in graph:
        seen.extend(communities[m])

    # let's see how popular these communities are.
    pop_com_names = [(c, seen.count(c)) for c in set(seen)]
    shared = [p for p in pop_com_names if p[1] > 1]
    shared.sort(key=lambda p: p[1], reverse=True)
    community_names = [c for c, p in shared]

    c_colors = {}
    for rank, c in enumerate(community_names):
        c_colors[c] = float(rank)

    def color_of(node):
        # walk the communities from most to least popular; a node without
        # any membership falls through to -2 rather than raising IndexError
        for c in community_names:
            if c in communities[node]:
                return c_colors[c]
        return -2

    node_colors = [color_of(m) for m in graph]

    pos = nx.spring_layout(graph, pos={n: np.array([0., 0.])})

    fig = plt.figure()
    fig.add_subplot(121)
    nx.draw(graph, pos, node_color=node_colors, alpha=0.4)
    fig.add_subplot(122)
    nx.draw(sub_graph, alpha=0.4)

    plt.show()

    return node_colors
def gen_path_single(graph, seed, name, ax, metric, comp, ylim=(0, .3),
                    legend=False, width=0.01, param=None):
    """ Generates and plots the I E path and manages the space
    Parameters
    ----------
    graph : a networkx graph
    seed : a list of nodes to start from (a copy is handed to CD.path_I_E)
    name : the title of the graph, also the base name of the saved files
    ax : the subplot to put everything
    metric : the single community metric to optimize
    comp : the comparison function either CD.compare_min or max
    ylim : the two values used as y ticks and as the y range
    legend : whether or not to show the legend
    width : the width of the arrow head
    param : forwarded unchanged to CD.path_I_E
    
    Writes name + ".eps" and name + ".pdf" and then shows the figure.
    """
    # plot metric optimized path
    (I_path, E_path, _) = CD.path_I_E(graph,
                                      seed[:],
                                      metric,
                                      comp,
                                      param=param)
    print("Last point for  %s  I  %s  E  %s" % (name, I_path[-1], E_path[-1]))
    CD.plot_path(I_path[:], E_path[:], ax, 'r', name, width)

    # plot corner cases
    ax.plot(1, 0, 'kD', label='Ideal', markersize=10)
    # only the first value of CD.I_E is needed; indexing (rather than
    # unpacking a fixed number of values) agrees with every other call site
    graph_I = CD.I_E(graph, graph.nodes())[0]
    ax.plot(graph_I, 0, 'mD', label='Entire Graph', markersize=10)

    # set labels etc -- on ax itself, which need not be pyplot's current axes
    ax.set_title(name, fontsize=24)
    ax.set_xticks([0, .3, .7, 1])
    ax.set_xticklabels(['0', '0.3', '0.7', '1'])
    ax.set_yticks(ylim)
    ax.set_yticklabels([str(y) for y in ylim])
    ax.set_xlim(-0.01, 1.01)
    ax.set_ylim(ylim[0] - width/2., ylim[1] + width/2.)
    ax.set_xlabel(r'$I(C)$', fontsize=24)
    ax.set_ylabel(r'$E(C)$', fontsize=24)
    if legend:
        handles, labels = ax.get_legend_handles_labels()
        ax.legend(handles, labels, loc=2)

    # save the figure BEFORE showing it: once an interactive window has been
    # shown and closed, a later savefig can write an empty canvas.
    fig = ax.get_figure()
    fig.savefig(name + ".eps")
    fig.savefig(name + ".pdf")

    plt.show()
def draw_metric_change_vectors(metric, degree, fig=None, c=None):
    """ Draws the vector of possible changes in the metric by adding nodes
    
    Parameters
    ----------
    metric : a callable metric(graph, nodes) scoring a set of nodes
    degree : degree + 1 outside nodes are tried, one at a time (fewer if the
        graph does not have that many nodes outside of c)
    fig : an existing figure to draw into; a new one with a single subplot is
        created when omitted
    c : the starting set of nodes; 20 random nodes of the football graph
        (drawn with replacement) when omitted.  The argument is not modified.
    
    Method
    ------
    A star graph is drawn whose center sits at the (internal density,
    external density) of c and whose leaves sit at the densities of c plus
    one outside node.  Node sizes grow with the rank of the metric value.
    """
    # create a subset of the football graph to work with
    graph = CD.football_graph()
    nodes = list(graph.nodes())  # list() keeps the nodes indexable
    if c is None:
        c = [nodes[random.randint(0, len(nodes) - 1)] for _ in range(20)]
    else:
        c = list(c)

    ext_nodes = list(set(nodes) - set(c))

    if fig is None:
        fig = plt.figure()
        fig.add_subplot(111)

    change_graph = nx.DiGraph()
    change_graph.add_node("center")
    pos = {"center": (CD.m_internal_density(graph, c),
                      CD.m_external_density(graph, c))}
    value = {"center": metric(graph, c)}

    # slicing (instead of indexing up to degree) avoids an IndexError when
    # there are fewer outside nodes than requested
    for n in ext_nodes[:max(degree + 1, 0)]:
        grown = c + [n]
        pos[n] = (CD.m_internal_density(graph, grown),
                  CD.m_external_density(graph, grown))
        value[n] = metric(graph, grown)
        change_graph.add_edge("center", n)

    # rank the nodes by metric value; the node of rank r is drawn with size
    # 20 + 15 * r
    order = {}
    for rank, node in enumerate(sorted(value, key=value.get)):
        order[node] = rank

    sizes = [20 + 15 * order[node] for node in change_graph]
    nx.draw(change_graph,
            pos,
            node_color='b',
            node_size=sizes,
            with_labels=False)
        
def gen_relativity_analysis():
    """ Produces the ch5 figures for the relativity coauthor network.

    Every detection method is run once through CD.all_detection_methods;
    the community-size figure and the Linearity/Parallel overlap figure are
    then drawn from the result.  The per-community visualisation through
    vis_coauthor_communities is switched off: its loop runs over an empty
    list.

    Returns
    -------
    roptions : the result of CD.all_detection_methods, indexed here by
        method name (e.g. 'Linearity Communities') to get the communities
    """
    rgraph = CD.coauthor_relativity()
    metis_path = ("CommunityDetection/implementation/data/"
                  "CollaborationNetworks/metis/relativity_metis")
    roptions = CD.all_detection_methods(
        rgraph,
        param=[1., 5., 5./2694., 3, 10, .8, 200],
        path=metis_path)

    # method / community-index pairs of the communities worth a closer look
    method_keys = ['Modularity Communities', 'Linearity Communities',
                   'Parallel Communities', 'Modularity Communities',
                   'Modularity Communities']
    community_ids = [24, 108, 319, 10, 3]

    # put range(len(community_ids)) here to draw the communities above
    selected = []
    for idx in selected:
        cid = community_ids[idx]
        vis_coauthor_communities(rgraph, method_keys[idx], cid,
                                 'relativity_' + str(cid) + '_',
                                 roptions, 0, .75)

    size_keys = ['Linearity Communities',
                 'Parallel Communities',
                 'Metis Communities',
                 'Modularity Communities']
    community_sizes = [[len(c) for c in roptions[key]] for key in size_keys]
    gen_csize_fig([0, 100, 200], [0, .05, .1], community_sizes,
                  ['Linearity', 'Parallel', 'Metis', 'Modularity'],
                  'relativity_', ['r', 'k', 'b', 'g'])

    compared = [roptions['Linearity Communities'],
                roptions['Parallel Communities']]
    gen_overlap_fig(compared, [0, 15], [0, 200], ['Linear', 'Parallel'],
                    'relativity_linear_parallel', ['r', 'k'])
    return roptions
def test_bottle():
    """ Exercises the Bottle data structure on the karate club graph:
    construction, adding a member and removing it again.  The expected
    values are verified by check_bottle.
    """
    expected_initial = ('sally', 12, 29, 4, 12)
    expected_grown = ('sally', 14, 31, 5, 20)

    print("Testing the Bottle Structure.")

    print("Testing initialization:")
    karate = CD.karate_club_graph()
    bottle = CD.Bottle('sally', karate, [1, 2, 3, 4])
    check_bottle(bottle, *expected_initial)

    print("Testing add node:")
    bottle.add_member(karate, 31)
    check_bottle(bottle, *expected_grown)

    # removing the node again must restore the initial state
    print("Testing remove node:")
    bottle.remove_member(karate, 31)
    check_bottle(bottle, *expected_initial)
def create_community_candidates(graph, subset):
    """ Builds a Community for the subset together with its Candidates
    Parameters
    ----------
    graph : a networkx graph
    subset : a list of nodes
    
    Returns
    -------
    community : a CD.Community initialised from the subset nodes
    candidates : the CD.Candidates built from the outside nodes reported by
        the community's init
    """
    community = CD.Community()
    outside = community.init(graph, subset)

    candidates = CD.Candidates(graph, outside, community)

    # the two objects are coupled: bounds first, then the fringe
    community.init_bounds(candidates)
    candidates.rework_fringe()

    return community, candidates
def gen_path_set(graph, I_path, E_path, S_path, name, ylim=(0, 1),
                 legend=False, width=0.01, fig=False, color='r'):
    """ Given an I E S path plots
    Parameters
    ----------
    graph : a networkx graph, used for the 'Entire Graph' marker
    I_path : a list of I(S) values
    E_path : a list of E(S) values
    S_path : a list of |S| values (accepted but not used by the plot)
    name : the title and saving file name
    ylim : the two values used as y ticks and as the range of E(S)
    legend : whether or not to show legend (not likely)
    width : the width of the arrow head
    fig : the figure to draw into; a new one is created when falsy
    color : the color handed to CD.plot_path
    
    Writes name + ".eps" and name + ".pdf" and then shows the figure.
    """
    if not fig:
        fig = plt.figure()

    ax = fig.add_subplot(111)

    CD.plot_path(I_path[:], E_path[:], ax, color, name, width)

    # plot corner cases
    ax.plot(1, 0, 'kD', label='Ideal', markersize=10)
    # only the first value of CD.I_E is needed; indexing (rather than
    # unpacking a fixed number of values) agrees with every other call site
    graph_I = CD.I_E(graph, graph.nodes())[0]
    ax.plot(graph_I, 0, 'mD', label='Entire Graph', markersize=10)

    # set the dimensions and labels -- on ax itself, since a caller supplied
    # fig need not be pyplot's current figure
    ax.set_title(name, fontsize=24)
    ax.set_xticks([0, .3, .7, 1])
    ax.set_xticklabels(['0', '0.3', '0.7', '1'])
    ax.set_yticks(ylim)
    ax.set_yticklabels([str(y) for y in ylim])
    ax.set_xlim(-.01, 1.01)
    ax.set_ylim(ylim[0] - width/2., ylim[1] + width/2.)
    ax.set_xlabel(r'$I(S)$', fontsize=24)
    ax.set_ylabel(r'$E(S)$', fontsize=24)
    if legend:
        handles, labels = ax.get_legend_handles_labels()
        ax.legend(handles, labels, loc=2)

    # save the figure BEFORE showing it: once an interactive window has been
    # shown and closed, a later savefig can write an empty canvas.
    fig.savefig(name + ".eps")
    fig.savefig(name + ".pdf")

    plt.show()
def plot_set(ax, graph, group, color):
    """ Plots the (I(C), E(C)) points for all communities in the group
    Parameters
    ----------
    ax : the subplot to draw on
    graph : a networkx graph
    group : an iterable of communities
    color : a matplotlib color letter; 'o' is appended to get circle markers
    """
    I_values = []
    E_values = []
    for c in group:
        # CD.I_E is unpacked into two values elsewhere in this file and into
        # three here; taking the first two by index works either way.
        measures = CD.I_E(graph, c)
        I_values.append(measures[0])
        E_values.append(measures[1])

    ax.plot(I_values, E_values, color + 'o')
def mod_cliques(graph):
    """Given a graph, find the modularity partitions.  Then return the cliques
       within each partition.
       
    Parameters
    ----------
    graph : a networkx graph
    
    Returns
    -------
    cliques : a list with one tuple per partition subgraph, of the form
        (nodes considered, result of CD.find_sub_cliques on those nodes)
    
    Side effect
    -----------
    saves the work in the pickle file 'cliques_within_mod_communities.pkl' in
    the format of (graph, partition, cliques)
    """
    part = CD.modularity_run(graph)
    cliques = [(subgraph.nodes(), CD.find_sub_cliques(subgraph))
               for subgraph in CD.part_to_subgraphs(graph, part)]

    # the file is only opened once the result exists, so a failing run does
    # not truncate an earlier pickle, and it is closed even if dump raises
    with open('cliques_within_mod_communities.pkl', 'wb') as pf:
        pickle.dump((graph, part, cliques), pf)

    return cliques
def mod_cliques(graph):
    """Given a graph, find the modularity partitions.  Then return the cliques
       within each partition.
       
    Parameters
    ----------
    graph : a networkx graph
    
    Returns
    -------
    cliques : a list with one tuple per partition subgraph, of the form
        (nodes considered, result of CD.find_sub_cliques on those nodes)
    
    Side effect
    -----------
    saves the work in the pickle file 'cliques_within_mod_communities.pkl' in
    the format of (graph, partition, cliques)
    """
    part = CD.modularity_run(graph)
    cliques = [(subgraph.nodes(), CD.find_sub_cliques(subgraph))
               for subgraph in CD.part_to_subgraphs(graph, part)]

    # the file is only opened once the result exists, so a failing run does
    # not truncate an earlier pickle, and it is closed even if dump raises
    with open('cliques_within_mod_communities.pkl', 'wb') as pf:
        pickle.dump((graph, part, cliques), pf)

    return cliques
Example #44
0
def gen_cond_analysis():
    """ Produces the figures for the condensed matter coauthor network.

    Every detection method is run once through CD.all_detection_methods;
    the community-size figure and the Linearity/Parallel overlap figure are
    then drawn from the result.

    Returns
    -------
    coptions : the result of CD.all_detection_methods, indexed here by
        method name (e.g. 'Linearity Communities') to get the communities
    """
    cgraph = CD.coauthor_cond()
    metis_path = ("CommunityDetection/implementation/data/"
                  "CollaborationNetworks/metis/condensed_metis")
    coptions = CD.all_detection_methods(
        cgraph,
        param=[1., 1., 5. / 16966., 4, 10, .8, 300],
        path=metis_path)

    # one list of community sizes per method, in legend order
    size_keys = ['Linearity Communities', 'Parallel Communities',
                 'Modularity Communities', 'Metis Communities']
    community_sizes = [[len(c) for c in coptions[key]] for key in size_keys]
    gen_csize_fig([0, 40, 80], [0, .03], community_sizes,
                  ['Linearity', 'Parallel', 'Modularity', 'Metis'],
                  'cond_', ['r', 'k', 'b', 'g'])

    compared = [coptions['Linearity Communities'],
                coptions['Parallel Communities']]
    gen_overlap_fig(compared, [0, 15, 20], [0, 250, 500],
                    ['Linear', 'Parallel'], 'cond_linear_parallel',
                    ['r', 'k'])
    return coptions
def gen_archivex():
    """ Runs the detection methods on the physics archivx citation network
    with the "big community" parameters and pickles the result.
    
    Returns
    -------
    poptions_big : the result of CD.all_detection_methods, also saved to the
        pickle file 'big_c_physics_2'
    
    Notes
    -----
    The second run with the small-community parameters
    [0., 0., 1/30000., 3, 20, .8, 100] (saved to 'small_c_physics') and the
    size / overlap figures ('archivx_', 'archivex_linear_parallel') are
    currently disabled; hack_to_load still expects 'small_c_physics' to
    exist.
    """
    pgraph = CD.physics_citations()
    pparam_big = [0., 0., 1/30000., 5, 200, .8, 900]
    ppath = "CommunityDetection/implementation/data/" +\
             "PhysicsArchive/archivx_metis"
    poptions_big = CD.all_detection_methods(pgraph, pparam_big, ppath)

    # the with block closes the file even if pickling fails
    with open('big_c_physics_2', 'wb') as pf:
        pickle.dump(poptions_big, pf)

    return poptions_big
Example #46
0
def gen_archivex():
    """ Runs the detection methods on the physics archivx citation network
    with the "big community" parameters and pickles the result.
    
    Returns
    -------
    poptions_big : the result of CD.all_detection_methods, also saved to the
        pickle file 'big_c_physics_2'
    
    Notes
    -----
    The second run with the small-community parameters
    [0., 0., 1/30000., 3, 20, .8, 100] (saved to 'small_c_physics') and the
    size / overlap figures ('archivx_', 'archivex_linear_parallel') are
    currently disabled; hack_to_load still expects 'small_c_physics' to
    exist.
    """
    pgraph = CD.physics_citations()
    pparam_big = [0., 0., 1 / 30000., 5, 200, .8, 900]
    ppath = "CommunityDetection/implementation/data/" +\
             "PhysicsArchive/archivx_metis"
    poptions_big = CD.all_detection_methods(pgraph, pparam_big, ppath)

    # the with block closes the file even if pickling fails
    with open('big_c_physics_2', 'wb') as pf:
        pickle.dump(poptions_big, pf)

    return poptions_big
Example #47
0
def check_votes_agree(wgraph, communities, user_voting_record):
    """ For every node in a community, if other members of the community voted
    checks whether or not our predictive model would agree with how they voted
    Parameters
    ----------
    wgraph : the wiki graph
    communities : a list of lists
    user_voting_record : a dictionary of user votes, indexed first by node
        and then by election
    
    Returns
    -------
    no_say : number of votes for which guess_n_vote returned None
    correct_say : the number of votes that were correctly guessed
    wrong_say : the number of votes that were incorrectly guessed
    no_guess : the number of votes for which no community of the voter had
        more than one vote recorded in that election
    wrong_data : a list of (guess, actual vote, community guesses) tuples,
        one per incorrectly guessed vote
    """
    no_say = 0
    no_guess = 0
    correct_say = 0
    wrong_say = 0
    wrong_data = []

    c_votes = [
        CD.community_to_elections(c, user_voting_record) for c in communities
    ]

    for n in wgraph:
        # a real list, because it is walked once per election below
        n_communities = [
            i for i, members in enumerate(communities) if n in members
        ]
        for e in user_voting_record[n]:
            actual = user_voting_record[n][e]
            c_guess = []
            for i in n_communities:
                tally = c_votes[i][e]
                if sum(tally.values()) > 1:
                    # take the node's own vote out while the community
                    # guesses, then put it back
                    tally[actual] -= 1
                    c_guess.append(guess_c_vote(tally))
                    tally[actual] += 1

            if not c_guess:
                # there were no comparables to use
                no_guess += 1
                continue

            # had a comparable
            guess = guess_n_vote(c_guess)
            if guess is None:
                no_say += 1
            elif guess == actual:
                correct_say += 1
            else:
                wrong_data.append((guess, actual, c_guess))
                wrong_say += 1

    return no_say, correct_say, wrong_say, no_guess, wrong_data
Example #48
0
def get_int_counts(graph, sets):
    """ Tallies the edge counts over a collection of node sets
    Parameters
    ----------
    graph : a networkx graph
    sets : an iterable of node collections

    Returns
    -------
    internal_total : the summed number of edges reported by
        CD.get_internal_edges for every set
    pair_total : the sum of |c| * (|c| - 1) over every set c
    remaining : the set of graph edges not reported as internal to any set
    """
    remaining = set(graph.edges())
    internal_total = 0
    pair_total = 0

    for members in sets:
        inside = CD.get_internal_edges(graph, members)
        size = len(members)
        internal_total += len(inside)
        pair_total += size * (size - 1)
        remaining.difference_update(inside)

    return internal_total, pair_total, remaining
def get_int_counts(graph, sets):
    """ Tallies the edge counts over a collection of node sets
    Parameters
    ----------
    graph : a networkx graph
    sets : an iterable of node collections

    Returns
    -------
    internal_total : the summed number of edges reported by
        CD.get_internal_edges for every set
    pair_total : the sum of |c| * (|c| - 1) over every set c
    remaining : the set of graph edges not reported as internal to any set
    """
    remaining = set(graph.edges())
    internal_total = 0
    pair_total = 0

    for members in sets:
        inside = CD.get_internal_edges(graph, members)
        size = len(members)
        internal_total += len(inside)
        pair_total += size * (size - 1)
        remaining.difference_update(inside)

    return internal_total, pair_total, remaining
def check_votes_agree(wgraph, communities, user_voting_record):
    """ For every node in a community, if other members of the community voted
    checks whether or not our predictive model would agree with how they voted
    Parameters
    ----------
    wgraph : the wiki graph
    communities : a list of lists
    user_voting_record : a dictionary of user votes, indexed first by node
        and then by election
    
    Returns
    -------
    no_say : number of votes for which guess_n_vote returned None
    correct_say : the number of votes that were correctly guessed
    wrong_say : the number of votes that were incorrectly guessed
    no_guess : the number of votes for which no community of the voter had
        more than one vote recorded in that election
    wrong_data : a list of (guess, actual vote, community guesses) tuples,
        one per incorrectly guessed vote
    """
    no_say = 0
    no_guess = 0
    correct_say = 0
    wrong_say = 0
    wrong_data = []

    c_votes = [CD.community_to_elections(c, user_voting_record)
               for c in communities]

    for n in wgraph:
        # a real list, because it is walked once per election below
        n_communities = [i for i, members in enumerate(communities)
                         if n in members]
        for e in user_voting_record[n]:
            actual = user_voting_record[n][e]
            c_guess = []
            for i in n_communities:
                tally = c_votes[i][e]
                if sum(tally.values()) > 1:
                    # take the node's own vote out while the community
                    # guesses, then put it back
                    tally[actual] -= 1
                    c_guess.append(guess_c_vote(tally))
                    tally[actual] += 1

            if not c_guess:
                # there were no comparables to use
                no_guess += 1
                continue

            # had a comparable
            guess = guess_n_vote(c_guess)
            if guess is None:
                no_say += 1
            elif guess == actual:
                correct_say += 1
            else:
                wrong_data.append((guess, actual, c_guess))
                wrong_say += 1

    return no_say, correct_say, wrong_say, no_guess, wrong_data
def plot_dendo(dendo, graph):
    """ Plots the (I(S), E(S)) points for all stages of the dendo
    Parameters
    ----------
    dendo : the dendogram, converted with CD.dendo_to_hierarchy
    graph : a networkx graph
    
    Every level of the hierarchy except the one with the highest index is
    drawn through plot_set, one color per level.
    """
    colors = ["r", "g", "b", "k", "c", "m"]

    fig = plt.figure()
    ax = fig.add_subplot(111)

    groups = CD.dendo_to_hierarchy(dendo)

    for i in range(len(groups) - 1):
        # wrap around the palette so deep hierarchies do not run out of
        # colors
        plot_set(ax, graph, groups[i].values(), colors[i % len(colors)])

    plt.show()
def why_parallel_karate():
    """ Illustrates why the parallel method struggles on small graphs.

    For each known community of the karate club graph a Community and its
    coupled Candidates are built; once all of them exist, each pair is
    handed to CD.vis_e_p.

    Could use with some mods
    """
    graph = CD.karate_club_graph()

    # build every (community, candidates) pair first, draw afterwards
    pairs = []
    for members in CD.karate_known_c():
        community = CD.Community()
        outside = community.init(graph, members)
        fringe = CD.Candidates(graph, outside, community)
        community.init_bounds(fringe)
        fringe.rework_fringe()
        pairs.append((community, fringe))

    for community, fringe in pairs:
        CD.vis_e_p(graph, community, fringe)
def plot_dendo(dendo, graph):
    """ Plots the (I(S), E(S)) points for all stages of the dendo
    Parameters
    ----------
    dendo : the dendogram, converted with CD.dendo_to_hierarchy
    graph : a networkx graph
    
    Every level of the hierarchy except the one with the highest index is
    drawn through plot_set, one color per level.
    """
    colors = ['r', 'g', 'b', 'k', 'c', 'm']

    fig = plt.figure()
    ax = fig.add_subplot(111)

    groups = CD.dendo_to_hierarchy(dendo)

    for i in range(len(groups) - 1):
        # wrap around the palette so deep hierarchies do not run out of
        # colors
        plot_set(ax, graph, groups[i].values(), colors[i % len(colors)])

    plt.show()
def test_bar():
    """Exercises the Bar data structure on the football graph.

    Only the initialization is verified (through check_bar); the swap test
    is still a placeholder.
    """
    print("Testing the Bar Structure")

    print("Testing initialization")
    football = CD.football_graph()
    bar = CD.Bar(football, 1226, 1., 1., 1.)
    check_bar(bar, 0, 1226, 115, 0)

    # nothing is exercised below yet, only the announcements are printed
    for announcement in ("Testing possible swap", "JTODO: Testing"):
        print(announcement)
def hack_to_load():
    """ A hack to reload the communities after a crash
    
    Reads the pickle files 'big_c_physics_2' and 'small_c_physics' from the
    working directory.  NOTE: pickle.load executes whatever the file
    describes, so only load files written by this project.
    
    Returns
    -------
    p_com : the 'Parallel Communities' of both files combined and passed
        through CD.clean_of_duplicate_c with .25
    the 'Modularity Communities' of the small file
    the 'Metis Communities' of the small file
    """
    # with blocks close the files even when unpickling fails
    with open('big_c_physics_2', 'rb') as pf:
        big_c = pickle.load(pf)
    with open('small_c_physics', 'rb') as pf:
        small_c = pickle.load(pf)

    p_com = big_c['Parallel Communities']
    p_com.extend(small_c['Parallel Communities'])

    p_com = CD.clean_of_duplicate_c(p_com, .25)

    return (p_com,
            small_c['Modularity Communities'],
            small_c['Metis Communities'])
Example #56
0
def distant_seeds(graph, method='mod', min_size=4):
    """Finds seeds that are far apart in the graph
    Parameters
    ----------
    graph : a networkx graph (a copy is worked on, the argument is untouched)
    method : optional input for which local community method to find
    min_size : the smallest clique size that is still searched for
    
    Returns
    -------
    seeds : the list of seeds that were found
    
    Method
    ------
    For a decreasing min clique size (starting at 20), finds a clique of that
    size, finds the best seed involving that clique, appends it, and removes
    the ball of radius 1 from the graph.  Then repeats until there are no more
    cliques of that size and terminates when the graph is empty or the clique
    size drops below min_size.
    """

    graph = graph.copy()
    seeds = []
    clique_size = 20

    while graph.number_of_nodes() > 0 and clique_size >= min_size:
        # A fresh clique generator is needed after every removal.  The loop
        # stops at the first clique that is large enough, so the generator is
        # never advanced again once the graph has been modified.
        for clique in nx.find_cliques(graph):
            if len(clique) >= clique_size:
                possibleseeds = local_seed_communities(graph,
                                                       clique,
                                                       1,
                                                       0.5 * len(clique),
                                                       method)
                possibleseeds.sort(reverse=True)
                seeds.append(possibleseeds[0])
                graph.remove_nodes_from(
                    CD.get_ball(graph, possibleseeds[0], 1))
                break
        else:
            # every clique was looked at and none was large enough
            print("ran through that size of clique  %s" % clique_size)
            clique_size -= 1

    return seeds
Example #57
0
def hack_to_load():
    """ A hack to reload the communities after a crash
    
    Reads the pickle files 'big_c_physics_2' and 'small_c_physics' from the
    working directory.  NOTE: pickle.load executes whatever the file
    describes, so only load files written by this project.
    
    Returns
    -------
    p_com : the 'Parallel Communities' of both files combined and passed
        through CD.clean_of_duplicate_c with .25
    the 'Modularity Communities' of the small file
    the 'Metis Communities' of the small file
    """
    # with blocks close the files even when unpickling fails
    with open('big_c_physics_2', 'rb') as pf:
        big_c = pickle.load(pf)
    with open('small_c_physics', 'rb') as pf:
        small_c = pickle.load(pf)

    p_com = big_c['Parallel Communities']
    p_com.extend(small_c['Parallel Communities'])

    p_com = CD.clean_of_duplicate_c(p_com, .25)

    return (p_com,
            small_c['Modularity Communities'],
            small_c['Metis Communities'])
Example #58
0
def distant_seeds(graph, method='mod', min_size=4):
    """Finds seeds that are far apart in the graph
    Parameters
    ----------
    graph : a networkx graph (a copy is worked on, the argument is untouched)
    method : optional input for which local community method to find
    min_size : the smallest clique size that is still searched for
    
    Returns
    -------
    seeds : the list of seeds that were found
    
    Method
    ------
    For a decreasing min clique size (starting at 20), finds a clique of that
    size, finds the best seed involving that clique, appends it, and removes
    the ball of radius 1 from the graph.  Then repeats until there are no more
    cliques of that size and terminates when the graph is empty or the clique
    size drops below min_size.
    """

    graph = graph.copy()
    seeds = []
    clique_size = 20

    while graph.number_of_nodes() > 0 and clique_size >= min_size:
        # A fresh clique generator is needed after every removal.  The loop
        # stops at the first clique that is large enough, so the generator is
        # never advanced again once the graph has been modified.
        for clique in nx.find_cliques(graph):
            if len(clique) >= clique_size:
                possibleseeds = local_seed_communities(graph, clique, 1,
                                                       0.5 * len(clique),
                                                       method)
                possibleseeds.sort(reverse=True)
                seeds.append(possibleseeds[0])
                graph.remove_nodes_from(
                    CD.get_ball(graph, possibleseeds[0], 1))
                break
        else:
            # every clique was looked at and none was large enough
            print("ran through that size of clique  %s" % clique_size)
            clique_size -= 1

    return seeds
def test_bottle():
    """ Exercises the Bottle data structure on the karate club graph:
    construction, adding a member and removing it again.  The expected
    values are verified by check_bottle.
    """
    expected_initial = ('sally', 12, 29, 4, 12)
    expected_grown = ('sally', 14, 31, 5, 20)

    print("Testing the Bottle Structure.")

    print("Testing initialization:")
    karate = CD.karate_club_graph()
    bottle = CD.Bottle('sally', karate, [1, 2, 3, 4])
    check_bottle(bottle, *expected_initial)

    print("Testing add node:")
    bottle.add_member(karate, 31)
    check_bottle(bottle, *expected_grown)

    # removing the node again must restore the initial state
    print("Testing remove node:")
    bottle.remove_member(karate, 31)
    check_bottle(bottle, *expected_initial)
def vis_ball(graph, core, radius):
    """ Plots the subgraph corresponding to the ball around core of the given
        radius.
        
    Parameters
    ----------
    graph : a networkx graph
    core : a subset of nodes from graph
    radius : how far out to consider from core
    """
    
    ball = CD.get_ball(graph, core, radius)
    
    subgraph = graph.subgraph(ball)
    
    fig = plt.figure()
    ax = fig.add_subplot(111)
    pos = nx.spring_layout(subgraph)
    nx.draw(subgraph, pos)