Beispiel #1
0
    def predict_communities(self, deg_corr):
        """Fit a block model and collect posterior statistics for ``deg_corr``.

        A block model is first fitted with ``gt.minimize_blockmodel_dl`` on
        ``self.coocurence_graph`` (with edge covariates ``self.weights`` when
        ``self.is_weighted`` is set). The state is then copied with one block
        per vertex and equilibrated with ``gt.mcmc_equilibrate``; after every
        sweep a callback accumulates marginals and entropies.

        Results are stored on ``self`` in dictionaries keyed by ``deg_corr``:
        ``dls_`` (entropy history), ``vm_`` / ``em_`` (vertex / edge
        marginals), ``h_`` (histogram of non-empty block counts), ``state_``,
        ``S_mf_``, ``S_bethe_`` and ``L_``.

        Parameters
        ----------
        deg_corr : value forwarded as ``deg_corr`` to
            ``gt.minimize_blockmodel_dl`` and used as the storage key.
        """
        fit_kwargs = dict(overlap=self.allow_overlap, deg_corr=deg_corr)
        if self.is_weighted:
            fit_kwargs.update(layers=True,
                              state_args=dict(ec=self.weights, layers=False))
        state = gt.minimize_blockmodel_dl(self.coocurence_graph, **fit_kwargs)

        # Restart from a copy of the state that has as many blocks as vertices.
        state = state.copy(B=self.coocurence_graph.num_vertices())

        self.dls_[deg_corr] = []  # description length history
        self.vm_[deg_corr] = None  # vertex marginals
        self.em_[deg_corr] = None  # edge marginals
        # One histogram slot for every possible block count 0..num_vertices.
        self.h_[deg_corr] = np.zeros(self.coocurence_graph.num_vertices() + 1)

        def record_sweep(s):
            # Called by mcmc_equilibrate with the current state ``s``.
            self.vm_[deg_corr] = s.collect_vertex_marginals(self.vm_[deg_corr])
            self.em_[deg_corr] = s.collect_edge_marginals(self.em_[deg_corr])
            self.dls_[deg_corr].append(s.entropy())
            self.h_[deg_corr][s.get_nonempty_B()] += 1

        # The number of collection iterations is taken from self.n_iters
        # (passed as force_niter), not hard-coded.
        gt.mcmc_equilibrate(state,
                            force_niter=self.n_iters,
                            mcmc_args=dict(niter=self.n_init_iters),
                            callback=record_sweep,
                            **self.equlibrate_options)

        mean_field_entropy = gt.mf_entropy(self.coocurence_graph,
                                           self.vm_[deg_corr])
        bethe_entropy = gt.bethe_entropy(self.coocurence_graph,
                                         self.em_[deg_corr])[0]
        neg_mean_dl = -np.mean(self.dls_[deg_corr])

        self.state_[deg_corr] = copy.copy(state)
        self.S_bethe_[deg_corr] = copy.copy(bethe_entropy)
        self.S_mf_[deg_corr] = copy.copy(mean_field_entropy)
        self.L_[deg_corr] = copy.copy(neg_mean_dl)

        if self.verbose:
            report = ("Model evidence for deg_corr = %s:" % deg_corr,
                      neg_mean_dl + mean_field_entropy,
                      "(mean field),",
                      neg_mean_dl + bethe_entropy,
                      "(Bethe)")
            print(report)
Beispiel #2
0
def write_communities(infile, community_file, member_file, min_B, max_B, overlap=True, verbose=True):
    """
    Writes communities and members from graph file.

    The graph is read with ``read_graph2``, rebuilt as a ``gt.Graph`` and
    partitioned with ``gt.minimize_blockmodel_dl``; the first element of
    ``state.get_overlap_blocks()`` supplies the blocks of every vertex.

    :param infile: graph filename
    :param community_file: output path; one line per block: the block followed by its member vertices
    :param member_file: output path; one line per vertex: the vertex followed by its blocks
    :param min_B: forwarded to ``gt.minimize_blockmodel_dl`` as ``min_B``
    :param max_B: forwarded to ``gt.minimize_blockmodel_dl`` as ``max_B``
    :param overlap: forwarded to ``gt.minimize_blockmodel_dl`` as ``overlap``
    :param verbose: forwarded to ``gt.minimize_blockmodel_dl`` as ``verbose``
    :return: None
    """
    G = read_graph2(infile)
    N = len(G)

    g = gt.Graph()
    g.add_vertex(N)

    # Edge endpoints are used directly as vertex indices of ``g``
    # (assumes read_graph2 labels nodes 0..N-1 -- TODO confirm).
    for e in G.edges():
        g.add_edge(g.vertex(e[0]), g.vertex(e[1]))

    # g = gt.collection.data["polbooks"] # sample graph to test function

    state = gt.minimize_blockmodel_dl(g, min_B=min_B, max_B=max_B, overlap=overlap, verbose=verbose)
    blocks = state.get_overlap_blocks()
    bv = blocks[0]
    Bl = dict()  # block -> list of member vertices
    with open(member_file, "w") as f:
        for u in g.vertices():
            memberships = list(bv[u])
            # Single formatted string: prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("{} {}".format(u, memberships))
            f.write("{} {}\n".format(u, " ".join(map(str, memberships))))
            for block in memberships:
                Bl.setdefault(block, []).append(u)

    with open(community_file, "w") as f:
        for block, nodes in Bl.items():
            f.write("{} {}\n".format(block, " ".join(map(str, nodes))))
    def fit_predict(self, X, y):
        """ Performs clustering on y and returns list of label lists

            Builds a label coocurence_graph using :func:`LabelCooccurenceClustererBase.generate_coocurence_adjacency_matrix` on `y` and then detects communities using graph tool's stochastic block modeling.

            The partition is also stored in ``self.label_sets`` and its length in ``self.model_count``.

            Parameters
            ----------
            X : sparse matrix (n_samples, n_features), feature space, not used in this clusterer
            y : sparse matrix (n_samples, n_labels), label space

            Returns
            -------
            partition: array built from a list of lists of label indexes, each sublist represents labels that are in that community
        """
        self.generate_coocurence_adjacency_matrix(y)
        self.generate_coocurence_graph()

        d = gt.minimize_blockmodel_dl(
            self.coocurence_graph, overlap=self.allow_overlap, ec=self.weights)
        A = d.get_blocks().a  # A[k] is the block assigned to label k

        # ``range``/``enumerate`` instead of ``xrange`` so the method runs on
        # both Python 2 and Python 3.
        self.label_sets = [[] for _ in range(d.B)]
        for label, block in enumerate(A):
            self.label_sets[block].append(label)

        self.model_count = len(self.label_sets)

        # NOTE(review): communities of unequal size make this a ragged array;
        # recent NumPy releases reject that unless dtype=object is passed --
        # confirm against the NumPy version in use.
        return np.array(self.label_sets)
Beispiel #4
0
    def fit_predict(self, X, y):
        """ Performs clustering on y and returns list of label lists

            Builds a label coocurence_graph using :func:`LabelCooccurenceClustererBase.generate_coocurence_adjacency_matrix` on `y` and then detects communities using graph tool's stochastic block modeling.

            The partition is also stored in ``self.label_sets`` and its length in ``self.model_count``.

            Parameters
            ----------
            X : sparse matrix (n_samples, n_features), feature space, not used in this clusterer
            y : sparse matrix (n_samples, n_labels), label space

            Returns
            -------
            partition: array built from a list of lists of label indexes, each sublist represents labels that are in that community
        """
        self.generate_coocurence_adjacency_matrix(y)
        self.generate_coocurence_graph()

        d = gt.minimize_blockmodel_dl(self.coocurence_graph,
                                      overlap=self.allow_overlap,
                                      ec=self.weights)
        A = d.get_blocks().a  # A[k] is the block assigned to label k

        # ``range``/``enumerate`` instead of ``xrange`` so the method runs on
        # both Python 2 and Python 3.
        self.label_sets = [[] for _ in range(d.B)]
        for label, block in enumerate(A):
            self.label_sets[block].append(label)

        self.model_count = len(self.label_sets)

        # NOTE(review): communities of unequal size make this a ragged array;
        # recent NumPy releases reject that unless dtype=object is passed --
        # confirm against the NumPy version in use.
        return np.array(self.label_sets)
    def _fit(self) -> None:
        """Fit a block model to ``self.input_graph``.

        The graph is converted with ``networkx_to_graphtool`` and the state
        returned by ``gt.minimize_blockmodel_dl`` is stored in
        ``self.params['state']``.
        """
        # graph-tool is imported here rather than at module level.
        import graph_tool.all as gt

        self.params['state'] = gt.minimize_blockmodel_dl(
            networkx_to_graphtool(self.input_graph))
Beispiel #6
0
def community_structure_test(graph):
    """Fit a block model to ``graph`` and return the modularity of its blocks.

    Progress and the elapsed wall-clock time (block-model fit plus modularity
    computation) are written to ``sys.stdout``.
    """
    sys.stdout.write('Getting community structure ...')
    sys.stdout.flush()

    started = time.time()
    fitted = gt.minimize_blockmodel_dl(graph)
    modularity = gt.modularity(graph, fitted.get_blocks())
    elapsed = time.time() - started

    sys.stdout.write('Ok! ({0} s.)\n'.format(elapsed))
    sys.stdout.flush()

    return modularity
def sbm_partitioner(graph, pmin, pmax, runs=5):
    """Split ``graph`` into one sub-graph per block of a fitted block model.

    The block structure is chosen as follows:

    * if ``graph.structures`` already holds ``"hybrid_partition"``, reuse it;
    * else, if no block bounds are given and ``"sbm"`` is in
      ``graph.structures``, use ``graph.best_SBM()[0]`` and cache it;
    * otherwise fit ``runs`` degree-corrected and ``runs`` plain block models
      with ``gt.minimize_blockmodel_dl`` and keep the fit with the lowest
      entropy, caching it under ``"hybrid_partition"``.

    :param graph: project graph wrapper exposing ``structures``, ``partition``,
        ``best_SBM``, ``gtgraph``, ``nxgraph`` and ``N``
    :param pmin: passed as ``B_min`` to ``gt.minimize_blockmodel_dl``
    :param pmax: passed as ``B_max`` to ``gt.minimize_blockmodel_dl``
    :param runs: number of fits per model variant (must be >= 1 when fitting)
    :return: ``(graph_partitions, partitions)`` -- a list of
        ``eg.Experiment_Graph`` objects, one per block, and the matching list
        of node-index lists
    :raises ValueError: if a new fit is required and ``runs`` is smaller than 1
    """
    if "hybrid_partition" in graph.structures:
        structure = graph.structures["hybrid_partition"]
        partition = graph.partition(structure, "sbm")
    elif pmax is None and pmin is None and "sbm" in graph.structures:
        structure = graph.best_SBM()[0]
        partition = graph.partition(structure, "sbm")
        graph.structures["hybrid_partition"] = structure
    else:
        if runs < 1:
            raise ValueError("runs must be at least 1 to fit a block model")

        g = graph.gtgraph
        best_entropy = np.inf
        best_structure = None
        best_label = None
        # Degree-corrected fits first, then plain fits; the lowest entropy
        # seen across all of them wins.
        variants = (("dc_sbm", dict(deg_corr=True)), ("sbm", dict()))
        for label, extra_args in variants:
            for _ in range(runs):
                structure = gt.minimize_blockmodel_dl(g,
                                                      B_min=pmin,
                                                      B_max=pmax,
                                                      **extra_args)
                entropy = structure.entropy()
                if entropy < best_entropy:
                    best_entropy = entropy
                    best_structure = structure
                    best_label = label

        partition = graph.partition(best_structure, best_label)
        graph.structures["hybrid_partition"] = best_structure

    # graph.draw_graph_communities(partition)
    # plt.show()

    # Map block labels onto 0..k-1 so that non-contiguous labels cannot index
    # past the end of the bucket list.
    unique, bucket_of = np.unique(partition, return_inverse=True)
    partitions = [[] for _ in range(len(unique))]
    for i in range(graph.N):
        partitions[bucket_of[i]].append(i)

    graph_partitions = []
    ng = graph.nxgraph
    for part in partitions:
        subgraph = nx.convert_node_labels_to_integers(ng.subgraph(part))
        graph_partitions.append(eg.Experiment_Graph("part", nxgraph=subgraph))

    return graph_partitions, partitions
Beispiel #8
0
def sbm_dl(g, B_min=None, B_max=None, deg_corr=True, **kwargs):
    """Efficient Monte Carlo and greedy heuristic for the inference of stochastic block models.

    Fit a non-overlapping stochastic block model (SBM) by minimizing its description length using an agglomerative heuristic.
    If no parameter is given, the number of blocks will be discovered automatically. Bounds for the number of communities can
    be provided using B_min, B_max.

    :param g: input graph; it is converted to a networkx ``Graph`` and then to a graph-tool graph before fitting
    :param B_min: minimum number of communities that can be found
    :param B_max: maximum number of communities that can be found
    :param deg_corr: if true, use the degree corrected version of the SBM
    :param kwargs: accepted but not used by this function
    :return: NodeClustering object
    :raises Exception: if graph-tool could not be imported (``gt`` is ``None``)


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = sbm_dl(G)


    :References:

    Tiago P. Peixoto, “Efficient Monte Carlo and greedy heuristic for the inference of stochastic block models”, Phys. Rev. E 89, 012804 (2014), DOI: 10.1103/PhysRevE.89.012804 [sci-hub, @tor], arXiv: 1310.4378.
    .. note:: Use implementation from graph-tool library, please report to https://graph-tool.skewed.de for details
    """
    if gt is None:
        raise Exception(
            "===================================================== \n"
            "The graph-tool library seems not to be installed (or incorrectly installed). \n"
            "Please check installation procedure there https://git.skewed.de/count0/graph-tool/wikis/installation-instructions#native-installation \n"
            "on linux/mac, you can use package managers to do so(apt-get install python3-graph-tool, brew install graph-tool, etc.)"
        )

    nx_graph = convert_graph_formats(g, nx.Graph)
    gt_g, label_map = __from_nx_to_graph_tool(nx_graph)
    state = gt.minimize_blockmodel_dl(gt_g, B_min, B_max, deg_corr=deg_corr)

    # Translate graph-tool vertex positions back to the original node labels.
    block_of_vertex = state.get_blocks().get_array()
    affiliations = {}
    for position, block in enumerate(block_of_vertex):
        affiliations[label_map[position]] = block

    node_sets = affiliations2nodesets(affiliations)
    coms = [list(members) for _block, members in node_sets.items()]

    used_parameters = {"B_min": B_min, "B_max": B_max, "deg_corr": deg_corr}
    return NodeClustering(coms, g, "SBM", method_parameters=used_parameters)
Beispiel #9
0
def draw_football():
    """Fit a block model to the bundled "football" graph and save two figures.

    Writes the block-model drawing to ``football-sbm-fit.png`` and the matrix
    returned by ``state.get_matrix()`` to ``football-edge-counts.png``; the
    graph's property list and the block of vertex 10 are printed.
    """
    football = gt.collection.data["football"]
    print(football.list_properties())

    fit = gt.minimize_blockmodel_dl(football, deg_corr=False)
    fit.draw(pos=football.vp.pos, output="football-sbm-fit.png")

    membership = fit.get_blocks()
    print(membership[10])  # group membership of vertex 10

    block_matrix = fit.get_matrix()
    plt.matshow(block_matrix.todense())
    plt.savefig("football-edge-counts.png")
Beispiel #10
0
def draw_football():
    """Fit a block model to the bundled "football" graph and save two figures.

    Writes the block-model drawing to ``football-sbm-fit.png`` and the matrix
    returned by ``state.get_matrix()`` to ``football-edge-counts.png``; the
    graph's property list and the block of vertex 10 are printed.
    """
    football = gt.collection.data["football"]
    print(football.list_properties())

    fit = gt.minimize_blockmodel_dl(football, deg_corr=False)
    fit.draw(pos=football.vp.pos, output="football-sbm-fit.png")

    membership = fit.get_blocks()
    print(membership[10])  # group membership of vertex 10

    block_matrix = fit.get_matrix()
    plt.matshow(block_matrix.todense())
    plt.savefig("football-edge-counts.png")
Beispiel #11
0
def blockModel(clusterCommands, Graph, conn, cur):
    """Cluster ``Graph.g`` with a block model and hand the result to ``createTable``.

    Every vertex position is translated to a node id through
    ``Graph.indexIdDict`` and appended to the list of its block; the list of
    lists (indexed by block label) is passed to ``createTable`` together with
    ``clusterCommands``, ``conn`` and ``cur``.
    """
    gt.openmp_set_num_threads(4)  # enable 4 threads for running the algorithm
    state = gt.minimize_blockmodel_dl(Graph.g)
    values = state.b.get_array()

    # Block labels index straight into commDict, so one bucket is needed for
    # every label up to the largest; an empty label array yields no buckets
    # instead of an IndexError.
    n_buckets = max(values) + 1 if len(values) else 0
    commDict = [[] for _ in range(n_buckets)]
    for index, block in enumerate(values):
        commDict[block].append(Graph.indexIdDict[index])

    createTable(clusterCommands, commDict, conn, cur)
Beispiel #12
0
def write_communities(infile,
                      community_file,
                      member_file,
                      min_B,
                      max_B,
                      overlap=True,
                      verbose=True):
    """
    Writes communities and members from graph file.

    The graph is read with ``read_graph2``, rebuilt as a ``gt.Graph`` and
    partitioned with ``gt.minimize_blockmodel_dl``; the first element of
    ``state.get_overlap_blocks()`` supplies the blocks of every vertex.

    :param infile: graph filename
    :param community_file: output path; one line per block: the block followed by its member vertices
    :param member_file: output path; one line per vertex: the vertex followed by its blocks
    :param min_B: forwarded to ``gt.minimize_blockmodel_dl`` as ``min_B``
    :param max_B: forwarded to ``gt.minimize_blockmodel_dl`` as ``max_B``
    :param overlap: forwarded to ``gt.minimize_blockmodel_dl`` as ``overlap``
    :param verbose: forwarded to ``gt.minimize_blockmodel_dl`` as ``verbose``
    :return: None
    """
    G = read_graph2(infile)
    N = len(G)

    g = gt.Graph()
    g.add_vertex(N)

    # Edge endpoints are used directly as vertex indices of ``g``
    # (assumes read_graph2 labels nodes 0..N-1 -- TODO confirm).
    for e in G.edges():
        g.add_edge(g.vertex(e[0]), g.vertex(e[1]))

    # g = gt.collection.data["polbooks"] # sample graph to test function

    state = gt.minimize_blockmodel_dl(g,
                                      min_B=min_B,
                                      max_B=max_B,
                                      overlap=overlap,
                                      verbose=verbose)
    blocks = state.get_overlap_blocks()
    bv = blocks[0]
    Bl = dict()  # block -> list of member vertices
    with open(member_file, 'w') as f:
        for u in g.vertices():
            memberships = list(bv[u])
            # Single formatted string: prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("{} {}".format(u, memberships))
            f.write("{} {}\n".format(u, " ".join(map(str, memberships))))
            for block in memberships:
                Bl.setdefault(block, []).append(u)

    with open(community_file, 'w') as f:
        for block, nodes in Bl.items():
            f.write("{} {}\n".format(block, " ".join(map(str, nodes))))
Beispiel #13
0
def plot(G_gt, layout_gt, n_range, palette, **kwargs):  # pylint: disable=unused-argument
    """Draw ``G_gt`` inline and attach a per-block colour property to it.

    Vertex sizes are scaled from the ``degree`` vertex property into
    ``n_range[0]..n_range[1]`` and edge widths from the ``weight`` edge
    property into 1.0..4.0. A block model is fitted and each vertex's block
    picks a ``#rrggbb`` entry from ``palette``; its three channels plus a
    trailing 1 are stored in a new ``plot_color`` vertex property. The drawing
    itself fills vertices from the existing ``fill_color`` property.
    """
    labels = G_gt.vertex_properties['id']
    degrees = G_gt.vertex_properties['degree']
    node_sizes = gt.prop_to_size(degrees, n_range[0], n_range[1])
    fill_colors = G_gt.vertex_properties['fill_color']
    weights = G_gt.edge_properties['weight']
    edge_widths = gt.prop_to_size(weights, 1.0, 4.0)

    fitted = gt.minimize_blockmodel_dl(G_gt)
    block_of = fitted.get_blocks()
    plot_color = G_gt.new_vertex_property('vector<double>')
    G_gt.vertex_properties['plot_color'] = plot_color

    for position, vertex in enumerate(G_gt.vertices()):
        hex_color = palette[block_of[position]]
        # Characters 1-2, 3-4 and 5-6 are the hex-encoded colour channels.
        channels = [int(hex_color[start:start + 2], 16) for start in (1, 3, 5)]
        plot_color[vertex] = tuple(channels) + (1,)

    draw_options = dict(
        pos=layout_gt,
        output_size=(1000, 1000),
        vertex_text_position=0.0,
        vertex_text=labels,
        vertex_color=[1, 1, 1, 0],
        vertex_fill_color=fill_colors,
        vertex_size=node_sizes,
        vertex_font_family='helvetica',
        vertex_text_color='black',
        edge_pen_width=edge_widths,
        inline=True,
    )
    gt_draw.graph_draw(G_gt, **draw_options)
Beispiel #14
0
        ecolor[e] = 'green'
    i += 1

g.edge_properties["weight"] = eweight
g.edge_properties["color"] = ecolor
'''

# Layouts that were tried before the ARF layout used in the loop below:
#pos = gt.planar_layout(g)
#pos = gt.radial_tree_layout(g, g.vertex(0))

# Three rounds: compute a layout, fit a block model, and render the same
# drawing once as PDF and once as PNG.
for i in range(3):
    pos = gt.arf_layout(g, d=1, a=5, max_iter=0)  # good
    # Alternatives tried here:
    #pos = gt.fruchterman_reingold_layout(g, n_iter=1000)
    #pos = gt.sfdp_layout(g, C = 1)
    #pos = gt.circle_layout(g)

    state = gt.minimize_blockmodel_dl(g)  # , deg_corr=True, B_min = 10
    for ext in (".pdf", ".png"):
        state.draw(
            pos=pos,
            vertex_shape=state.get_blocks(),
            vertex_text=g.vertex_properties["name"],
            vertex_font_size=20,
            vertex_size=20,
            edge_pen_width=2,
            vertex_text_position=0,
            output="small_graph_top/small_graph_top_blocks_mdl_" + str(i) + ext,
            output_size=(1500, 1000),
            fit_view=1.1,
        )
    print(i)
    #gt.draw_hierarchy(state, layout="sfdp", vertex_text=g.vertex_properties["name"], vertex_font_size=24, vertex_text_position="centered", edge_color=g.edge_properties["color"], output_size=(2000, 1000), output="small_graph_mdl.pdf", fit_view = 0.8, hide = 2)

print(vchrom)
print(np.array(vchrom))


# Two-level hierarchy: vchrom as the first level, 22 distinct labels above it.
state = gt.NestedBlockState(g, [np.array(vchrom), np.arange(0, 22)])
gt.draw_hierarchy(
    state,
    vertex_text=g.vertex_properties["name"],
    vertex_font_size=24,
    vertex_text_position="centered",
    output_size=(2000, 1000),
    output="small_graph_top/small_graph_top_mdl.pdf",
    fit_view=0.8,
    hide=2,
)
Beispiel #15
0
def build_block_model(useOnDemand, viewer_condition, content_condition, size,
                      use_deg_corr, use_edge_weights, savedDir):
    """Fit a block model to the graph from ``build_tree`` and group vertices by block.

    The first five listed arguments below are forwarded unchanged to
    ``build_tree``, which returns the graph, an engagement data frame and the
    viewer / content collections.

    :param useOnDemand: forwarded to ``build_tree``
    :param viewer_condition: forwarded to ``build_tree``
    :param content_condition: forwarded to ``build_tree``
    :param size: forwarded to ``build_tree``
    :param use_deg_corr: passed as ``deg_corr`` to ``gt.minimize_blockmodel_dl``
    :param use_edge_weights: when truthy, the ``engagement`` edge property is
        supplied as a "real-exponential" edge covariate
    :param savedDir: forwarded to ``build_tree``
    :return: dict with keys ``entropy`` (state entropy), ``results`` (one entry
        per block, each holding ``Content`` / ``Viewers`` children, counts and
        aggregates), ``edges`` (records of the engagement data frame),
        ``viewers`` and ``content`` (as returned by ``build_tree``)
    """
    g, engagement_df, viewers, content = build_tree(useOnDemand,
                                                    viewer_condition,
                                                    content_condition, size,
                                                    savedDir)
    state_args = dict(recs=[g.ep.engagement], rec_types=[
        "real-exponential"
    ]) if use_edge_weights else dict()
    state = gt.minimize_blockmodel_dl(g,
                                      state_args=state_args,
                                      deg_corr=use_deg_corr)
    b = state.get_blocks()
    verticies = g.get_vertices()
    results = {
        "entropy": state.entropy(),
        "results": [],
        "edges": engagement_df.to_dict('records'),
        "viewers": viewers,
        "content": content
    }
    # Running id shared by block entries and their 'Content' / 'Viewers' nodes.
    counter = {"count": 0}
    # NOTE(review): the block map is indexed by enumeration position ``i``
    # rather than by vertex ``v``; these agree only if vertex indices are
    # 0..N-1 in order -- confirm.
    for i, v in enumerate(verticies):
        # Linear scan of the blocks seen so far for this vertex's block label.
        matching = [x for x in results.get("results") if x.get('name') == b[i]]
        children = []
        block = {}
        if matching:
            # the block was found, so get its children array so we can append a value
            block = matching[0]
            children = block.get("children")
        else:
            # the block does not yet have an entry in the result set, so add one
            counter['count'] += 1
            (content_aggs, viewer_aggs) = initAggregates()
            block = {
                "id": counter.get('count'),
                "name": b[i],
                "children": children,
                "viewer_count": 0,
                "content_count": 0,
                "viewer_aggs": viewer_aggs,
                "content_aggs": content_aggs
            }
            results['results'].append(block)

        # NOTE(review): ``v_is_content`` is not defined in this function; it
        # must come from an enclosing or module scope -- confirm.
        if v_is_content[v]:
            # the vertex is content, so check whether we already have a list of content for this block
            block['content_count'] += 1
            match = [x for x in children if x.get('name') == 'Content']
            # Rebinds the local ``content``; the value returned by build_tree
            # was already captured in ``results`` above.
            content = {"content": []}
            if match:
                # We have a content list, so get it
                content = match[0]['children'][0]
            else:
                # create a new content list
                counter['count'] += 1
                children.append({
                    "id": counter.get('count'),
                    'name': 'Content',
                    'children': [content]
                })
            result_item = get_result_item(v)
            aggregate_content(result_item, block.get("content_aggs"))
            content['content'].append(result_item)
        else:
            # the vertex is a viewer, so check whether we already have a list of viewers for this block
            block['viewer_count'] += 1
            match = [x for x in children if x.get('name') == 'Viewers']
            # Rebinds the local ``viewers``; the value returned by build_tree
            # was already captured in ``results`` above.
            viewers = {"viewers": []}
            if match:
                # we have a viewer list, so get it
                viewers = match[0]['children'][0]
            else:
                # create a new viewer list
                counter['count'] += 1
                children.append({
                    "id": counter.get('count'),
                    'name': 'Viewers',
                    'children': [viewers]
                })
            result_item = get_result_item(v)
            aggregate_viewers(result_item, block.get("viewer_aggs"))
            viewers["viewers"].append(result_item)
    return results
        AccVer = graph.vp.AccessionVersion[v]
        PtuManual = graph.vp.PtuManual[v]
        CComp = str(graph.vp.CComp[v])
        BlockCC = bcc[v]
        Block = str(b[v])
        f.write(AccVer + "\t" + PtuManual + "\t" + CComp + "\t" + BlockCC +
                "\t" + Block + "\n")
    f.close()


# Fit the flat SBM nToss times, recording every state and its entropy so the
# minimum-entropy model can be picked afterwards; each fit is also saved.
nToss = 100
state_list, entropy_list = [], []
for k in range(nToss):
    # Regular stochastic block model (SBM)
    state = gt.minimize_blockmodel_dl(g, deg_corr=False)
    entropy = state.entropy()

    # Update state
    state_list.append(state)
    entropy_list.append(entropy)
    nClass = len(np.unique(state.get_blocks().a))
    print("Toss %d of %d: %d classes, entropy %f" %
          (k, nToss, nClass, entropy))

    # Save graph
    fname = "SBM_%d_%f" % (nClass, entropy)
    write_classes(os.path.join(outdir, fname + ".tsv"), g, state)
    # ``with`` closes the pickle file after every iteration instead of
    # leaving up to nToss handles open until garbage collection.
    with open(os.path.join(outdir, fname + ".pickle"), "wb") as pickle_file:
        pickle.dump([g, state], pickle_file, -1)
    g.save(os.path.join(outdir, fname + ".gt.gz"))
Beispiel #17
0
 def compute_local_blocks(self):
     """Fit a block model to ``self.g``, print its modularity and return the blocks.

     :return: the ``b`` attribute of the fitted state (the block assignment)
     """
     state = gt.minimize_blockmodel_dl(self.g, verbose=True)
     b = state.b
     mod = gt.modularity(self.g, b)
     # Parenthesised single argument: identical output under the Python 2
     # print statement and the Python 3 print function.
     print(mod)
     return b
Beispiel #18
0
def run_analysis(netfile, compnet_files):
    '''
    Run the analysis.

    Steps, in order: load ``netfile + '.graphml'``, plot the full and
    core-only network, compute degree statistics, compute modularity and
    insularity of the core partition together with random-sample
    distributions, fit a two-block model (RNGs seeded with 5678), save the
    network in ``.gt`` and ``.graphml`` form, and finally repeat the
    random-sample distributions on every comparison network.  All output file
    names are prefixed with ``'output/'``.

    :param netfile: Filename of the network to analyze (without the
        ``.graphml`` extension, which is appended here)
    :param compnet_files: List of filenames of the comparison networks, viz.,
        the high-energy physics networks.
    :return: None
    '''
    
    # Timestamp
    # --------------------
    print(datetime.now())
    
    # Load the network
    # --------------------
    net, outfile_pre, core_pmap, core_vertices = load_net(netfile + '.graphml', 
                                                             core = True,
                                                             filter = True)
    output_folder = 'output/'
    outfile_pre = output_folder + outfile_pre
     
    # Plotting
    print('Plotting')
    layout = layout_and_plot(net, core_pmap, outfile_pre)
    # Store the layout in the net
    net.vp['layout'] = layout
    # Show only the core vertices    
    net.set_vertex_filter(core_pmap)
    layout_and_plot(net, core_pmap, outfile_pre, filename_mod = '.core.net', 
    				reverse_colors = True)
    # Remove the filter again so later steps see the whole network
    net.set_vertex_filter(None)
    
    # Vertex statistics
    # --------------------
    # ECDF for out-degree distribution
    degree_dist(net, core_vertices, outfile = outfile_pre, 
                show_plot = False, save_plot = True)
    # ECDF for eigenvector centrality
    ## Currently this is causing a segmentation fault
#     ev_centrality_dist(net, core_vertices, outfile = outfile_pre, 
#                 show_plot = False, save_plot = True)
    
    # Modularity
    # --------------------
    # Calculate modularity, using the core vertices as the partition
    modularity = gtcomm.modularity(net, core_pmap)
    print('Observed modularity: ' + str(modularity))
    obs_ins = insularity(net, core_pmap)
    print('Observed insularity: ' + str(obs_ins))
   
    # Calculate the number of core vertices
    n_core = len(core_vertices)
    # Construct a sampling distribution for the modularity statistic
    #  And use it to calculate a p-value for the modularity
    print('Random sample modularity')
    modularity_sample_dist(net, n_core, modularity,
                                outfile = outfile_pre + '.mod', 
                                show_plot = False, save_plot = True)
    # Same sampling procedure, with insularity as the statistic
    print('Random sample insularities')
    modularity_sample_dist(net, n_core, obs_ins, 
                                mod_func = insularity, 
                                outfile = outfile_pre + '.ins',
                                show_plot = False, save_plot = True)
    
    # Information-theoretic partitioning
    print('Information-theoretic partitioning')
    # Calculate the partition
    # Both graph-tool's and NumPy's RNGs are seeded with the same fixed value
    gt.seed_rng(5678)
    np.random.seed(5678)
    # B_min = B_max = 2 pins the fit to exactly two blocks
    part_block = gt.minimize_blockmodel_dl(net, B_min = 2, B_max = 2, 
    										verbose = True, 
    										overlap = False)
    # Extract the block memberships as a pmap
    net.vp['partition'] = part_block.get_blocks()
    # Calculate the modularity
    block_modularity = gtcomm.modularity(net, net.vp['partition'])
    print('Partion modularity: ' + str(block_modularity))
    print('Partition insularities')
    block_insularities = partition_insularity(net, net.vp['partition'])
    for community in block_insularities:
        print('Community ' + str(community) + ': ' + 
                str(block_insularities[community]))
    
    print('Plotting')
    # Vertex sizes derived from the core property map, scaled into 10..20
    size_pmap = gt.prop_to_size(core_pmap, mi = 10, ma = 20)
    layout_and_plot(net, net.vp['partition'], outfile_pre,
                        size_pmap = size_pmap, filename_mod = '.partition')
    
    # Modularity optimization
    optimal_sample_dist(net, modularity, obs_ins,
                                outfile = outfile_pre, 
                                show_plot = False, save_plot = True)
    

    # Save results
    # --------------------
    # The above covers all of the analysis to be written into the output files,
    #  so we'll go ahead and save things now.  
    print('Saving')
    # Save in graph-tool's binary format
    net.save(outfile_pre + '.out' + '.gt')
    # Replace vector-type properties with strings
    #net.list_properties()
    properties = net.vertex_properties
    # Only existing keys are reassigned, so the set of keys does not change
    # while iterating.
    for property_key in properties.keys():
        property = properties[property_key]
        if 'vector' in property.value_type():
            properties[property_key] = property.copy(value_type = 'string')
    # Save as graphml
    net.save(outfile_pre + '.out' + '.graphml')


    # Comparison networks
    # --------------------
    for compnet_file in compnet_files:
        # Load the comparison network
        compnet, compnet_outfile = load_net(compnet_file)
        # Set it to the same directedness as the network of interest
        compnet.set_directed(net.is_directed())
        # Size of compnet
        n_compnet = compnet.num_vertices()
        # Num vertices in compnet to use in each random partition
        # (same core fraction as in the network of interest)
        k_compnet = round(n_core / net.num_vertices() * n_compnet)
        # Sample distribution based on random partition
        print('Random sample modularities')
        print('Observed modularity: ' + str(modularity))
        modularity_sample_dist(compnet, k_compnet, modularity, 
                                outfile = outfile_pre + '.mod.' + compnet_outfile, 
                                show_plot = False, save_plot = True)
        print('Random sample insularities')
        print('Observed insularity: ' + str(obs_ins))
        modularity_sample_dist(compnet, k_compnet, obs_ins, 
                                mod_func = insularity, 
                                outfile = outfile_pre + '.ins.' + compnet_outfile,
                                show_plot = False, save_plot = True)
        # Sample distribution based on optimizing modularity
#         optimal_sample_dist(compnet, modularity, n_samples = 300, 
#                                 outfile = outfile_pre + '.mod.' + compnet_outfile,  
#                                 show_plot = False)


    # Timestamp
    # --------------------
    print(datetime.now())
    # Visually separate analyses
    print('-'*40)
        "mag_journal_id": nodes,
        "community_id": cids
    })


# Command line: <script> YEAR [YEAR ...] OUTPUT
if __name__ == "__main__":

    # The last argument is the output path; pop() removes it from sys.argv so
    # that everything left after the script name is a year.
    OUTPUT = sys.argv.pop()
    YEARS = [int(y) for y in sys.argv[1:]]

    print("years", YEARS)

    print("Loading networks")
    A, Araw, nodes = utils.load_network(YEARS)

    print("Construct graph tool graph object")
    G = to_graph_tool(A)

    print("Estimating")
    # Degree-corrected fit using the graph's ``weight`` edge property; the
    # number of blocks is capped at a third of the first dimension of A.
    states = gt.minimize_blockmodel_dl(
        G,
        deg_corr=True,
        state_args=dict(eweight=G.ep.weight),
        verbose=True,
        B_max=np.round(A.shape[0] / 3).astype(int),
    )

    print("Save")
    community_table = make_community_table(states, nodes)
    # Written tab-separated to the path given as the last CLI argument
    community_table.to_csv(OUTPUT, sep="\t")
Beispiel #20
0
#import networkx as nx
#module load graph_tool
#from graph_tool.all import *
import graph_tool.all as gt
import matplotlib.pyplot as plt

# Name used both as the directory and as the file-name prefix of the network
ht = "ilmastonmuutos"
g = gt.load_graph(ht + "/" + ht + "_retweet_network_giant.gml")
#g = gt.Graph()

#bmodel = gt.minimize_nested_blockmodel_dl(g, B_min=2, B_max=2)
# Degree-corrected fit; B_min = B_max = 2 pins it to exactly two blocks
current_bmodel = gt.minimize_blockmodel_dl(g,
                                           B_min=2,
                                           B_max=2,
                                           deg_corr=True,
                                           verbose=True)

# Disabled: repeat the fit 100 times and keep the lowest-entropy model
#for _ in range(100):
#    bmodel = gt.minimize_blockmodel_dl(g, B_min=2, B_max=2, deg_corr=True, verbose=True)
#    if (bmodel.entropy() < current_bmodel.entropy()):
#        current_bmodel = bmodel
#    else:
#        continue

#print(len(bmodel.get_bs()))
#bmodel.draw()
current_bmodel.draw()
b = current_bmodel.get_blocks()
# Copy the block label of every vertex into a new int16 vertex property
community = g.new_vertex_property("int16_t")
for v in g.vertices():
    community[v] = b[v]
            load_true_partition=False,
            strm_piece_num=part,
            out_neighbors=out_neighbors,
            in_neighbors=in_neighbors)
else:
    out_neighbors, in_neighbors, N, E, true_partition = load_graph(
        input_filename, load_true_partition=True)

# Build the graph-tool graph from the adjacency lists: one edge (i, j) for
# every j in column 0 of out_neighbors[i]; vertices without out-neighbours
# contribute no edges.
# NOTE(review): presumably column 0 holds the neighbour index -- confirm
# against load_graph.
input_graph = gt.Graph()
input_graph.add_edge_list([(i, j) for i in range(len(out_neighbors))
                           if len(out_neighbors[i]) > 0
                           for j in out_neighbors[i][:, 0]])
# Start of the timed section (partitioning only)
t0 = timeit.default_timer()
# the parallel switch determines whether MCMC updates are run in parallel, epsilon is the convergence threshold for
# the nodal updates (smaller value is stricter), and the verbose option prints updates on each step of the algorithm.
# Please refer to the graph-tool documentation under graph-tool.inference for details on the input parameters

# A non-positive thread count leaves graph-tool's OpenMP setting untouched
if args.threads > 0:
    gt.openmp_set_num_threads(args.threads)

graph_tool_partition = gt.minimize_blockmodel_dl(input_graph,
                                                 mcmc_args={'parallel': True},
                                                 mcmc_equilibrate_args={
                                                     'verbose': False,
                                                     'epsilon': 1e-4
                                                 },
                                                 verbose=True)
t1 = timeit.default_timer()
print('\nGraph partition took {} seconds'.format(t1 - t0))
# Compare the inferred block labels with the ground-truth partition
evaluate_partition(true_partition, graph_tool_partition.get_blocks().a)
Beispiel #22
0
        elif (args.weight_model == 'Exponential'):
            state = gt.minimize_nested_blockmodel_dl(
                g,
                deg_corr=args.deg_corr,
                state_args=dict(recs=[y], rec_types=['real-exponential']))
        else:
            state = gt.minimize_nested_blockmodel_dl(
                g,
                deg_corr=args.deg_corr,
                state_args=dict(recs=[y], rec_types=['real-normal']))
        state_0 = state.get_levels()[0]
        nClass = len(np.unique(state_0.get_blocks().a))
    else:
        # Flat stochastic block model (SBM)
        if (args.weight_model == 'None'):
            state = gt.minimize_blockmodel_dl(g, deg_corr=args.deg_corr)
        elif (args.weight_model == 'Exponential'):
            state = gt.minimize_blockmodel_dl(
                g,
                deg_corr=args.deg_corr,
                state_args=dict(recs=[y], rec_types=['real-exponential']))
        else:
            state = gt.minimize_blockmodel_dl(g,
                                              deg_corr=args.deg_corr,
                                              state_args=dict(
                                                  recs=[y],
                                                  rec_types=['real-normal']))
        nClass = len(np.unique(state.get_blocks().a))
    entropy = state.entropy()

    # Update state
Beispiel #23
0
def find_communities(nnodes, edges, alg, params=None):
    def membership2cs(membership):
        cs = {}
        for i, m in enumerate(membership):
            cs.setdefault(m, []).append(i)
        return cs.values()

    def connected_subgraphs(G: nx.Graph):
        for comp in nx.connected_components(G):
            sub = nx.induced_subgraph(G, comp)
            sub = nx.convert_node_labels_to_integers(sub,
                                                     label_attribute='old')
            yield sub

    def apply_subgraphs(algorithm, **params):
        cs = []
        for sub in connected_subgraphs(G):
            if len(sub.nodes) <= 3:
                coms = [sub.nodes]  # let it be a cluster
            else:
                coms = algorithm(sub, **params)
                if hasattr(coms, 'communities'):
                    coms = coms.communities

            for com in coms:
                cs.append([sub.nodes[i]['old'] for i in set(com)])
        return cs

    def karate_apply(algorithm, graph, **params):
        model = algorithm(**params)
        model.fit(graph)
        return membership2cs(model.get_memberships().values())

    if alg == 'big_clam':
        c = -1 if params['c'] == 'auto' else int(params['c'])
        cs = BigClam('../../snap').run(edges, c=c, xc=int(params['xc']))
    elif alg in ('gmm', 'kclique', 'lprop', 'lprop_async', 'fluid',
                 'girvan_newman', 'angel', 'congo', 'danmf', 'egonet_splitter',
                 'lfm', 'multicom', 'nmnf', 'nnsed', 'node_perception', 'slpa',
                 'GEMSEC', 'EdMot', 'demon'):
        G = nx.Graph()
        G.add_edges_from(edges)

        if alg == 'gmm':
            cs = community.greedy_modularity_communities(G)
        elif alg == 'kclique':
            params = {k: float(v) for k, v in params.items()}
            cs = community.k_clique_communities(G, **params)
        elif alg == 'lprop':
            cs = community.label_propagation_communities(G)
        elif alg == 'lprop_async':
            cs = community.asyn_lpa_communities(G, seed=0)
        elif alg == 'fluid':
            params = {k: int(v) for k, v in params.items()}
            params['seed'] = 0
            cs = apply_subgraphs(community.asyn_fluidc, **params)
        elif alg == 'girvan_newman':
            comp = community.girvan_newman(G)
            for cs in itertools.islice(comp, int(params['k'])):
                pass
        elif alg == 'angel':
            params = {k: float(v) for k, v in params.items()}
            cs = cdlib.angel(G, **params).communities
        elif alg == 'congo':  # too slow
            ncoms = int(params['number_communities'])
            cs = []
            for sub in connected_subgraphs(G):
                if len(sub.nodes) <= max(3, ncoms):
                    cs.append(sub.nodes)  # let it be a cluster
                else:
                    coms = cdlib.congo(sub,
                                       number_communities=ncoms,
                                       height=int(params['height']))
                    for com in coms.communities:
                        cs.append([sub.nodes[i]['old'] for i in set(com)])
        elif alg == 'danmf':  # no overlapping
            cs = apply_subgraphs(cdlib.danmf)
        elif alg == 'egonet_splitter':
            params['resolution'] = float(params['resolution'])
            cs = apply_subgraphs(cdlib.egonet_splitter, **params)
        elif alg == 'lfm':
            coms = cdlib.lfm(G, float(params['alpha']))
            cs = coms.communities
        elif alg == 'multicom':
            cs = cdlib.multicom(G, seed_node=0).communities
        elif alg == 'nmnf':
            params = {k: int(v) for k, v in params.items()}
            cs = apply_subgraphs(cdlib.nmnf, **params)
        elif alg == 'nnsed':
            cs = apply_subgraphs(cdlib.nnsed)
        elif alg == 'node_perception':  # not usable
            params = {k: float(v) for k, v in params.items()}
            cs = cdlib.node_perception(G, **params).communities
        elif alg == 'slpa':
            params["t"] = int(params["t"])
            params["r"] = float(params["r"])
            cs = cdlib.slpa(G, **params).communities
        elif alg == 'demon':
            params = {k: float(v) for k, v in params.items()}
            cs = cdlib.demon(G, **params).communities
        elif alg == 'GEMSEC':
            # gamma = float(params.pop('gamma'))
            params = {k: int(v) for k, v in params.items()}
            # params['gamma'] = gamma
            params['seed'] = 0
            _wrap = partial(karate_apply, karateclub.GEMSEC)
            cs = apply_subgraphs(_wrap, **params)
        elif alg == 'EdMot':
            params = {k: int(v) for k, v in params.items()}
            _wrap = partial(karate_apply, karateclub.EdMot)
            cs = apply_subgraphs(_wrap, **params)

    elif alg in ('infomap', 'community_leading_eigenvector', 'leig',
                 'multilevel', 'optmod', 'edge_betweenness', 'spinglass',
                 'walktrap', 'leiden', 'hlc'):
        G = igraph.Graph()
        G.add_vertices(nnodes)
        G.add_edges(edges)

        if alg == 'infomap':
            vcl = G.community_infomap(trials=int(params['trials']))
            cs = membership2cs(vcl.membership)
        elif alg == 'leig':
            clusters = None if params['clusters'] == 'auto' else int(
                params['clusters'])
            vcl = G.community_leading_eigenvector(clusters=clusters)
            cs = membership2cs(vcl.membership)
        elif alg == 'multilevel':
            vcl = G.community_multilevel()
            cs = membership2cs(vcl.membership)
        elif alg == 'optmod':  # too long
            membership, modularity = G.community_optimal_modularity()
            cs = membership2cs(vcl.membership)
        elif alg == 'edge_betweenness':
            clusters = None if params['clusters'] == 'auto' else int(
                params['clusters'])
            dendrogram = G.community_edge_betweenness(clusters, directed=False)
            try:
                clusters = dendrogram.as_clustering()
            except:
                return []
            cs = membership2cs(clusters.membership)
        elif alg == 'spinglass':  # only for connected graph
            vcl = G.community_spinglass(parupdate=True,
                                        update_rule=params['update_rule'],
                                        start_temp=float(params['start_temp']),
                                        stop_temp=float(params['stop_temp']))
            cs = membership2cs(vcl.membership)
        elif alg == 'walktrap':
            dendrogram = G.community_walktrap(steps=int(params['steps']))
            try:
                clusters = dendrogram.as_clustering()
            except:
                return []
            cs = membership2cs(clusters.membership)
        elif alg == 'leiden':
            vcl = G.community_leiden(
                objective_function=params['objective_function'],
                resolution_parameter=float(params['resolution_parameter']),
                n_iterations=int(params['n_iterations']))
            cs = membership2cs(vcl.membership)
        elif alg == 'hlc':
            algorithm = HLC(G, min_size=int(params['min_size']))
            cs = algorithm.run(None)

    elif alg in ("sbm", "sbm_nested"):
        np.random.seed(42)
        gt.seed_rng(42)

        G = gt.Graph(directed=False)
        G.add_edge_list(edges)

        deg_corr = bool(params['deg_corr'])
        B_min = None if params['B_min'] == 'auto' else int(params['B_min'])
        B_max = None if params['B_max'] == 'auto' else int(params['B_max'])

        if alg == "sbm":
            state = gt.minimize_blockmodel_dl(G,
                                              deg_corr=deg_corr,
                                              B_min=B_min,
                                              B_max=B_max)

            membership = state.get_blocks()
            cs = membership2cs(membership)
        if alg == "sbm_nested":
            state = gt.minimize_nested_blockmodel_dl(G,
                                                     deg_corr=deg_corr,
                                                     B_min=B_min,
                                                     B_max=B_max)
            levels = state.get_bs()
            level_max = int(params['level'])

            membership = {}
            for nid in range(nnodes):
                cid = nid
                level_i = len(levels)
                for level in levels:
                    cid = level[cid]
                    if level_i == level_max:
                        membership.setdefault(cid, []).append(nid)
                        break
                    level_i -= 1

            cs = membership.values()

    else:
        return None

    return list(cs)
Beispiel #24
0
        ecolor3[e] = "#8073ac"
        esize3[e] = 1

# Style every edge of G4: an edge whose endpoints appear in ``in_edges4`` in
# either orientation is highlighted; all other edges get the default style.
for e in G4.edges():
    source, target = int(e.source()), int(e.target())
    highlighted = ((source, target) in in_edges4
                   or (target, source) in in_edges4)
    ecolor4[e] = "#e08214" if highlighted else "#8073ac"
    esize4[e] = 2.5 if highlighted else 1

# ---------------------------- draw graph ----------------------------
# Fit a block model on G2 and keep the ``b`` attribute of the fitted state.
state = gt.minimize_blockmodel_dl(G2)
b = state.b

# Vertex appearance for the G1 drawing; the edge colours and widths come from
# the property maps filled in above.
node_style = dict(vertex_size=15,
                  vertex_shape="circle",
                  vertex_fill_color="#3f3f3f")
gt.graph_draw(G1,
              pos1,
              edge_color=ecolor1,
              edge_pen_width=esize1,
              output="1.network1.pdf",
              **node_style)
gt.graph_draw(G2,
              pos2,
              vertex_size=15,
              vertex_shape="circle",
              vertex_fill_color="#3f3f3f",