def predict_communities(self, deg_corr):
    """Fit a block model and sample it to estimate the model evidence.

    A block model is first fitted with ``gt.minimize_blockmodel_dl`` (using
    the layered, edge-covariate variant when ``self.is_weighted`` is true).
    The fitted state is then copied with ``B`` set to the number of vertices
    and sampled with ``gt.mcmc_equilibrate``; during sampling the vertex and
    edge marginals, the entropy history and a histogram of the number of
    non-empty blocks are accumulated.

    Results are stored per ``deg_corr`` key in ``self.dls_``, ``self.vm_``,
    ``self.em_``, ``self.h_``, ``self.state_``, ``self.S_bethe_``,
    ``self.S_mf_`` and ``self.L_``.

    Parameters
    ----------
    deg_corr : passed through as ``deg_corr`` to ``minimize_blockmodel_dl``
        and used as the key under which all results are stored.

    Returns
    -------
    None
    """
    graph = self.coocurence_graph

    if self.is_weighted:
        state = gt.minimize_blockmodel_dl(
            graph,
            overlap=self.allow_overlap,
            deg_corr=deg_corr,
            layers=True,
            state_args=dict(ec=self.weights, layers=False))
    else:
        state = gt.minimize_blockmodel_dl(
            graph,
            overlap=self.allow_overlap,
            deg_corr=deg_corr)

    state = state.copy(B=graph.num_vertices())

    self.dls_[deg_corr] = []    # description length history
    self.vm_[deg_corr] = None   # vertex marginals
    self.em_[deg_corr] = None   # edge marginals
    # One histogram slot for every possible number of non-empty blocks
    # (0 .. num_vertices inclusive).
    self.h_[deg_corr] = np.zeros(graph.num_vertices() + 1)

    def collect_marginals(s):
        """MCMC callback: fold the current state into the accumulators."""
        self.vm_[deg_corr] = s.collect_vertex_marginals(self.vm_[deg_corr])
        self.em_[deg_corr] = s.collect_edge_marginals(self.em_[deg_corr])
        self.dls_[deg_corr].append(s.entropy())
        self.h_[deg_corr][s.get_nonempty_B()] += 1

    # Collect the marginal distributions; the number of forced iterations is
    # taken from self.n_iters rather than being a fixed count.
    gt.mcmc_equilibrate(state,
                        force_niter=self.n_iters,
                        mcmc_args=dict(niter=self.n_init_iters),
                        callback=collect_marginals,
                        **self.equlibrate_options)

    S_mf = gt.mf_entropy(graph, self.vm_[deg_corr])
    S_bethe = gt.bethe_entropy(graph, self.em_[deg_corr])[0]
    L = -np.mean(self.dls_[deg_corr])

    self.state_[deg_corr] = copy.copy(state)
    self.S_bethe_[deg_corr] = copy.copy(S_bethe)
    self.S_mf_[deg_corr] = copy.copy(S_mf)
    self.L_[deg_corr] = copy.copy(L)

    if self.verbose:
        # Build the message explicitly: wrapping the arguments in a second
        # pair of parentheses made print() emit the repr of a tuple.
        print("Model evidence for deg_corr = %s: %s (mean field), %s (Bethe)"
              % (deg_corr, L + S_mf, L + S_bethe))
def write_communities(infile, community_file, member_file, min_B, max_B,
                      overlap=True, verbose=True):
    """
    Writes communities and members from graph file.

    The graph is read with ``read_graph2``, rebuilt as a ``gt.Graph`` and
    partitioned with ``gt.minimize_blockmodel_dl``.  Two text files are
    produced, one whitespace-separated record per line:

    * ``member_file``: ``<vertex> <block> <block> ...``
    * ``community_file``: ``<block> <vertex> <vertex> ...``

    :param infile: graph filename
    :param community_file: community to members
    :param member_file: node to communities
    :param min_B: forwarded to ``minimize_blockmodel_dl`` as ``min_B``
    :param max_B: forwarded to ``minimize_blockmodel_dl`` as ``max_B``
    :param overlap: forwarded to ``minimize_blockmodel_dl``
    :param verbose: forwarded to ``minimize_blockmodel_dl``
    :return: None
    """
    G = read_graph2(infile)
    N = len(G)

    g = gt.Graph()
    g.add_vertex(N)
    for e in G.edges():
        g.add_edge(g.vertex(e[0]), g.vertex(e[1]))
    # g = gt.collection.data["polbooks"]  # sample graph to test function

    state = gt.minimize_blockmodel_dl(g, min_B=min_B, max_B=max_B,
                                      overlap=overlap, verbose=verbose)
    blocks = state.get_overlap_blocks()
    bv = blocks[0]

    Bl = dict()
    with open(member_file, "w") as f:
        for u in g.vertices():
            # Materialise the membership once per vertex instead of three times.
            members = list(bv[u])
            # Single-argument print(): same output on Python 2 and Python 3,
            # whereas the old `print u, ...` statement only parsed on Python 2.
            print("{} {}".format(u, members))
            f.write("{} {}\n".format(u, " ".join(map(str, members))))
            for block in members:
                Bl.setdefault(block, []).append(u)

    with open(community_file, "w") as f:
        for block, nodes in Bl.items():
            f.write("{} {}\n".format(block, " ".join(map(str, nodes))))
def fit_predict(self, X, y):
    """ Performs clustering on y and returns the label sets

    Builds a label coocurence_graph using
    :func:`LabelCooccurenceClustererBase.generate_coocurence_adjacency_matrix`
    on `y` and then detects communities using graph tool's stochastic block
    modeling.

    Parameters
    ----------
    X : sparse matrix (n_samples, n_features), feature space,
        not used in this clusterer
    y : sparse matrix (n_samples, n_labels), label space

    Returns
    -------
    partition : ``np.array(self.label_sets)``, where ``self.label_sets`` is a
        list of lists of label indexes; each sublist holds the labels that
        were assigned to the same block
    """
    self.generate_coocurence_adjacency_matrix(y)
    self.generate_coocurence_graph()

    d = gt.minimize_blockmodel_dl(
        self.coocurence_graph,
        overlap=self.allow_overlap,
        ec=self.weights)
    A = d.get_blocks().a

    # `range`/`enumerate` instead of the Python-2-only `xrange`, so the
    # method runs on both Python 2 and Python 3.
    self.label_sets = [[] for _ in range(d.B)]
    for k, block in enumerate(A):
        self.label_sets[block].append(k)

    self.model_count = len(self.label_sets)
    return np.array(self.label_sets)
def fit_predict(self, X, y):
    """ Performs clustering on y and returns the label sets

    Builds a label coocurence_graph using
    :func:`LabelCooccurenceClustererBase.generate_coocurence_adjacency_matrix`
    on `y` and then detects communities using graph tool's stochastic block
    modeling.

    Parameters
    ----------
    X : sparse matrix (n_samples, n_features), feature space,
        not used in this clusterer
    y : sparse matrix (n_samples, n_labels), label space

    Returns
    -------
    partition : ``np.array(self.label_sets)``, where ``self.label_sets`` is a
        list of lists of label indexes; each sublist holds the labels that
        were assigned to the same block
    """
    self.generate_coocurence_adjacency_matrix(y)
    self.generate_coocurence_graph()

    d = gt.minimize_blockmodel_dl(self.coocurence_graph,
                                  overlap=self.allow_overlap,
                                  ec=self.weights)
    A = d.get_blocks().a

    # `range`/`enumerate` instead of the Python-2-only `xrange`, so the
    # method runs on both Python 2 and Python 3.
    self.label_sets = [[] for _ in range(d.B)]
    for k, block in enumerate(A):
        self.label_sets[block].append(k)

    self.model_count = len(self.label_sets)
    return np.array(self.label_sets)
def _fit(self) -> None:
    """Fit a block model to ``self.input_graph`` and keep the fitted state.

    The input graph is converted with ``networkx_to_graphtool`` and handed to
    ``minimize_blockmodel_dl``; the resulting state object is stored under
    ``self.params['state']``.
    """
    # graph-tool is imported inside the method rather than at module level.
    import graph_tool.all as gt

    converted_graph = networkx_to_graphtool(self.input_graph)
    self.params['state'] = gt.minimize_blockmodel_dl(converted_graph)
def community_structure_test(graph):
    """Fit a block model to ``graph`` and return the modularity of its blocks.

    A progress message and the wall-clock duration of the fit plus the
    modularity computation are written to standard output.
    """
    sys.stdout.write('Getting community structure ...')
    sys.stdout.flush()

    started_at = time.time()
    block_state = gt.minimize_blockmodel_dl(graph)
    modularity = gt.modularity(graph, block_state.get_blocks())
    finished_at = time.time()

    elapsed = finished_at - started_at
    sys.stdout.write('Ok! ({0} s.)\n'.format(elapsed))
    sys.stdout.flush()

    return modularity
def sbm_partitioner(graph, pmin, pmax, runs=5):
    """Split ``graph`` into sub-graphs according to a block-model partition.

    The partition is obtained, in order of preference, from

    1. a cached ``graph.structures["hybrid_partition"]``;
    2. ``graph.best_SBM()[0]`` when no block-count bounds are requested and
       ``"sbm"`` is present in ``graph.structures``;
    3. a fresh search: ``runs`` degree-corrected fits followed by ``runs``
       plain fits, keeping the structure with the lowest entropy.

    In cases 2 and 3 the chosen structure is cached under
    ``graph.structures["hybrid_partition"]``.

    :param graph: project graph wrapper exposing ``structures``,
        ``partition``, ``best_SBM``, ``gtgraph``, ``nxgraph`` and ``N``
    :param pmin: lower bound forwarded as ``B_min`` (may be ``None``)
    :param pmax: upper bound forwarded as ``B_max`` (may be ``None``)
    :param runs: number of fits per model variant in case 3
    :return: ``(graph_partitions, partitions)`` where ``partitions`` is a list
        of vertex-index lists and ``graph_partitions`` the matching list of
        ``eg.Experiment_Graph`` objects built from the induced sub-graphs
    :raises ValueError: if a fresh search is needed and ``runs`` is below 1
    """
    if "hybrid_partition" in graph.structures:
        structure = graph.structures["hybrid_partition"]
        partition = graph.partition(structure, "sbm")
    elif pmax is None and pmin is None and "sbm" in graph.structures:
        structure = graph.best_SBM()[0]
        partition = graph.partition(structure, "sbm")
        graph.structures["hybrid_partition"] = structure
    else:
        g = graph.gtgraph
        best_entropy = float("inf")
        best_structure = None
        best_kind = None

        # Degree-corrected fits first, then plain fits, as before.
        variants = (("dc_sbm", {"deg_corr": True}), ("sbm", {}))
        for kind, extra_args in variants:
            for _ in range(runs):
                candidate = gt.minimize_blockmodel_dl(
                    g, B_min=pmin, B_max=pmax, **extra_args)
                entropy = candidate.entropy()
                # The running minimum must actually be updated; previously
                # every run compared against infinity, so the last one won.
                if entropy < best_entropy:
                    best_entropy = entropy
                    best_structure = candidate
                    best_kind = kind

        if best_structure is None:
            raise ValueError("sbm_partitioner needs runs >= 1 to fit a model")

        partition = graph.partition(best_structure, best_kind)
        graph.structures["hybrid_partition"] = best_structure

    # graph.draw_graph_communities(partition)
    # plt.show()

    # Map each distinct label to a dense slot so that non-contiguous block
    # labels do not index past the end of the list.  For labels 0..k-1 the
    # mapping is the identity.
    unique = np.unique(partition)
    slot_of = {label: slot for slot, label in enumerate(unique)}
    partitions = [[] for _ in range(len(unique))]
    for i in range(graph.N):
        partitions[slot_of[partition[i]]].append(i)

    ng = graph.nxgraph
    graph_partitions = []
    for part in partitions:
        subgraph = nx.convert_node_labels_to_integers(ng.subgraph(part))
        graph_partitions.append(eg.Experiment_Graph("part", nxgraph=subgraph))

    return graph_partitions, partitions
def sbm_dl(g, B_min=None, B_max=None, deg_corr=True, **kwargs):
    """Efficient Monte Carlo and greedy heuristic for the inference of stochastic block models.

    Fit a non-overlapping stochastic block model (SBM) by minimizing its
    description length using an agglomerative heuristic.
    If no parameter is given, the number of blocks will be discovered
    automatically. Bounds for the number of communities can be provided using
    B_min, B_max.

    :param g: input graph; it is converted to ``nx.Graph`` before fitting
    :param B_min: minimum number of communities that can be found
    :param B_max: maximum number of communities that can be found
    :param deg_corr: if true, use the degree corrected version of the SBM
    :param kwargs: accepted but not used by this function
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = sbm_dl(G)

    :References:

    Tiago P. Peixoto, "Efficient Monte Carlo and greedy heuristic for the
    inference of stochastic block models", Phys. Rev. E 89, 012804 (2014),
    DOI: 10.1103/PhysRevE.89.012804, arXiv: 1310.4378.

    .. note:: Use implementation from graph-tool library, please report to
        https://graph-tool.skewed.de for details
    """
    # Fail early with installation guidance when graph-tool is unavailable.
    if gt is None:
        raise Exception(
            "===================================================== \n"
            "The graph-tool library seems not to be installed (or incorrectly installed). \n"
            "Please check installation procedure there https://git.skewed.de/count0/graph-tool/wikis/installation-instructions#native-installation \n"
            "on linux/mac, you can use package managers to do so(apt-get install python3-graph-tool, brew install graph-tool, etc.)"
        )

    nx_graph = convert_graph_formats(g, nx.Graph)
    gt_graph, label_map = __from_nx_to_graph_tool(nx_graph)

    state = gt.minimize_blockmodel_dl(gt_graph, B_min, B_max,
                                      deg_corr=deg_corr)
    block_of = state.get_blocks().get_array()

    # Translate graph-tool vertex indices back to the caller's node labels.
    node_to_block = {}
    for index, block in enumerate(block_of):
        node_to_block[label_map[index]] = block

    grouped = affiliations2nodesets(node_to_block)
    communities = [list(members) for members in grouped.values()]

    parameters = {"B_min": B_min, "B_max": B_max, "deg_corr": deg_corr}
    return NodeClustering(communities, g, "SBM",
                          method_parameters=parameters)
def draw_football():
    """Fit a non-degree-corrected block model to the "football" graph.

    Side effects: prints the result of ``list_properties()`` and the block of
    vertex 10, writes the fitted partition drawing to
    ``football-sbm-fit.png`` and the block edge-count matrix to
    ``football-edge-counts.png``.
    """
    football = gt.collection.data["football"]
    print(football.list_properties())

    fitted = gt.minimize_blockmodel_dl(football, deg_corr=False)
    fitted.draw(pos=football.vp.pos, output="football-sbm-fit.png")

    # Group membership of vertex 10.
    membership = fitted.get_blocks()
    print(membership[10])

    edge_counts = fitted.get_matrix()
    plt.matshow(edge_counts.todense())
    plt.savefig("football-edge-counts.png")
def blockModel(clusterCommands, Graph, conn, cur):
    """Partition ``Graph.g`` with a block model and store the communities.

    The block membership array is turned into a list ``commDict`` in which
    entry ``c`` holds the node IDs (looked up through ``Graph.indexIdDict``)
    of all vertices assigned to block ``c``.  The list is then handed to
    ``createTable`` together with ``clusterCommands``, ``conn`` and ``cur``.

    An empty membership array yields an empty community list instead of an
    IndexError.
    """
    gt.openmp_set_num_threads(4)  # enable 4 threads for running the algorithm

    state = gt.minimize_blockmodel_dl(Graph.g)
    values = state.b.get_array()

    # max() replaces sorting the whole array just to read its last element.
    community_count = int(max(values)) + 1 if len(values) else 0
    commDict = [[] for _ in range(community_count)]

    # Position in the membership array is the vertex index.
    for index, community in enumerate(values):
        nodeID = Graph.indexIdDict[index]
        commDict[community].append(nodeID)

    createTable(clusterCommands, commDict, conn, cur)
def write_communities(infile, community_file, member_file, min_B, max_B,
                      overlap=True, verbose=True):
    """
    Writes communities and members from graph file.

    The graph is read with ``read_graph2``, rebuilt as a ``gt.Graph`` and
    partitioned with ``gt.minimize_blockmodel_dl``.  Two text files are
    produced, one whitespace-separated record per line:

    * ``member_file``: ``<vertex> <block> <block> ...``
    * ``community_file``: ``<block> <vertex> <vertex> ...``

    :param infile: graph filename
    :param community_file: community to members
    :param member_file: node to communities
    :param min_B: forwarded to ``minimize_blockmodel_dl`` as ``min_B``
    :param max_B: forwarded to ``minimize_blockmodel_dl`` as ``max_B``
    :param overlap: forwarded to ``minimize_blockmodel_dl``
    :param verbose: forwarded to ``minimize_blockmodel_dl``
    :return: None
    """
    G = read_graph2(infile)
    N = len(G)

    g = gt.Graph()
    g.add_vertex(N)
    for e in G.edges():
        g.add_edge(g.vertex(e[0]), g.vertex(e[1]))
    # g = gt.collection.data["polbooks"]  # sample graph to test function

    state = gt.minimize_blockmodel_dl(g, min_B=min_B, max_B=max_B,
                                      overlap=overlap, verbose=verbose)
    blocks = state.get_overlap_blocks()
    bv = blocks[0]

    Bl = dict()
    with open(member_file, 'w') as f:
        for u in g.vertices():
            # Materialise the membership once per vertex instead of three times.
            members = list(bv[u])
            # Single-argument print(): same output on Python 2 and Python 3,
            # whereas the old `print u, ...` statement only parsed on Python 2.
            print("{} {}".format(u, members))
            f.write("{} {}\n".format(u, " ".join(map(str, members))))
            for block in members:
                Bl.setdefault(block, []).append(u)

    with open(community_file, 'w') as f:
        for block, nodes in Bl.items():
            f.write("{} {}\n".format(block, " ".join(map(str, nodes))))
def plot(G_gt, layout_gt, n_range, palette, **kwargs):  # pylint: disable=unused-argument
    """Draw ``G_gt`` inline and attach a per-block ``plot_color`` property.

    Vertex sizes are scaled from the ``degree`` vertex property into
    ``n_range``; edge widths are scaled from the ``weight`` edge property
    into ``[1.0, 4.0]``.  A block model is fitted and every vertex receives,
    in the new ``plot_color`` vertex property, the palette colour of its
    block.  The drawing itself fills vertices from the existing
    ``fill_color`` property.

    ``palette`` entries are indexed by block number and sliced as
    ``'#rrggbb'``-style strings.
    """
    labels = G_gt.vertex_properties['id']
    degrees = G_gt.vertex_properties['degree']
    vertex_sizes = gt.prop_to_size(degrees, n_range[0], n_range[1])
    fill_colors = G_gt.vertex_properties['fill_color']
    weights = G_gt.edge_properties['weight']
    edge_widths = gt.prop_to_size(weights, 1.0, 4.0)

    block_of = gt.minimize_blockmodel_dl(G_gt).get_blocks()

    plot_color = G_gt.new_vertex_property('vector<double>')
    G_gt.vertex_properties['plot_color'] = plot_color

    def hex_to_rgba(hex_color):
        # Parse the three two-digit hex channels after the leading character
        # and append an alpha of 1.
        # NOTE(review): channels stay in 0-255 while alpha is 1 -- confirm
        # the intended scale for this property.
        channels = [int(hex_color[start:start + 2], 16) for start in (1, 3, 5)]
        return tuple(channels) + (1,)

    for position, vertex in enumerate(G_gt.vertices()):
        plot_color[vertex] = hex_to_rgba(palette[block_of[position]])

    draw_options = dict(
        pos=layout_gt,
        output_size=(1000, 1000),
        vertex_text_position=0.0,
        vertex_text=labels,
        vertex_color=[1, 1, 1, 0],
        vertex_fill_color=fill_colors,
        vertex_size=vertex_sizes,
        vertex_font_family='helvetica',
        vertex_text_color='black',
        edge_pen_width=edge_widths,
        inline=True,
    )
    gt_draw.graph_draw(G_gt, **draw_options)
ecolor[e] = 'green' i += 1 g.edge_properties["weight"] = eweight g.edge_properties["color"] = ecolor ''' #pos = gt.planar_layout(g) #pos = gt.radial_tree_layout(g, g.vertex(0)) for i in range(3): pos = gt.arf_layout(g, d = 1, a = 5, max_iter=0) # good #pos = gt.fruchterman_reingold_layout(g, n_iter=1000) #pos = gt.sfdp_layout(g, C = 1) #pos = gt.circle_layout(g) #gt.graph_draw(g, pos = pos, vertex_text=g.vertex_properties["name"], vertex_font_size=20, vertex_size=10, vertex_color = 'white', vertex_fill_color = 'blue', vertex_text_position=0, output_size=(2000, 1000), output="imgs/small_graph_top_" + str(i) + ".pdf") #gt.graph_draw(g, pos = pos, vertex_text=g.vertex_properties["name"], vertex_font_size=20, vertex_size=10, vertex_color = 'white', vertex_fill_color = 'blue', vertex_text_position=0, output_size=(2000, 1000), output="imgs/small_graph_top_" + str(i) + ".png") state = gt.minimize_blockmodel_dl(g) # , deg_corr=True, B_min = 10 state.draw(pos=pos, vertex_shape=state.get_blocks(), vertex_text=g.vertex_properties["name"], vertex_font_size=20, vertex_size=20, edge_pen_width = 2, vertex_text_position=0, output="small_graph_top/small_graph_top_blocks_mdl_" + str(i) + ".pdf", output_size=(1500, 1000), fit_view=1.1) state.draw(pos=pos, vertex_shape=state.get_blocks(), vertex_text=g.vertex_properties["name"], vertex_font_size=20, vertex_size=20, edge_pen_width = 2, vertex_text_position=0, output="small_graph_top/small_graph_top_blocks_mdl_" + str(i) + ".png", output_size=(1500, 1000), fit_view=1.1) print(i) #gt.draw_hierarchy(state, layout="sfdp", vertex_text=g.vertex_properties["name"], vertex_font_size=24, vertex_text_position="centered", edge_color=g.edge_properties["color"], output_size=(2000, 1000), output="small_graph_mdl.pdf", fit_view = 0.8, hide = 2) print(vchrom) print(np.array(vchrom)) state = gt.NestedBlockState(g, [np.array(vchrom), np.arange(0, 22)]) gt.draw_hierarchy(state, vertex_text=g.vertex_properties["name"], vertex_font_size=24, 
vertex_text_position="centered", output_size=(2000, 1000), output="small_graph_top/small_graph_top_mdl.pdf", fit_view = 0.8, hide = 2)
def build_block_model(useOnDemand, viewer_condition, content_condition, size, use_deg_corr, use_edge_weights, savedDir):
    """Fit a block model to the viewer/content graph and group vertices by block.

    The graph and its companion data come from ``build_tree``, which receives
    ``useOnDemand``, ``viewer_condition``, ``content_condition``, ``size`` and
    ``savedDir`` unchanged.

    :param use_deg_corr: forwarded as ``deg_corr`` to
        ``gt.minimize_blockmodel_dl``
    :param use_edge_weights: when truthy, the ``engagement`` edge property is
        passed as a ``"real-exponential"`` edge covariate; otherwise no
        ``state_args`` are supplied
    :return: dict with keys ``"entropy"``, ``"results"`` (one entry per block
        that has at least one vertex), ``"edges"``, ``"viewers"`` and
        ``"content"``

    Each block entry has ``id``, ``name`` (the block label), ``children``,
    ``viewer_count``, ``content_count``, ``viewer_aggs`` and ``content_aggs``.
    ``children`` holds at most one ``'Content'`` node and one ``'Viewers'``
    node, each wrapping a single dict whose list collects the per-vertex
    result items.

    NOTE(review): ``v_is_content``, ``get_result_item``, ``initAggregates``,
    ``aggregate_content`` and ``aggregate_viewers`` are not defined in this
    function; presumably module-level -- confirm.
    """
    g, engagement_df, viewers, content = build_tree(useOnDemand, viewer_condition, content_condition, size, savedDir)
    state_args = dict(recs=[g.ep.engagement], rec_types=[ "real-exponential" ]) if use_edge_weights else dict()
    state = gt.minimize_blockmodel_dl(g, state_args=state_args, deg_corr=use_deg_corr)
    b = state.get_blocks()
    verticies = g.get_vertices()
    # The "viewers"/"content" values are captured here; the loop below rebinds
    # the local names `viewers` and `content` without affecting this dict.
    results = {
        "entropy": state.entropy(),
        "results": [],
        "edges": engagement_df.to_dict('records'),
        "viewers": viewers,
        "content": content
    }
    # Running id shared by block entries and their 'Content'/'Viewers' nodes.
    counter = {"count": 0}
    for i, v in enumerate(verticies):
        # Linear search for an existing entry of this vertex's block.
        matching = [x for x in results.get("results") if x.get('name') == b[i]]
        children = []
        block = {}
        if matching:
            # the block was found, so get it's children array so we can append a value
            block = matching[0]
            children = block.get("children")
        else:
            # the block does not yet have an entry in the result set, so add one
            counter['count'] += 1
            (content_aggs, viewer_aggs) = initAggregates()
            block = {
                "id": counter.get('count'),
                "name": b[i],
                "children": children,
                "viewer_count": 0,
                "content_count": 0,
                "viewer_aggs": viewer_aggs,
                "content_aggs": content_aggs
            }
            results['results'].append(block)
        if v_is_content[v]:
            # the vertex is content, so check whether we already have a list of content for this block
            block['content_count'] += 1
            match = [x for x in children if x.get('name') == 'Content']
            content = {"content": []}
            if match:
                # We have a content list, so get it
                content = match[0]['children'][0]
            else:
                # create a new content list
                counter['count'] += 1
                children.append({
                    "id": counter.get('count'),
                    'name': 'Content',
                    'children': [content]
                })
            result_item = get_result_item(v)
            aggregate_content(result_item, block.get("content_aggs"))
            # `content` aliases the dict stored under the 'Content' node, so
            # this append is visible in `results`.
            content['content'].append(result_item)
        else:
            # the vertex is a viewer, so check whether we already have a list of viewers for this block
            block['viewer_count'] += 1
            match = [x for x in children if x.get('name') == 'Viewers']
            viewers = {"viewers": []}
            if match:
                # we have a viewer list, so get it
                viewers = match[0]['children'][0]
            else:
                # create a new viewer list
                counter['count'] += 1
                children.append({
                    "id": counter.get('count'),
                    'name': 'Viewers',
                    'children': [viewers]
                })
            result_item = get_result_item(v)
            aggregate_viewers(result_item, block.get("viewer_aggs"))
            # `viewers` aliases the dict stored under the 'Viewers' node.
            viewers["viewers"].append(result_item)
    return results
AccVer = graph.vp.AccessionVersion[v] PtuManual = graph.vp.PtuManual[v] CComp = str(graph.vp.CComp[v]) BlockCC = bcc[v] Block = str(b[v]) f.write(AccVer + "\t" + PtuManual + "\t" + CComp + "\t" + BlockCC + "\t" + Block + "\n") f.close() # Find the initial model with minimum entropy nToss = 100 state_list, entropy_list = [], [] for k in range(nToss): # Regular stochastic block model (SBM) state = gt.minimize_blockmodel_dl(g, deg_corr=False) entropy = state.entropy() # Update state state_list.append(state) entropy_list.append(entropy) nClass = len(np.unique(state.get_blocks().a)) print("Toss %d of %d: %d classes, entropy %f" % (k, nToss, nClass, entropy)) # Save graph fname = "SBM_%d_%f" % (nClass, entropy) write_classes(os.path.join(outdir, fname + ".tsv"), g, state) pickle.dump([g, state], open(os.path.join(outdir, fname + ".pickle"), "wb"), -1) g.save(os.path.join(outdir, fname + ".gt.gz"))
def compute_local_blocks(self):
    """Fit a block model to ``self.g`` and return its block property map.

    The modularity of the resulting partition is printed as a diagnostic.

    :return: ``state.b`` of the fitted state
    """
    state = gt.minimize_blockmodel_dl(self.g, verbose=True)
    b = state.b
    mod = gt.modularity(self.g, b)
    # print() with a single argument behaves the same on Python 2 and 3;
    # the former `print mod` statement was a SyntaxError on Python 3.
    print(mod)
    return b
def run_analysis(netfile, compnet_files):
    '''
    Run the analysis.

    Steps, in order: load ``netfile + '.graphml'``; plot the full network and
    the core-only view; compute vertex statistics; compute the modularity and
    insularity of the core partition and compare them with random-partition
    sampling distributions; fit a two-block model and report its modularity
    and insularities; save the annotated network in ``.gt`` and ``.graphml``
    form; repeat the random-partition comparison on each comparison network.

    Output file names are prefixed with ``'output/'`` plus the prefix
    returned by ``load_net``.

    :param netfile: Filename of the network to analyze (without the
        ``.graphml`` extension, which is appended here)
    :param compnet_files: List of filenames of the comparison networks, viz.,
        the high-energy physics networks.
    '''
    # Timestamp
    # --------------------
    print(datetime.now())

    # Load the network
    # --------------------
    net, outfile_pre, core_pmap, core_vertices = load_net(netfile + '.graphml', core = True, filter = True)
    output_folder = 'output/'
    outfile_pre = output_folder + outfile_pre

    # Plotting
    print('Plotting')
    layout = layout_and_plot(net, core_pmap, outfile_pre)
    # Store the layout in the net
    net.vp['layout'] = layout
    # Show only the core vertices
    net.set_vertex_filter(core_pmap)
    layout_and_plot(net, core_pmap, outfile_pre, filename_mod = '.core.net', reverse_colors = True)
    # Remove the filter again so later steps see the whole network
    net.set_vertex_filter(None)

    # Vertex statistics
    # --------------------
    # ECDF for out-degree distribution
    degree_dist(net, core_vertices, outfile = outfile_pre, show_plot = False, save_plot = True)
    # ECDF for eigenvector centrality
    ## Currently this is causing a segmentation fault
    # ev_centrality_dist(net, core_vertices, outfile = outfile_pre,
    #                    show_plot = False, save_plot = True)

    # Modularity
    # --------------------
    # Calculate modularity, using the core vertices as the partition
    modularity = gtcomm.modularity(net, core_pmap)
    print('Observed modularity: ' + str(modularity))
    obs_ins = insularity(net, core_pmap)
    print('Observed insularity: ' + str(obs_ins))

    # Calculate the number of core vertices
    n_core = len(core_vertices)
    # Construct a sampling distribution for the modularity statistic
    # And use it to calculate a p-value for the modularity
    print('Random sample modularity')
    modularity_sample_dist(net, n_core, modularity, outfile = outfile_pre + '.mod', show_plot = False, save_plot = True)
    print('Random sample insularities')
    modularity_sample_dist(net, n_core, obs_ins, mod_func = insularity, outfile = outfile_pre + '.ins', show_plot = False, save_plot = True)

    # Information-theoretic partitioning
    print('Information-theoretic partitioning')
    # Calculate the partition
    # Both RNGs are seeded with a fixed value before the fit
    gt.seed_rng(5678)
    np.random.seed(5678)
    # B_min = B_max = 2 pins the number of blocks to two
    part_block = gt.minimize_blockmodel_dl(net, B_min = 2, B_max = 2, verbose = True, overlap = False)
    # Extract the block memberships as a pmap
    net.vp['partition'] = part_block.get_blocks()
    # Calculate the modularity
    block_modularity = gtcomm.modularity(net, net.vp['partition'])
    print('Partion modularity: ' + str(block_modularity))

    print('Partition insularities')
    block_insularities = partition_insularity(net, net.vp['partition'])
    for community in block_insularities:
        print('Community ' + str(community) + ': ' + str(block_insularities[community]))

    print('Plotting')
    size_pmap = gt.prop_to_size(core_pmap, mi = 10, ma = 20)
    layout_and_plot(net, net.vp['partition'], outfile_pre, size_pmap = size_pmap, filename_mod = '.partition')

    # Modularity optimization
    optimal_sample_dist(net, modularity, obs_ins, outfile = outfile_pre, show_plot = False, save_plot = True)

    # Save results
    # --------------------
    # The above covers all of the analysis to be written into the output files,
    # so we'll go ahead and save things now.
    print('Saving')
    # Save in graph-tool's binary format
    net.save(outfile_pre + '.out' + '.gt')
    # Replace vector-type properties with strings
    #net.list_properties()
    properties = net.vertex_properties
    for property_key in properties.keys():
        # NOTE(review): the local name `property` shadows the builtin within
        # this function.
        property = properties[property_key]
        if 'vector' in property.value_type():
            properties[property_key] = property.copy(value_type = 'string')
    # Save as graphml
    net.save(outfile_pre + '.out' + '.graphml')

    # Comparison networks
    # --------------------
    for compnet_file in compnet_files:
        # Load the comparison network
        compnet, compnet_outfile = load_net(compnet_file)
        # Set it to the same directedness as the network of interest
        compnet.set_directed(net.is_directed())
        # Size of compnet
        n_compnet = compnet.num_vertices()
        # Num vertices in compnet to use in each random partition
        # (same core-to-total proportion as in the network of interest)
        k_compnet = round(n_core / net.num_vertices() * n_compnet)
        # Sample distribution based on random partition
        print('Random sample modularities')
        print('Observed modularity: ' + str(modularity))
        modularity_sample_dist(compnet, k_compnet, modularity, outfile = outfile_pre + '.mod.' + compnet_outfile, show_plot = False, save_plot = True)
        print('Random sample insularities')
        print('Observed insularity: ' + str(obs_ins))
        modularity_sample_dist(compnet, k_compnet, obs_ins, mod_func = insularity, outfile = outfile_pre + '.ins.' + compnet_outfile, show_plot = False, save_plot = True)
        # Sample distribution based on optimizing modularity
        # optimal_sample_dist(compnet, modularity, n_samples = 300,
        #                     outfile = outfile_pre + '.mod.' + compnet_outfile,
        #                     show_plot = False)

    # Timestamp
    # --------------------
    print(datetime.now())
    # Visually separate analyses
    print('-'*40)
"mag_journal_id": nodes, "community_id": cids }) if __name__ == "__main__": OUTPUT = sys.argv.pop() YEARS = [int(y) for y in sys.argv[1:]] print("years", YEARS) print("Loading networks") A, Araw, nodes = utils.load_network(YEARS) print("Construct graph tool graph object") G = to_graph_tool(A) print("Estimating") states = gt.minimize_blockmodel_dl( G, deg_corr=True, state_args=dict(eweight=G.ep.weight), verbose=True, B_max=np.round(A.shape[0] / 3).astype(int), ) print("Save") community_table = make_community_table(states, nodes) community_table.to_csv(OUTPUT, sep="\t")
#import networkx as nx #module load graph_tool #from graph_tool.all import * import graph_tool.all as gt import matplotlib.pyplot as plt ht = "ilmastonmuutos" g = gt.load_graph(ht + "/" + ht + "_retweet_network_giant.gml") #g = gt.Graph() #bmodel = gt.minimize_nested_blockmodel_dl(g, B_min=2, B_max=2) current_bmodel = gt.minimize_blockmodel_dl(g, B_min=2, B_max=2, deg_corr=True, verbose=True) #for _ in range(100): # bmodel = gt.minimize_blockmodel_dl(g, B_min=2, B_max=2, deg_corr=True, verbose=True) # if (bmodel.entropy() < current_bmodel.entropy()): # current_bmodel = bmodel # else: # continue #print(len(bmodel.get_bs())) #bmodel.draw() current_bmodel.draw() b = current_bmodel.get_blocks() community = g.new_vertex_property("int16_t") for v in g.vertices(): community[v] = b[v]
load_true_partition=False, strm_piece_num=part, out_neighbors=out_neighbors, in_neighbors=in_neighbors) else: out_neighbors, in_neighbors, N, E, true_partition = load_graph( input_filename, load_true_partition=True) input_graph = gt.Graph() input_graph.add_edge_list([(i, j) for i in range(len(out_neighbors)) if len(out_neighbors[i]) > 0 for j in out_neighbors[i][:, 0]]) t0 = timeit.default_timer() # the parallel switch determines whether MCMC updates are run in parallel, epsilon is the convergence threshold for # the nodal updates (smaller value is stricter), and the verbose option prints updates on each step of the algorithm. # Please refer to the graph-tool documentation under graph-tool.inference for details on the input parameters if args.threads > 0: gt.openmp_set_num_threads(args.threads) graph_tool_partition = gt.minimize_blockmodel_dl(input_graph, mcmc_args={'parallel': True}, mcmc_equilibrate_args={ 'verbose': False, 'epsilon': 1e-4 }, verbose=True) t1 = timeit.default_timer() print('\nGraph partition took {} seconds'.format(t1 - t0)) evaluate_partition(true_partition, graph_tool_partition.get_blocks().a)
elif (args.weight_model == 'Exponential'): state = gt.minimize_nested_blockmodel_dl( g, deg_corr=args.deg_corr, state_args=dict(recs=[y], rec_types=['real-exponential'])) else: state = gt.minimize_nested_blockmodel_dl( g, deg_corr=args.deg_corr, state_args=dict(recs=[y], rec_types=['real-normal'])) state_0 = state.get_levels()[0] nClass = len(np.unique(state_0.get_blocks().a)) else: # Flat stochastic block model (SBM) if (args.weight_model == 'None'): state = gt.minimize_blockmodel_dl(g, deg_corr=args.deg_corr) elif (args.weight_model == 'Exponential'): state = gt.minimize_blockmodel_dl( g, deg_corr=args.deg_corr, state_args=dict(recs=[y], rec_types=['real-exponential'])) else: state = gt.minimize_blockmodel_dl(g, deg_corr=args.deg_corr, state_args=dict( recs=[y], rec_types=['real-normal'])) nClass = len(np.unique(state.get_blocks().a)) entropy = state.entropy() # Update state
def find_communities(nnodes, edges, alg, params=None):
    """Run the community-detection algorithm named ``alg`` on a graph.

    :param nnodes: number of nodes; used to size the igraph graph and to
        enumerate nodes for ``sbm_nested``
    :param edges: edge list, passed to the graph constructor of whichever
        backend ``alg`` selects
    :param alg: algorithm name; the accepted names are the ones tested in
        the body below
    :param params: dict of algorithm-specific options.  Values are converted
        with ``int``/``float`` before use.  It is indexed directly by most
        branches, so it must be supplied for those algorithms.
    :return: list of communities, or ``None`` when ``alg`` is not recognised;
        ``[]`` when an igraph dendrogram cannot be converted to a clustering
    """

    def membership2cs(membership):
        # Group positions of the membership sequence by their label.
        cs = {}
        for i, m in enumerate(membership):
            cs.setdefault(m, []).append(i)
        return cs.values()

    def connected_subgraphs(G: nx.Graph):
        # Yield each connected component relabelled to 0..k-1; the original
        # node label is kept in the 'old' node attribute.
        for comp in nx.connected_components(G):
            sub = nx.induced_subgraph(G, comp)
            sub = nx.convert_node_labels_to_integers(sub,
                                                     label_attribute='old')
            yield sub

    def apply_subgraphs(algorithm, **params):
        # Run `algorithm` per connected component and map results back to
        # the original node labels.
        cs = []
        for sub in connected_subgraphs(G):
            if len(sub.nodes) <= 3:
                coms = [sub.nodes]  # let it be a cluster
            else:
                coms = algorithm(sub, **params)
                if hasattr(coms, 'communities'):
                    coms = coms.communities
            for com in coms:
                cs.append([sub.nodes[i]['old'] for i in set(com)])
        return cs

    def karate_apply(algorithm, graph, **params):
        # Adapter for karateclub-style estimators (construct, fit, read).
        model = algorithm(**params)
        model.fit(graph)
        return membership2cs(model.get_memberships().values())

    if alg == 'big_clam':
        c = -1 if params['c'] == 'auto' else int(params['c'])
        cs = BigClam('../../snap').run(edges, c=c, xc=int(params['xc']))

    elif alg in ('gmm', 'kclique', 'lprop', 'lprop_async', 'fluid',
                 'girvan_newman', 'angel', 'congo', 'danmf',
                 'egonet_splitter', 'lfm', 'multicom', 'nmnf', 'nnsed',
                 'node_perception', 'slpa', 'GEMSEC', 'EdMot', 'demon'):
        G = nx.Graph()
        G.add_edges_from(edges)

        if alg == 'gmm':
            cs = community.greedy_modularity_communities(G)
        elif alg == 'kclique':
            params = {k: float(v) for k, v in params.items()}
            cs = community.k_clique_communities(G, **params)
        elif alg == 'lprop':
            cs = community.label_propagation_communities(G)
        elif alg == 'lprop_async':
            cs = community.asyn_lpa_communities(G, seed=0)
        elif alg == 'fluid':
            params = {k: int(v) for k, v in params.items()}
            params['seed'] = 0
            cs = apply_subgraphs(community.asyn_fluidc, **params)
        elif alg == 'girvan_newman':
            comp = community.girvan_newman(G)
            # Advance the generator; the last partition reached is kept.
            for cs in itertools.islice(comp, int(params['k'])):
                pass
        elif alg == 'angel':
            params = {k: float(v) for k, v in params.items()}
            cs = cdlib.angel(G, **params).communities
        elif alg == 'congo':  # too slow
            ncoms = int(params['number_communities'])
            cs = []
            for sub in connected_subgraphs(G):
                if len(sub.nodes) <= max(3, ncoms):
                    cs.append(sub.nodes)  # let it be a cluster
                else:
                    coms = cdlib.congo(sub, number_communities=ncoms,
                                       height=int(params['height']))
                    for com in coms.communities:
                        cs.append([sub.nodes[i]['old'] for i in set(com)])
        elif alg == 'danmf':  # no overlapping
            cs = apply_subgraphs(cdlib.danmf)
        elif alg == 'egonet_splitter':
            params['resolution'] = float(params['resolution'])
            cs = apply_subgraphs(cdlib.egonet_splitter, **params)
        elif alg == 'lfm':
            coms = cdlib.lfm(G, float(params['alpha']))
            cs = coms.communities
        elif alg == 'multicom':
            cs = cdlib.multicom(G, seed_node=0).communities
        elif alg == 'nmnf':
            params = {k: int(v) for k, v in params.items()}
            cs = apply_subgraphs(cdlib.nmnf, **params)
        elif alg == 'nnsed':
            cs = apply_subgraphs(cdlib.nnsed)
        elif alg == 'node_perception':  # not usable
            params = {k: float(v) for k, v in params.items()}
            cs = cdlib.node_perception(G, **params).communities
        elif alg == 'slpa':
            params["t"] = int(params["t"])
            params["r"] = float(params["r"])
            cs = cdlib.slpa(G, **params).communities
        elif alg == 'demon':
            params = {k: float(v) for k, v in params.items()}
            cs = cdlib.demon(G, **params).communities
        elif alg == 'GEMSEC':
            # gamma = float(params.pop('gamma'))
            params = {k: int(v) for k, v in params.items()}
            # params['gamma'] = gamma
            params['seed'] = 0
            _wrap = partial(karate_apply, karateclub.GEMSEC)
            cs = apply_subgraphs(_wrap, **params)
        elif alg == 'EdMot':
            params = {k: int(v) for k, v in params.items()}
            _wrap = partial(karate_apply, karateclub.EdMot)
            cs = apply_subgraphs(_wrap, **params)

    elif alg in ('infomap', 'community_leading_eigenvector', 'leig',
                 'multilevel', 'optmod', 'edge_betweenness', 'spinglass',
                 'walktrap', 'leiden', 'hlc'):
        G = igraph.Graph()
        G.add_vertices(nnodes)
        G.add_edges(edges)

        if alg == 'infomap':
            vcl = G.community_infomap(trials=int(params['trials']))
            cs = membership2cs(vcl.membership)
        elif alg in ('leig', 'community_leading_eigenvector'):
            # The long name is accepted by the outer test, so it must be
            # dispatched here too; otherwise `cs` was never assigned.
            clusters = None if params['clusters'] == 'auto' else int(
                params['clusters'])
            vcl = G.community_leading_eigenvector(clusters=clusters)
            cs = membership2cs(vcl.membership)
        elif alg == 'multilevel':
            vcl = G.community_multilevel()
            cs = membership2cs(vcl.membership)
        elif alg == 'optmod':  # too long
            # Graph.community_optimal_modularity() returns a clustering
            # object; the old code unpacked it as a tuple and then read an
            # undefined `vcl`.
            vcl = G.community_optimal_modularity()
            cs = membership2cs(vcl.membership)
        elif alg == 'edge_betweenness':
            clusters = None if params['clusters'] == 'auto' else int(
                params['clusters'])
            dendrogram = G.community_edge_betweenness(clusters,
                                                      directed=False)
            try:
                clusters = dendrogram.as_clustering()
            except Exception:
                # Best effort: no usable cut of the dendrogram.
                return []
            cs = membership2cs(clusters.membership)
        elif alg == 'spinglass':  # only for connected graph
            vcl = G.community_spinglass(
                parupdate=True,
                update_rule=params['update_rule'],
                start_temp=float(params['start_temp']),
                stop_temp=float(params['stop_temp']))
            cs = membership2cs(vcl.membership)
        elif alg == 'walktrap':
            dendrogram = G.community_walktrap(steps=int(params['steps']))
            try:
                clusters = dendrogram.as_clustering()
            except Exception:
                # Best effort: no usable cut of the dendrogram.
                return []
            cs = membership2cs(clusters.membership)
        elif alg == 'leiden':
            vcl = G.community_leiden(
                objective_function=params['objective_function'],
                resolution_parameter=float(params['resolution_parameter']),
                n_iterations=int(params['n_iterations']))
            cs = membership2cs(vcl.membership)
        elif alg == 'hlc':
            algorithm = HLC(G, min_size=int(params['min_size']))
            cs = algorithm.run(None)

    elif alg in ("sbm", "sbm_nested"):
        # Fixed seeds for both RNGs.
        np.random.seed(42)
        gt.seed_rng(42)

        G = gt.Graph(directed=False)
        G.add_edge_list(edges)

        # NOTE(review): bool() of any non-empty string is True, so a string
        # value such as 'False' would enable degree correction -- confirm
        # what type callers pass here.
        deg_corr = bool(params['deg_corr'])
        B_min = None if params['B_min'] == 'auto' else int(params['B_min'])
        B_max = None if params['B_max'] == 'auto' else int(params['B_max'])

        if alg == "sbm":
            state = gt.minimize_blockmodel_dl(G, deg_corr=deg_corr,
                                              B_min=B_min, B_max=B_max)
            membership = state.get_blocks()
            cs = membership2cs(membership)
        elif alg == "sbm_nested":
            state = gt.minimize_nested_blockmodel_dl(G, deg_corr=deg_corr,
                                                     B_min=B_min,
                                                     B_max=B_max)
            levels = state.get_bs()
            level_max = int(params['level'])
            membership = {}
            for nid in range(nnodes):
                # Follow the node's block assignment through the hierarchy;
                # the counter starts at len(levels) and decreases, and the
                # walk stops when it equals level_max.
                cid = nid
                level_i = len(levels)
                for level in levels:
                    cid = level[cid]
                    if level_i == level_max:
                        membership.setdefault(cid, []).append(nid)
                        break
                    level_i -= 1
            cs = membership.values()

    else:
        return None

    return list(cs)
ecolor3[e] = "#8073ac" esize3[e] = 1 for e in G4.edges(): source = int(e.source()) target = int(e.target()) if (source, target) in in_edges4 or (target, source) in in_edges4: ecolor4[e] = "#e08214" esize4[e] = 2.5 else: ecolor4[e] = "#8073ac" esize4[e] = 1 ############draw graph##################### state = gt.minimize_blockmodel_dl(G2) b = state.b gt.graph_draw(G1, pos1, vertex_size=15, vertex_shape="circle", vertex_fill_color="#3f3f3f", edge_color=ecolor1, edge_pen_width=esize1, output="1.network1.pdf") gt.graph_draw(G2, pos2, vertex_size=15, vertex_shape="circle", vertex_fill_color="#3f3f3f",