def infer_sbm(G, B_min=2, degree_corrected=False):
    """Use `graph-tool` to infer blocks in a graph.

    Parameters
    ----------
    G : nx.Graph
    B_min : int
        Minimum number of blocks.
    degree_corrected : bool
        Whether to fit the degree-corrected variant of the SBM.

    Returns
    -------
    nx.Graph
        NetworkX graph with a block id attribute for each node.
    """
    # Map original node ids to graph-tool vertex indices
    mapping_ = {}
    g = nx2gt(G)
    for ix, n1 in enumerate(list(g.vertices())):
        index_ = int(n1)
        mapping_[int(g.vertex_properties["id"][index_])] = ix
    state = minimize_blockmodel_dl(g, B_min=B_min, deg_corr=degree_corrected)
    # Copy the inferred block labels back onto the networkx graph
    for node in mapping_:
        G.nodes[node]["block"] = state.get_blocks()[mapping_[node]]
    return G
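# Hedged usage sketch: assumes the `nx2gt` converter above is importable, that
# it stores the original node ids in an "id" vertex property, and that
# graph-tool is installed. The karate-club graph is only a stand-in dataset.
import networkx as nx

G = nx.karate_club_graph()
G = infer_sbm(G, B_min=2, degree_corrected=True)
for node, data in list(G.nodes(data=True))[:5]:
    print(node, data["block"])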
def findCommunities(filename):
    trials = 1
    fullFile = f'{filename}.graphml'
    print(fullFile)
    graph = gt.load_graph(fullFile)
    # Keep the fit with the lowest description length (entropy) across trials
    lowest_entropy = np.inf
    best_community = None
    for i in range(trials):
        state = inference.minimize_blockmodel_dl(graph, deg_corr=True, verbose=True)
        b = state.get_blocks()
        entropy = state.entropy()
        print(entropy)
        if entropy < lowest_entropy:
            best_community = b
            lowest_entropy = entropy
    # Build parallel lists of node ids and their community assignments
    communityMapping = dict()
    nodeList = list()
    communityID = list()
    for v in graph.vertices():
        nodeList.append(str(graph.vertex_properties["_graphml_vertex_id"][v]))
        communityID.append(str(best_community[v]))
    communityMapping['NODE_ID'] = nodeList
    communityMapping['COMMUNITY_ID'] = communityID
    return communityMapping
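# Hedged usage sketch: assumes a file `mygraph.graphml` exists alongside the
# `gt`/`inference`/`np` imports used above. The parallel lists drop straight
# into a pandas DataFrame for inspection.
import pandas as pd

mapping = findCommunities("mygraph")  # loads mygraph.graphml
df = pd.DataFrame(mapping)
print(df.groupby("COMMUNITY_ID").size())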
def graph_tool_community(self, G, k):
    gtg, gtgv, gtge = self.networkx2graph_tool(G)
    # Fix the number of blocks to exactly k
    gttemp = inference.minimize_blockmodel_dl(gtg, B_min=k, B_max=k)
    labels = np.array(gttemp.b.get_array())
    partition = {}
    # e is the graph-tool vertex index, x the original networkx node id
    for e, x in enumerate(gtgv):
        # Shift labels so communities are numbered from 1
        partition[x] = int(labels[e]) + 1
    return partition
def fit_predict(self, data):
    graph = induce_graph(data, distance=self.metric)
    result_blocks = []
    weights = graph.edge_properties['weights'].get_array()
    for threshold in np.linspace(0, weights.max(), self.cutoff_interval):
        working_graph = cutoff(graph, threshold, inplace=True)
        # Apply the sbm to the pruned graph
        blocks = minimize_blockmodel_dl(working_graph)
        blocks = blocks.get_blocks().get_array()
        # Silhouette doesn't work if there's only one cluster label
        if len(np.unique(blocks)) > 1:
            cutoff_score = silhouette_score(data, blocks)
            result_blocks.append((cutoff_score, blocks))
    # Return the block assignment with the best silhouette score; compare on
    # the score alone, since numpy arrays don't support tuple tie-breaking
    return np.array(max(result_blocks, key=lambda r: r[0])[1])
def run_minimize_blockmodel(mg, temp_loc):
    # save to temp
    nx.write_graphml(mg.g, temp_loc)
    # load into graph-tool from temp
    g = load_graph(temp_loc, fmt="graphml")
    # drop isolated vertices before fitting
    total_degrees = g.get_total_degrees(g.get_vertices())
    remove_verts = np.where(total_degrees == 0)[0]
    g.remove_vertex(remove_verts)
    min_state = minimize_blockmodel_dl(g, verbose=False)
    blocks = list(min_state.get_blocks())
    verts = g.get_vertices()
    block_map = {}
    for v, b in zip(verts, blocks):
        cell_id = int(g.vertex_properties["_graphml_vertex_id"][v])
        block_map[cell_id] = int(b)
    block_series = pd.Series(block_map)
    block_series.name = "block_label"
    return block_series
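# Hedged usage sketch: assumes an `mg` object exposing a networkx graph as
# `mg.g`, as in the function above; `tempfile` just provides a scratch path.
import os
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    block_series = run_minimize_blockmodel(mg, os.path.join(tmp, "graph.graphml"))
print(block_series.value_counts())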
def sbm_clustering_nmi_silhouette(data, y, threshold, n_tries=10):
    """Apply the SBM model to a graph pruned at the given threshold.

    Due to SBM randomness, repeat the process `n_tries` times and return the
    mean NMI and mean silhouette scores.
    """
    graph = induce_graph(data)
    graph = cutoff(graph, threshold, inplace=True)
    nmi_scores, silhouette_scores = [], []
    for _ in range(n_tries):
        # Apply the sbm to the pruned graph
        blocks = minimize_blockmodel_dl(graph)
        blocks = blocks.get_blocks().get_array()
        # Compute the nmi and silhouette score for evaluation
        nmi_scores.append(normalized_mutual_info_score(y, blocks))
        silhouette_scores.append(
            silhouette_score(data, blocks) if len(np.unique(blocks)) > 1 else 0
        )
    return np.mean(nmi_scores), np.mean(silhouette_scores)
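# Hedged usage sketch: assumes the `induce_graph`/`cutoff` helpers above plus
# a feature matrix `X` and labels `y` from elsewhere; the threshold grid is
# arbitrary.
thresholds = np.linspace(0.1, 0.9, 5)
for t in thresholds:
    nmi, silhouette = sbm_clustering_nmi_silhouette(X, y, t)
    print(f"threshold={t:.2f}  NMI={nmi:.3f}  silhouette={silhouette:.3f}")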
def _run_stochastic_block_model(self, weight=None, weight_model="discrete-binomial",
                                nested=False, min_communities=None,
                                max_communities=None, **kwargs):
    state_args = dict()
    if weight:
        state_args = dict(recs=[self.graph.ep[weight]], rec_types=[weight_model])
    if not nested and not weight:
        state = minimize_blockmodel_dl(self.graph, B_min=min_communities,
                                       B_max=max_communities)
        partition = state.get_blocks()
    else:
        state = minimize_nested_blockmodel_dl(self.graph, B_min=min_communities,
                                              B_max=max_communities,
                                              state_args=state_args)
        # Per-level block labels: blocks[0] assigns a block to every vertex,
        # each higher level assigns a block to each block of the level below
        blocks = [level.get_blocks().a for level in state.get_levels()]
        partition = []
        for el in blocks[0]:
            # Walk each vertex's label up through the hierarchy
            communities = [el]
            prev_block = el
            if len(blocks) > 1:
                for b in blocks[1:]:
                    communities.append(b[prev_block])
                    prev_block = b[prev_block]
            communities.reverse()
            if not nested:
                partition.append(communities[-1])
            else:
                partition.append(communities)
    return partition
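# Hedged alternative sketch: graph-tool can project the hierarchy itself via
# NestedBlockState.project_level, which yields per-vertex labels at any level.
# Assumes `state` is a nested state fitted as above.
n_levels = len(state.get_levels())
per_level_labels = [
    state.project_level(level).get_blocks().a for level in range(n_levels)
]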
def run_minimize_blockmodel(mg, temp_loc=None, weight_model=None):
    meta = mg.meta.copy()
    meta = pd.DataFrame(mg.meta["neuron_name"])
    mg = MetaGraph(mg.adj, meta)
    if temp_loc is None:
        temp_loc = f"maggot_models/data/interim/temp-{np.random.randint(1e8)}.graphml"
    # save to temp
    nx.write_graphml(mg.g, temp_loc)
    # load into graph-tool from temp
    g = load_graph(temp_loc, fmt="graphml")
    os.remove(temp_loc)
    # drop isolated vertices before fitting
    total_degrees = g.get_total_degrees(g.get_vertices())
    remove_verts = np.where(total_degrees == 0)[0]
    g.remove_vertex(remove_verts)
    if weight_model is not None:
        recs = [g.ep.weight]
        rec_types = [weight_model]
    else:
        recs = []
        rec_types = []
    state_args = dict(recs=recs, rec_types=rec_types)
    min_state = minimize_blockmodel_dl(g, verbose=False, state_args=state_args)
    blocks = list(min_state.get_blocks())
    verts = g.get_vertices()
    block_map = {}
    for v, b in zip(verts, blocks):
        cell_id = int(g.vertex_properties["_graphml_vertex_id"][v])
        block_map[cell_id] = int(b)
    block_series = pd.Series(block_map)
    block_series.name = "block_label"
    return block_series
def get_sbm_communities(G):
    """
    Returns the community partition identified by the SBM method, i.e. a list
    containing the community assignment of each node.

    Note: the algorithm runs multiple trials due to the stochastic nature of
    the method, selecting the partition with the minimum description length.
    The number of trials can be varied in the code, if required.

    Reference for the SBM method:
    T.P. Peixoto, Efficient Monte Carlo and greedy heuristic for the inference
    of stochastic block models, Physical Review E, 89 (2014).

    Parameters
    ----------
    G : NetworkX-formatted network
    """
    trials = 3  # the number of trials of the SBM algorithm to run
    Gto = convert_to_gt(G)
    n = nx.number_of_nodes(G)
    desc_len = [0] * trials
    partitioning = [0] * trials
    # run multiple trials and keep the partition with the minimum description length
    for trial in range(trials):
        partitioning_trial = gt.minimize_blockmodel_dl(Gto, B_min=1, B_max=n)
        # description length of a fit (negative log-likelihood)
        desc_len[trial] = partitioning_trial.entropy()
        partitioning[trial] = partitioning_trial
    mn, idx = min((desc_len[i], i) for i in range(len(desc_len)))
    # the partitioning with the minimum description length
    sbm_communities = partitioning[idx]
    # convert to list format, numbering communities from 1
    sbm_communities = [x + 1 for x in list(sbm_communities.get_blocks().get_array())]
    return sbm_communities
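# Hedged usage sketch: assumes the `convert_to_gt` helper above is importable;
# the Les Miserables co-appearance graph is only a convenient test network.
import networkx as nx

G = nx.les_miserables_graph()
communities = get_sbm_communities(G)
print(len(set(communities)), "communities found")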
if args.sample_type != "none":
    samplestack = SampleStack(args)
    (sampled_graph, sampled_graph_partition, vertex_mapping, block_mapping,
     evaluation) = samplestack.unstack(args)
    full_graph, full_graph_partition, evaluation = \
        samplestack.extrapolate_sample_partition(sampled_graph_partition,
                                                 vertex_mapping, args, evaluation)
else:
    graph, true_block_assignment = load_graph(args)
    t_load = timeit.default_timer()
    t_sample = timeit.default_timer()
    print("Performing stochastic block partitioning")
    evaluation = Evaluation(args, graph)
    # Please refer to the graph-tool documentation under graph_tool.inference
    # for details on the input parameters
    partition = minimize_blockmodel_dl(graph,
                                       shrink_args={'parallel': True},
                                       verbose=args.verbose,
                                       mcmc_equilibrate_args={
                                           'verbose': args.verbose,
                                           'epsilon': 1e-4
                                       })
    t_partition = timeit.default_timer()
t_end = timeit.default_timer()
print('\nGraph partition took {} seconds'.format(t_end - t_start))
evaluation.total_runtime(t_start, t_end)
if args.sample_type != "none":
    print("===== Evaluating graph sampling =====")
    evaluation.evaluate_sampling(full_graph, sampled_graph, full_graph_partition,
                                 sampled_graph_partition, block_mapping,
                                 vertex_mapping,
def unstack(
        self,
        args: argparse.Namespace,
        sampled_graph_partition: BlockState = None,
        evaluation: Evaluation = None
) -> Tuple[Graph, BlockState, Dict, Dict, Evaluation]:
    """Performs SBP on the first (innermost) sample. Merges said sample with
    the next in the stack, and performs SBP on the combined results. Repeats
    the process until all samples have been partitioned.

    Parameters
    ----------
    args : argparse.Namespace
        the command-line arguments supplied by the user
    sampled_graph_partition : BlockState
        the current partitioned state of the sampled graph. Default = None
    evaluation : Evaluation
        the current state of the evaluation of the algorithm. Default = None

    Returns
    -------
    sampled_graph : Graph
        the Graph object describing the combined samples
    sampled_graph_partition : BlockState
        the partition results of the combined samples
    vertex_mapping : Dict[int, int]
        the mapping of the vertices from the combined sample to the full graph
    block_mapping : Dict[int, int]
        the mapping of the communities/blocks from the combined sample to the
        full graph
    """
    # Propagate results back through the stack
    sampled_graph, sample = self._pop()
    min_num_blocks = -1
    # denominator = 2
    # if args.sample_iterations > 1:
    #     min_num_blocks = int(sampled_graph.num_nodes / denominator)
    #     min_num_blocks = 0
    if evaluation is None:
        evaluation = Evaluation(args, sampled_graph)
    print("Subgraph: V = {} E = {}".format(sampled_graph.num_vertices(),
                                           sampled_graph.num_edges()))
    t0 = timeit.default_timer()
    combined_partition = minimize_blockmodel_dl(
        sampled_graph,
        shrink_args={'parallel': True},
        verbose=args.verbose,
        mcmc_equilibrate_args={'verbose': args.verbose, 'epsilon': 1e-4})
    evaluation.sampled_graph_partition_time += (timeit.default_timer() - t0)
    combined_sampled_graph = sampled_graph
    while len(self.stack) > 0:
        sampled_graph, next_sample = self._pop()
        t0 = timeit.default_timer()
        sample_partition = minimize_blockmodel_dl(
            sampled_graph,
            shrink_args={'parallel': True},
            verbose=args.verbose,
            mcmc_equilibrate_args={'verbose': args.verbose, 'epsilon': 1e-4})
        evaluation.sampled_graph_partition_time += (timeit.default_timer() - t0)
        t1 = timeit.default_timer()
        # TODO: fix this to allow multi-sample strategies
        combined_partition, combined_sampled_graph, sample = \
            self.combine_partition_with_sample(combined_partition,
                                               sample_partition, sample,
                                               next_sample, args)
        t2 = timeit.default_timer()
        # TODO: change to evaluation.merge_sample time?
        evaluation.propagate_membership += (t2 - t1)
    print("=====Performing final (combined) sample partitioning=====")
    if min_num_blocks > 0 or (args.sample_iterations > 1):
        combined_partition.num_blocks_to_merge = 0
        sampled_graph_partition = minimize_blockmodel_dl(
            combined_sampled_graph,
            shrink_args={'parallel': True},
            verbose=args.verbose,
            mcmc_equilibrate_args={'verbose': False, 'epsilon': 1e-4})
    else:
        sampled_graph_partition = combined_partition
    return (combined_sampled_graph, sampled_graph_partition,
            sample.vertex_mapping, sample.true_blocks_mapping, evaluation)