Exemple #1
0
def infer_sbm(G, B_min=2, degree_corrected=False):
    """
    Use `graph-tool` to infer blocks in a graph.

    The graph is converted with ``nx2gt``, a block model is fitted with
    ``minimize_blockmodel_dl`` and the block id of every vertex is written
    back onto ``G`` under the ``"block"`` node attribute.

    Parameters
    ----------
    G : nx.Graph
        Graph to partition. It is modified in place.
    B_min : int
        minimum number of blocks
    degree_corrected : bool
        is degree corrected or not

    Returns
    -------
    nx.Graph
        The same ``G`` object, with a block id attribute for each node.
    """
    g = nx2gt(G)
    vertex_ids = g.vertex_properties["id"]

    # Map every original node label to its position in graph-tool's vertex
    # iteration order.
    # NOTE(review): the int() cast assumes the node labels of G are integers
    # (or integer-like) -- confirm against callers.
    mapping_ = {
        int(vertex_ids[int(vertex)]): ix
        for ix, vertex in enumerate(g.vertices())
    }

    state = minimize_blockmodel_dl(g, B_min=B_min, deg_corr=degree_corrected)

    # Fetch the block property map once rather than once per node.
    blocks = state.get_blocks()
    for node, ix in mapping_.items():
        G.nodes[node]["block"] = blocks[ix]
    return G
def findCommunities(filename, trials=1):
    """
    Fit a degree-corrected block model to a GraphML file and return the
    lowest-entropy partition found.

    Parameters
    ----------
    filename : str
        Path of the graph file without its ``.graphml`` extension.
    trials : int
        Number of independent fits to run. The fit whose ``state.entropy()``
        is lowest is kept. Defaults to 1.

    Returns
    -------
    dict
        ``{'NODE_ID': [...], 'COMMUNITY_ID': [...]}``: two parallel lists of
        strings holding, for every vertex, its ``_graphml_vertex_id`` and the
        id of the block it was assigned to.

    Raises
    ------
    ValueError
        If ``trials`` is smaller than 1.
    """
    if trials < 1:
        raise ValueError(f"trials must be at least 1, got {trials}")

    fullFile = f'{filename}.graphml'
    print(fullFile)
    graph = gt.load_graph(fullFile)

    lowest_entropy = np.inf
    best_community = None
    for _ in range(trials):
        state = inference.minimize_blockmodel_dl(graph,
                                                 deg_corr=True,
                                                 verbose=True)
        # Evaluate the entropy once per fit and reuse the value below.
        entropy = state.entropy()
        print(entropy)
        # Always accept the first fit so a non-finite entropy cannot leave
        # best_community unset.
        if best_community is None or entropy < lowest_entropy:
            best_community = state.get_blocks()
            lowest_entropy = entropy

    vertex_ids = graph.vertex_properties["_graphml_vertex_id"]
    nodeList = []
    communityID = []
    for v in graph.vertices():
        nodeList.append(str(vertex_ids[v]))
        communityID.append(str(best_community[v]))
    return {'NODE_ID': nodeList, 'COMMUNITY_ID': communityID}
Exemple #3
0
 def graph_tool_community(self, G, k):
     """Partition ``G`` into exactly ``k`` blocks with graph-tool.

     ``G`` is converted through ``self.networkx2graph_tool`` and a block
     model is fitted with both ``B_min`` and ``B_max`` set to ``k``.

     Returns a dict keyed by the entries of the second value returned by
     ``self.networkx2graph_tool``; each value is that entry's block label
     plus one.
     """
     gt_graph, gt_vertices, _ = self.networkx2graph_tool(G)
     state = inference.minimize_blockmodel_dl(gt_graph, B_min=k, B_max=k)
     block_labels = np.array(state.b.get_array())
     # The i-th entry of gt_vertices is paired with the i-th block label.
     # NOTE(review): this assumes gt_vertices is ordered like graph-tool's
     # vertex indices -- confirm in networkx2graph_tool.
     return {
         node: int(block_labels[position]) + 1
         for position, node in enumerate(gt_vertices)
     }
Exemple #4
0
    def fit_predict(self, data):
        """Cluster ``data`` with an SBM at the best-scoring weight cutoff.

        A graph is induced from ``data`` using ``self.metric``. For each of
        ``self.cutoff_interval`` evenly spaced thresholds between 0 and the
        largest edge weight, the graph is pruned in place with ``cutoff``, a
        block model is fitted, and the partition is scored with
        ``silhouette_score``. Partitions with a single label are skipped.

        Returns
        -------
        np.ndarray
            Block labels of the partition with the highest silhouette score.
            When several partitions tie, the one found first is returned.

        Raises
        ------
        ValueError
            If no threshold produced a partition with more than one block.
        """
        graph = induce_graph(data, distance=self.metric)

        best_score = None
        best_blocks = None

        weights = graph.edge_properties['weights'].get_array()
        for threshold in np.linspace(0, weights.max(), self.cutoff_interval):
            working_graph = cutoff(graph, threshold, inplace=True)
            # Apply the sbm to the pruned graph
            state = minimize_blockmodel_dl(working_graph)
            blocks = state.get_blocks().get_array()

            # Silhouette doesn't work if there's only one cluster label
            if len(np.unique(blocks)) > 1:
                cutoff_score = silhouette_score(data, blocks)
                # Compare scores only: comparing (score, array) tuples falls
                # back to comparing the arrays whenever two scores are equal.
                if best_score is None or cutoff_score > best_score:
                    best_score = cutoff_score
                    best_blocks = blocks

        if best_blocks is None:
            raise ValueError(
                "no cutoff threshold produced more than one block; "
                "cannot select a clustering")
        return np.array(best_blocks)
Exemple #5
0
def run_minimize_blockmodel(mg, temp_loc):
    """Fit a block model to ``mg.g`` and return the block label per vertex.

    The networkx graph ``mg.g`` is written to ``temp_loc`` as GraphML and
    read back with graph-tool. Vertices with total degree 0 are removed
    before fitting.

    Returns a ``pd.Series`` named ``"block_label"`` whose index holds the
    integer ``_graphml_vertex_id`` of each remaining vertex and whose values
    are the integer block labels.
    """
    # Round-trip through a GraphML file to move the graph into graph-tool.
    nx.write_graphml(mg.g, temp_loc)
    graph = load_graph(temp_loc, fmt="graphml")

    # Remove vertices with no edges.
    # NOTE(review): the positions returned by np.where are used as vertex
    # ids, which presumes get_vertices() is 0..N-1 in order -- confirm.
    degrees = graph.get_total_degrees(graph.get_vertices())
    isolated = np.where(degrees == 0)[0]
    graph.remove_vertex(isolated)

    state = minimize_blockmodel_dl(graph, verbose=False)
    labels = list(state.get_blocks())
    graphml_ids = graph.vertex_properties["_graphml_vertex_id"]

    block_map = {
        int(graphml_ids[vertex]): int(label)
        for vertex, label in zip(graph.get_vertices(), labels)
    }
    return pd.Series(block_map, name="block_label")
Exemple #6
0
def sbm_clustering_nmi_silhouette(data, y, threshold, n_tries=10):
    """Score SBM clusterings of a thresholded graph.

    A graph is induced from ``data`` and pruned in place at ``threshold``.
    Because the SBM fit is random, it is repeated ``n_tries`` times on that
    same pruned graph. Each fit is scored with the normalized mutual
    information against ``y`` and with the silhouette score on ``data``
    (0 when the fit yields a single label).

    Returns the pair ``(mean NMI, mean silhouette)`` over all tries.
    """
    pruned = cutoff(induce_graph(data), threshold, inplace=True)

    def score_single_fit():
        # One SBM fit on the pruned graph, reduced to its two scores.
        state = minimize_blockmodel_dl(pruned)
        labels = state.get_blocks().get_array()
        nmi = normalized_mutual_info_score(y, labels)
        if len(np.unique(labels)) > 1:
            silhouette = silhouette_score(data, labels)
        else:
            silhouette = 0
        return nmi, silhouette

    scores = [score_single_fit() for _ in range(n_tries)]
    nmi_scores = [nmi for nmi, _ in scores]
    silhouette_scores = [silhouette for _, silhouette in scores]

    return np.mean(nmi_scores), np.mean(silhouette_scores)
Exemple #7
0
    def _run_stochastic_block_model(self,
                                    weight=None,
                                    weight_model="discrete-binomial",
                                    nested=False,
                                    min_communities=None,
                                    max_communities=None,
                                    **kwargs):
        """Fit a (nested) stochastic block model to ``self.graph``.

        * Unweighted and not nested: a flat model is fitted and the result
          of ``state.get_blocks()`` is returned as is.
        * Otherwise a nested model is fitted. When ``weight`` is given, the
          edge property ``self.graph.ep[weight]`` is passed as a record with
          type ``weight_model``. The return value is a list with one entry
          per element of the lowest level's block array: the lowest-level
          block when ``nested`` is false, or the list of blocks from the top
          level down to the lowest level when ``nested`` is true.

        ``min_communities`` / ``max_communities`` are forwarded as ``B_min``
        / ``B_max``. Extra ``**kwargs`` are accepted but not used here.
        """
        if not nested and not weight:
            flat_state = minimize_blockmodel_dl(self.graph,
                                                B_min=min_communities,
                                                B_max=max_communities)
            return flat_state.get_blocks()

        state_args = {}
        if weight:
            state_args = {"recs": [self.graph.ep[weight]],
                          "rec_types": [weight_model]}

        nested_state = minimize_nested_blockmodel_dl(self.graph,
                                                     B_min=min_communities,
                                                     B_max=max_communities,
                                                     state_args=state_args)
        levels = [level.get_blocks().a for level in nested_state.get_levels()]
        lowest_level, upper_levels = levels[0], levels[1:]

        partition = []
        for leaf_block in lowest_level:
            if not nested:
                # Only the lowest-level assignment is reported.
                partition.append(leaf_block)
                continue
            # Follow the block through every coarser level of the hierarchy.
            chain = [leaf_block]
            for level in upper_levels:
                chain.append(level[chain[-1]])
            chain.reverse()
            partition.append(chain)
        return partition
Exemple #8
0
def run_minimize_blockmodel(mg, temp_loc=None, weight_model=None):
    """Fit a block model to a metagraph and return the block label per vertex.

    The metagraph is rebuilt from ``mg.adj`` keeping only the
    ``"neuron_name"`` metadata column, written to a temporary GraphML file
    and read back with graph-tool. Vertices with total degree 0 are removed
    before fitting.

    Parameters
    ----------
    mg : MetaGraph
        Source graph; its ``adj`` and ``meta["neuron_name"]`` are used.
    temp_loc : str, optional
        Path of the intermediate GraphML file. When omitted, a randomly
        numbered file under ``maggot_models/data/interim/`` is used. The
        file is deleted afterwards in either case.
    weight_model : str, optional
        When given, the ``weight`` edge property is passed to the fit as a
        record of this type.

    Returns
    -------
    pd.Series
        Series named ``"block_label"``, indexed by the integer
        ``_graphml_vertex_id`` of each remaining vertex, holding the integer
        block labels.
    """
    meta = pd.DataFrame(mg.meta["neuron_name"])
    mg = MetaGraph(mg.adj, meta)
    if temp_loc is None:
        temp_loc = f"maggot_models/data/interim/temp-{np.random.randint(1e8)}.graphml"

    try:
        # save to temp
        nx.write_graphml(mg.g, temp_loc)
        # load into graph-tool from temp
        g = load_graph(temp_loc, fmt="graphml")
    finally:
        # Clean up even when writing or loading fails; the file may not exist
        # if the write itself failed.
        if os.path.exists(temp_loc):
            os.remove(temp_loc)

    # Drop vertices with no edges.
    # NOTE(review): the positions returned by np.where are used as vertex
    # ids, which presumes get_vertices() is 0..N-1 in order -- confirm.
    total_degrees = g.get_total_degrees(g.get_vertices())
    remove_verts = np.where(total_degrees == 0)[0]
    g.remove_vertex(remove_verts)

    if weight_model is not None:
        recs = [g.ep.weight]
        rec_types = [weight_model]
    else:
        recs = []
        rec_types = []
    state_args = dict(recs=recs, rec_types=rec_types)
    min_state = minimize_blockmodel_dl(g, verbose=False, state_args=state_args)

    blocks = list(min_state.get_blocks())
    verts = g.get_vertices()
    graphml_ids = g.vertex_properties["_graphml_vertex_id"]

    block_map = {int(graphml_ids[v]): int(b) for v, b in zip(verts, blocks)}

    block_series = pd.Series(block_map)
    block_series.name = "block_label"
    return block_series
def get_sbm_communities(G, trials=3):
    """
    Returns the community partition identified by the SBM method,
    i.e. a list containing the community assignment of each node.

    Note: the algorithm runs multiple trials due to the stochastic nature of
    the method and keeps the partition with the minimum description length.
    On ties the earliest trial wins.

    Reference for SBM method: T.P. Peixoto, Efficient Monte Carlo and greedy
    heuristic for the inference of stochastic block models, Physical Review E,
    89 (2014).

    Parameters
    ----------
    G : NetworkX-formatted network
    trials : int
        Number of independent SBM fits to run. Defaults to 3.

    Returns
    -------
    list
        One entry per element of the selected fit's block array: the block
        label plus one.

    Raises
    ------
    ValueError
        If ``trials`` is smaller than 1.
    """
    if trials < 1:
        raise ValueError(f"trials must be at least 1, got {trials}")

    Gto = convert_to_gt(G)
    n = nx.number_of_nodes(G)

    # Run every trial, remembering its description length (state.entropy()).
    partitioning = []
    desc_len = []
    for _ in range(trials):
        partitioning_trial = gt.minimize_blockmodel_dl(Gto, B_min=1, B_max=n)
        desc_len.append(partitioning_trial.entropy())
        partitioning.append(partitioning_trial)

    # Pair each length with its trial index so ties resolve to the first trial.
    _, idx = min((length, i) for i, length in enumerate(desc_len))
    best_blocks = partitioning[idx].get_blocks().get_array()
    return [label + 1 for label in best_blocks]
Exemple #10
0
        samplestack = SampleStack(args)
        sampled_graph, sampled_graph_partition, vertex_mapping, block_mapping, evaluation = samplestack.unstack(
            args)
        full_graph, full_graph_partition, evaluation = samplestack.extrapolate_sample_partition(
            sampled_graph_partition, vertex_mapping, args, evaluation)
    else:
        graph, true_block_assignment = load_graph(args)
        t_load = timeit.default_timer()
        t_sample = timeit.default_timer()
        print("Performing stochastic block partitioning")
        evaluation = Evaluation(args, graph)
        # Please refer to the graph-tool documentation under graph-tool.inference for details on the input parameters
        partition = minimize_blockmodel_dl(graph,
                                           shrink_args={'parallel': True},
                                           verbose=args.verbose,
                                           mcmc_equilibrate_args={
                                               'verbose': args.verbose,
                                               'epsilon': 1e-4
                                           })
        t_partition = timeit.default_timer()

    t_end = timeit.default_timer()
    print('\nGraph partition took {} seconds'.format(t_end - t_start))
    evaluation.total_runtime(t_start, t_end)

    if args.sample_type != "none":
        print("===== Evaluating graph sampling =====")
        evaluation.evaluate_sampling(full_graph, sampled_graph,
                                     full_graph_partition,
                                     sampled_graph_partition, block_mapping,
                                     vertex_mapping,
Exemple #11
0
    def unstack(
        self,
        args: argparse.Namespace,
        sampled_graph_partition: BlockState = None,
        evaluation: Evaluation = None
    ) -> Tuple[Graph, BlockState, Dict, Dict, Evaluation]:
        """Partition the stacked samples one by one and merge the results.

        The first sample popped from the stack is partitioned on its own.
        Every remaining sample is then popped, partitioned, and merged into
        the running result through ``combine_partition_with_sample``. When
        ``args.sample_iterations`` is greater than 1, the combined graph is
        partitioned once more at the end.

        Parameters
        ----------
        args : argparse.Namespace
            the command-line arguments supplied by the user
        sampled_graph_partition : BlockState
            Default = None. The value passed in is never read; the name is
            only used for the result.
        evaluation : Evaluation
            the current state of the evaluation of the algorithm. A new one
            is created from the first sample when None. Default = None

        Returns
        -------
        sampled_graph : Graph
            the Graph object describing the combined samples
        sampled_graph_partition : BlockState
            the partition results of the combined samples
        vertex_mapping : Dict[int, int]
            the mapping of the vertices from the combined sample to the full graph
        block_mapping : Dict[int, int]
            the mapping of the communities/blocks from the combined sample to the full graph
        evaluation : Evaluation
            the evaluation object, with the timings measured here added
        """

        def run_sbp(graph, mcmc_verbose):
            # Single place for the partitioning settings shared by all calls.
            return minimize_blockmodel_dl(
                graph,
                shrink_args={'parallel': True},
                verbose=args.verbose,
                mcmc_equilibrate_args={
                    'verbose': mcmc_verbose,
                    'epsilon': 1e-4
                })

        # Propagate results back through the stack
        sampled_graph, sample = self._pop()
        # Fixed at -1, so the final re-partition below is decided by
        # args.sample_iterations alone.
        min_num_blocks = -1
        if evaluation is None:
            evaluation = Evaluation(args, sampled_graph)
        print(f"Subgraph: V = {sampled_graph.num_vertices()} "
              f"E = {sampled_graph.num_edges()}")

        started = timeit.default_timer()
        combined_partition = run_sbp(sampled_graph, args.verbose)
        evaluation.sampled_graph_partition_time += (
            timeit.default_timer() - started)
        combined_sampled_graph = sampled_graph

        while len(self.stack) > 0:
            sampled_graph, next_sample = self._pop()
            started = timeit.default_timer()
            sample_partition = run_sbp(sampled_graph, args.verbose)
            evaluation.sampled_graph_partition_time += (
                timeit.default_timer() - started)

            merge_started = timeit.default_timer()
            # TODO: fix this to allow multi-sample strategies
            merged = self.combine_partition_with_sample(
                combined_partition, sample_partition, sample, next_sample,
                args)
            combined_partition, combined_sampled_graph, sample = merged
            merge_finished = timeit.default_timer()
            # TODO: change to evaluation.merge_sample time?
            evaluation.propagate_membership += (merge_finished -
                                                merge_started)

        print("=====Performing final (combined) sample partitioning=====")
        needs_final_pass = min_num_blocks > 0 or (args.sample_iterations > 1)
        if not needs_final_pass:
            sampled_graph_partition = combined_partition
        else:
            combined_partition.num_blocks_to_merge = 0
            # The final pass always runs its MCMC equilibration quietly.
            sampled_graph_partition = run_sbp(combined_sampled_graph, False)
        return (combined_sampled_graph, sampled_graph_partition,
                sample.vertex_mapping, sample.true_blocks_mapping, evaluation)