Code example #1
import pickle
from time import time
import graph_tool.all as gt  # assumed import style for the gt alias used below

def roll(graphmlfile, nb_attempts, min_clusters, max_clusters, deg_corr=True):
    g = gt.load_graph(graphmlfile)

    print("Graph loaded")
    best = None
    best_entropy = None
    for i in range(nb_attempts):
        s = time()
        # NOTE: the extra options were disabled upstream; to honor the arguments,
        # pass deg_corr=deg_corr, B_min=min_clusters, B_max=max_clusters here.
        state = gt.minimize_nested_blockmodel_dl(g)
        entropy = state.entropy()
        print("Run #%s/%s in %ss" % (i + 1, nb_attempts, round(time() - s, 2)) +
              " - model entropy: " + str(entropy))
        if best_entropy is None or entropy < best_entropy:
            best = state
            best_entropy = entropy
            print(" -> Best so far")

    max_clusters_str = ""
    if max_clusters:
        max_clusters_str = "-%s_max_clusters" % max_clusters

    statefile = graphmlfile.replace(
        ".graphml",
        max_clusters_str + "-entropy_%s.state" % round(best_entropy))
    with open(statefile, "wb") as f:
        pickle.dump(best, f)

    print("State saved in", statefile)
    return statefile
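
A hedged usage sketch (the file name and attempt count are made up for illustration):

statefile = roll("network.graphml", nb_attempts=10,
                 min_clusters=None, max_clusters=None)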
Code example #2
def net_hierarchy_plot(gnetdata, filename=None, **kwarg):
    """
     create a hierarchy gene net plot
    ---------------------------------------------
    :param gnetdata: Gnetdata object
    :param filename: str, default None.
    :param kwarg: additional parameters passed to graph_tool.all.draw_hierarchy()
    :return: None

    """

    assert 'graph' in gnetdata.NetAttrs.keys(), 'graph is empty!'
    assert 'communities' in gnetdata.NetAttrs.keys(), 'node communities is empty!'

    graph = nx2gt(gnetdata.NetAttrs['graph'])
    node_group = gnetdata.NetAttrs['communities']

    # deg = graph.degree_property_map('total')
    ngroup = graph.new_vertex_property('int')

    labels = dict(zip(list(range(graph.num_vertices())), list(graph.vertex_properties['id'])))
    for g in labels.keys():
        ngroup[g] = node_group.loc[node_group.node == labels[g], 'group']

    state = gt.minimize_nested_blockmodel_dl(graph, deg_corr=True)
    gt.draw_hierarchy(state, vertex_fill_color=ngroup, vertex_anchor=0,
                      vertex_text=graph.vertex_properties['id'],
                      output=filename, **kwarg)
    return None
Code example #3
File: service.py  Project: idekerlab/graph-services
 def process_graphs(self, params, gs):
     """
     :param params: Dict of parameters.
     :param gs: A list of graph-tool Graph objects.
     :returns: A list of Graph objects and their layouts.
     """
     parameter = {p: params[p] for p in self.parameter}
     ts = []
     tposs = []
     for g in gs:
         if g.num_vertices() <= 1:
             logging.warn("zero or one node in input_graph")
             ts.append(g)
             tpos = gt.sfdp_layout(g)
             tposs.append(tpos)
             continue
         state = gt.minimize_nested_blockmodel_dl(g, deg_corr=True)
         pos, t, tpos = gt.draw_hierarchy(state,
                                          output="output.pdf",
                                          **parameter)
         self.add_edge_id(t)
         self.propagate_label(t, g)
         self.copy_clabels(t, state)
         t.gp.label = t.new_gp("string")
         t.gp.label = OUTPUT_LABEL
         ts.append(t)
         tposs.append(tpos)
     return ts, tposs
Code example #4
    def fit(self, overlap=False, hierarchical=True, B_min=None, n_init=1):
        '''
        Fit the sbm to the word-document network.
        - overlap, bool (default: False). Overlapping or Non-overlapping groups.
            Overlapping not implemented yet
        - hierarchical, bool (default: True). Hierarchical SBM or Flat SBM.
            Flat SBM not implemented yet.
        - B_min, int (default: None): passed to the graph-tool inference as the minimum number of blocks.
        - n_init, int (default: 1): number of random initializations, used to avoid local minima of the MDL.
        '''
        g = self.g
        if g is None:
            print('No data to fit the SBM. Load some data first (make_graph)')
        else:
            if overlap and "count" in g.ep:
                raise ValueError(
                    "When using overlapping SBMs, the graph must be constructed with 'counts=False'"
                )
            clabel = g.vp['kind']

            state_args = {'clabel': clabel, 'pclabel': clabel}
            if "count" in g.ep:
                state_args["eweight"] = g.ep.count

            ## the inference
            mdl = np.inf  ##
            for i_n_init in range(n_init):
                state_tmp = gt.minimize_nested_blockmodel_dl(
                    g,
                    deg_corr=True,
                    overlap=overlap,
                    state_args=state_args,
                    B_min=B_min)
                mdl_tmp = state_tmp.entropy()
                if mdl_tmp < mdl:
                    mdl = 1.0 * mdl_tmp
                    state = state_tmp.copy()

            self.state = state
            ## minimum description length
            self.mdl = state.entropy()
            ## collect group membership for each level in the hierarchy
            L = len(state.levels)
            dict_groups_L = {}

            ## only trivial bipartite structure
            if L == 2:
                self.L = 1
                for l in range(L - 1):
                    dict_groups_l = self.get_groups(l=l)
                    dict_groups_L[l] = dict_groups_l
            ## omit trivial levels: l=L-1 (single group), l=L-2 (bipartite)
            else:
                self.L = L - 2
                for l in range(L - 2):
                    dict_groups_l = self.get_groups(l=l)
                    dict_groups_L[l] = dict_groups_l
            self.groups = dict_groups_L
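
The n_init loop above is a keep-the-best pattern over independent minimizations; the same idea in isolation (a sketch assuming import numpy as np, import graph_tool.all as gt, an existing Graph g, and an n_init count):

best_state, best_mdl = None, np.inf
for _ in range(n_init):
    # each call starts from a different random initialization
    candidate = gt.minimize_nested_blockmodel_dl(g)
    mdl = candidate.entropy()
    if mdl < best_mdl:
        best_state, best_mdl = candidate, mdl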
Code example #5
File: crisp_partition.py  Project: deklanw/cdlib
def sbm_dl_nested(g, B_min=None, B_max=None, deg_corr=True, **kwargs):
    """Efficient Monte Carlo and greedy heuristic for the inference of stochastic block models. (nested)

    Fit a nested non-overlapping stochastic block model (SBM) by minimizing its description length using an agglomerative heuristic.
    Returns the lowest level found; currently cdlib does not support hierarchical clustering.
    If no parameters are given, the number of blocks is discovered automatically. Bounds for the number of communities can
    be provided using B_min and B_max.

    :param B_min: minimum number of communities that can be found
    :param B_max: maximum number of communities that can be found
    :param deg_corr: if true, use the degree corrected version of the SBM
    :return: NodeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.sbm_dl_nested(G)


    :References:

    Tiago P. Peixoto, “Hierarchical block structures and high-resolution model selection in large networks”, Physical Review X 4.1 (2014): 011047
    .. note:: Uses the implementation from the graph-tool library; see https://graph-tool.skewed.de for details.
    """
    if gt is None:
        raise Exception(
            "===================================================== \n"
            "The graph-tool library seems not to be installed (or incorrectly installed). \n"
            "Please check installation procedure there https://git.skewed.de/count0/graph-tool/wikis/installation-instructions#native-installation \n"
            "on linux/mac, you can use package managers to do so(apt-get install python3-graph-tool, brew install graph-tool, etc.)"
        )
    gt_g = convert_graph_formats(g, nx.Graph)
    gt_g, label_map = __from_nx_to_graph_tool(gt_g)
    state = gt.minimize_nested_blockmodel_dl(gt_g,
                                             B_min=B_min,
                                             B_max=B_max,
                                             deg_corr=deg_corr)
    level0 = state.get_levels()[0]

    affiliations = level0.get_blocks().get_array()
    affiliations = {
        label_map[i]: affiliations[i]
        for i in range(len(affiliations))
    }
    coms = affiliations2nodesets(affiliations)
    coms = [list(v) for k, v in coms.items()]
    return NodeClustering(coms,
                          g,
                          "SBM_nested",
                          method_parameters={
                              "B_min": B_min,
                              "B_max": B_max,
                              "deg_corr": deg_corr
                          })
Code example #6
    def fit(self, overlap=False, n_init=1, verbose=False, epsilon=1e-3):
        '''
        Fit the sbm to the word-document network.
        - overlap, bool (default: False). Overlapping or Non-overlapping groups.
            Overlapping not implemented yet
        - n_init, int (default: 1): number of random initializations, used to avoid local minima of the MDL.
        - verbose, bool (default: False): verbosity flag forwarded to the multilevel MCMC.
        - epsilon, float (default: 1e-3): tolerance for the (currently commented-out) MCMC refinement loop.
        '''
        g = self.g
        if g is None:
            print('No data to fit the SBM. Load some data first (make_graph)')
        else:
            if overlap and "count" in g.ep:
                raise ValueError(
                    "When using overlapping SBMs, the graph must be constructed with 'counts=False'"
                )
            clabel = g.vp['kind']

            state_args = {'clabel': clabel, 'pclabel': clabel}
            if "count" in g.ep:
                state_args["eweight"] = g.ep.count

            ## the inference
            mdl = np.inf  ##
            for i_n_init in range(n_init):
                base_type = gt.BlockState if not overlap else gt.OverlapBlockState
                state_tmp = gt.minimize_nested_blockmodel_dl(
                    g,
                    state_args=dict(base_type=base_type, **state_args),
                    multilevel_mcmc_args=dict(verbose=verbose))
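                # walk down the hierarchy and cut it at the first level that is
                # effectively bipartite (two nonempty blocks); the copy below
                # then caps the truncated hierarchy with a single-group level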
                L = 0
                for s in state_tmp.levels:
                    L += 1
                    if s.get_nonempty_B() == 2:
                        break
                state_tmp = state_tmp.copy(bs=state_tmp.get_bs()[:L] +
                                           [np.zeros(1)])
                # state_tmp = state_tmp.copy(sampling=True)
                # delta = 1 + epsilon
                # while abs(delta) > epsilon:
                #     delta = state_tmp.multiflip_mcmc_sweep(niter=10, beta=np.inf)[0]
                #     print(delta)
                print(state_tmp)

                mdl_tmp = state_tmp.entropy()
                if mdl_tmp < mdl:
                    mdl = 1.0 * mdl_tmp
                    state = state_tmp.copy()

            self.state = state
            ## minimum description length
            self.mdl = state.entropy()
            L = len(state.levels)
            if L == 2:
                self.L = 1
            else:
                self.L = L - 2
Code example #7
def update_state():
    global time
    mark.a = False
    visited.a = False
    g.set_vertex_filter(None)

    # visit the nodes in random order
    vs = list(g.vertices())
    shuffle(vs)
    for v in vs:
        # test the exact-match case first: the window below also matches
        # order[v] == time, so the original elif branch could never run
        if time == order[v]:
            visited[v] = True
            mark[v] = True
        elif time - 5 < order[v] < time + 5:
            visited[v] = True
        else:
            visited[v] = False
    # Filter out the recovered vertices
    g.set_vertex_filter(visited)
    size = gt.prop_to_size(g.degree_property_map("total"), ma=10)
    #vsize = gt.prop_to_size(deg)
    nested_state = gt.minimize_nested_blockmodel_dl(g, deg_corr=True)

    level = nested_state.get_levels()[0].get_blocks()
    print(list(level))
    gt.sfdp_layout(g, pos=pos, eweight=g.ep['weight'],
                   groups=level, max_iter=0)
    for v in vs:
        vsize[v] = size[v]
        state[v] = level[v]
    print(g.num_vertices())

    # The following will force the re-drawing of the graph, and issue a
    # re-drawing of the GTK window.
    win.graph.fit_to_window(g=g)
    win.graph.regenerate_surface()
    win.graph.queue_draw()

    # if doing an offscreen animation, dump frame to disk
    time += 1
    if time > max_time:
        sys.exit(0)
    print(time)
    if offscreen:
        pixbuf = win.get_pixbuf()
        strdate = (initial_date+timedelta(days=time)).strftime('%d-%m-%Y')
        pixbuf = put_text(pixbuf, strdate, 0, 10)
        pixbuf.savev(dir + '/news-date_%06d.png' % time, 'png', [], [])

    # We need to return True so that the main loop will call this function more
    # than once.
    print('-'*80)
    return True
Code example #8
    def fit(self) -> None:
        """ given graph, fits NSBM and writes to state and block instance variables """

        print("Fitting NSBM")
        state = gt.minimize_nested_blockmodel_dl(self.g, deg_corr=True)
        self.state = state
        self.levels = self.state.get_levels()
        self.block_level = min(self.block_level, len(self.levels))
        self.__get_block_metadata()

        print("NSBM hierarchy summary:")
        state.print_summary()
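
Once fitted, the per-level partitions can be read off the NestedBlockState, e.g.:

levels = state.get_levels()           # one BlockState per hierarchy level
b0 = levels[0].get_blocks()           # finest block label for every vertex
b1 = state.project_partition(1, 0)    # level-1 labels projected onto the vertices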
Code example #9
    def fit(self,
            overlap=False,
            hierarchical=True,
            B_min=None,
            n_init=1,
            verbose=False):
        """
        Fit the sbm to the word-document network.
        - overlap, bool (default: False). Overlapping or Non-overlapping groups.
            Overlapping not implemented yet
        - hierarchical, bool (default: True). Hierarchical SBM or Flat SBM.
            Flat SBM not implemented yet.
        - B_min, int (default: None): passed to the graph-tool inference as the minimum number of blocks.
        - n_init, int (default: 1): number of random initializations, used to avoid local minima of the MDL.
        """
        g = self.g
        if g is None:
            print("No data to fit the SBM. Load some data first (make_graph)")
        else:
            if overlap and "count" in g.ep:
                raise ValueError(
                    "When using overlapping SBMs, the graph must be constructed with 'counts=False'"
                )
            clabel = g.vp["kind"]

            state_args = {"clabel": clabel, "pclabel": clabel}
            if "count" in g.ep:
                state_args["eweight"] = g.ep.count

            ## the inference
            mdl = np.inf  ##
            for i_n_init in range(n_init):
                state_tmp = gt.minimize_nested_blockmodel_dl(
                    g,
                    deg_corr=True,
                    overlap=overlap,
                    state_args=state_args,
                    B_min=B_min,
                    verbose=verbose,
                )
                mdl_tmp = state_tmp.entropy()
                if mdl_tmp < mdl:
                    mdl = 1.0 * mdl_tmp
                    state = state_tmp.copy()

            self.state = state
            ## minimum description length
            self.mdl = state.entropy()
            L = len(state.levels)
            if L == 2:
                self.L = 1
            else:
                self.L = L - 2
Code example #10
File: SBM.py  Project: mrcbarbier/EcoComDyn
def SBM_state(Aij):
    # assumed imports: import numpy as np; import graph_tool.all as gt
    g = gt.Graph()
    # list() is required on Python 3, where zip() returns a one-shot iterator
    lst = list(zip(*np.where(Aij != 0)))
    # give every fully isolated node a self-loop so it still gets a vertex
    for z in np.where(np.sum(np.abs(Aij) + np.abs(Aij.T), axis=0) == 0)[0]:
        lst += [(z, z)]
    g.add_edge_list(np.array(lst))
    tmp = g.new_edge_property('double')
    tmp.get_array()[:] = [Aij[i, j] for i, j in lst]
    g.edge_properties["weight"] = tmp
    state = gt.minimize_nested_blockmodel_dl(g,
                                             state_args=dict(
                                                 recs=[g.ep.weight],
                                                 rec_types=["real-normal"]))
    return [np.array(s) for s in state.get_bs()]
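
A hedged usage sketch on a small random interaction matrix (the matrix is fabricated for illustration):

import numpy as np
rng = np.random.default_rng(0)
mask = rng.random((10, 10)) < 0.3      # ~30% of the interactions are nonzero
Aij = rng.normal(size=(10, 10)) * mask
blocks_per_level = SBM_state(Aij)      # one label array per hierarchy level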
Code example #11
    def draw(self, file_name, output_size=(1980, 1980)):
        # for straight edges, use only one line below instead of the next 5 lines before gt.graph_draw
        # tpos = pos = gt.radial_tree_layout(self.g, self.root)

        state = gt.minimize_nested_blockmodel_dl(self.g, deg_corr=True)
        t = gt.get_hierarchy_tree(state)[0]
        # lay out the hierarchy tree itself (its root is the last vertex), then
        # map the leaf positions back onto the original graph; the original
        # laid out self.g here, which leaves t without coordinates
        tpos = gt.radial_tree_layout(t, t.vertex(t.num_vertices() - 1), weighted=True)
        cts = gt.get_hierarchy_control_points(self.g, t, tpos, beta=0.2)
        pos = self.g.own_property(tpos)

        gt.graph_draw(self.g, bg_color=[1,1,1,1], vertex_text=self.vprop_label, vertex_text_position=self.text_position, \
                      vertex_text_rotation=self.text_rotation, vertex_fill_color=self.fill_color, \
                      output=file_name, output_size=output_size, inline=True, vertex_font_size=self.font_size, \
                      edge_marker_size=self.marker_size, vertex_text_offset=self.text_offset, \
                      vertex_size=self.vertex_size, vertex_anchor = 0, pos=pos, edge_control_points=cts, fit_view=0.9)
Code example #12
    def fit(self, overlap=False, hierarchical=True):
        '''
        Fit the sbm to the word-document network.
        - overlap, bool (default: False). Overlapping or Non-overlapping groups.
            Overlapping not implemented yet
        - hierarchical, bool (default: True). Hierarchical SBM or Flat SBM.
            Flat SBM not implemented yet.
        '''
        g = self.g
        if g is None:
            print('No data to fit the SBM. Load some data first (make_graph)')
        else:
            if overlap and "count" in g.ep:
                raise ValueError(
                    "When using overlapping SBMs, the graph must be constructed with 'counts=False'"
                )
            clabel = g.vp['kind']

            state_args = {'clabel': clabel, 'pclabel': clabel}
            if "count" in g.ep:
                state_args["eweight"] = g.ep.count

            ## the inference
            state = gt.minimize_nested_blockmodel_dl(g,
                                                     deg_corr=True,
                                                     overlap=overlap,
                                                     state_args=state_args)

            self.state = state
            ## minimum description length
            self.mdl = state.entropy()
            ## collect group membership for each level in the hierarchy
            L = len(state.levels)
            dict_groups_L = {}

            ## only trivial bipartite structure
            if L == 2:
                self.L = 1
                for l in range(L - 1):
                    dict_groups_l = self.get_groups(l=l)
                    dict_groups_L[l] = dict_groups_l
            ## omit trivial levels: l=L-1 (single group), l=L-2 (bipartite)
            else:
                self.L = L - 2
                for l in range(L - 2):
                    dict_groups_l = self.get_groups(l=l)
                    dict_groups_L[l] = dict_groups_l
            self.groups = dict_groups_L
Code example #13
 def make_radial_graph(self, **args):
     if self.random_state is not None:
         np.random.seed(self.random_state)
     state = gt.minimize_nested_blockmodel_dl(self.g, deg_corr=True)
     t = gt.get_hierarchy_tree(state)[0]
     tpos = gt.radial_tree_layout(t,
                                  t.vertex(t.num_vertices() - 1),
                                  weighted=True)
     # control points and vertex positions are stored directly as graph
     # properties; the two unused new_edge_property placeholders were dropped
     self.g.edge_properties['cts'] = gt.get_hierarchy_control_points(
         self.g, t, tpos)
     self.g.vertex_properties['pos'] = self.g.own_property(tpos)
     self.membership = list(state.get_bs()[0])
     if self.node_color.sum() == 0:
         self.node_color = self.convert_str_to_color(self.membership)
         self.add_color_nodes()
Code example #14
def hierarchy_partition(graph, args):
    graph_json = {'nodes': [], 'links': []}

    # form the json file
    for e in graph.edges():
        graph_json['links'].append({
            'sourceIdx': int(e.source()),
            'targetIdx': int(e.target())
        })

    # find the latent hierarchical tree structure
    state = gt.minimize_nested_blockmodel_dl(graph, verbose=args.verbose)

    get_hierarchy_gt(graph_json, state)

    return graph_json
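
A hedged invocation sketch; args only needs a verbose attribute here, and get_hierarchy_gt must be importable from the same project:

from types import SimpleNamespace
import graph_tool.all as gt
graph = gt.collection.data["karate"]   # any graph-tool Graph will do
graph_json = hierarchy_partition(graph, SimpleNamespace(verbose=False))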
Code example #15
    def fit(self):
        """
        Fits the hSBM to the undirected, layered multigraph, where the graph in the doc-word layer is bipartite.
        This uses the independent layer multilayer network where we have a degree-corrected SBM.
        """
        # We need to impose constraints on vertices and edges to keep track of which layer they are in.
        state_args = {}
        # Vertices with different label values will not be clustered in the same group
        state_args["pclabel"] = self.g.vp["kind"]
        # Split the network in discrete layers based on edgetype. 0 is for word-doc graph and 1 is for hyperlink graph.
        state_args["ec"] = self.g.ep["edgeType"]
        # Independent layers version of the model (instead of 'edge covariates')
        state_args["layers"] = True
        # Edge multiplicities based on occurrences.
        state_args["eweight"] = self.g.ep.edgeCount

        self.g.save("foo.gt.gz")
        # Specify parameters for community detection inference
        gt.seed_rng(self.random_seed)
        mdl = np.inf
        # Fit n_init random initializations to avoid local optimum of MDL.
        for _ in range(self.n_init):
            # Enables the use of LayeredBlockState. Use a degree-corrected layered SBM.
            state_temp = gt.minimize_nested_blockmodel_dl(self.g, state_args=dict(base_type=gt.LayeredBlockState,
                                                                                  **state_args))
            mdl_temp = state_temp.entropy()
            if mdl_temp < mdl:
                # We have found a new optimum
                mdl = mdl_temp
                state = state_temp.copy()

        self.state = state
        self.mdl = state.entropy()

        n_levels  = len(self.state.levels)
        # Figure out group levels
        if n_levels == 2:
            # Bipartite network
            self.groups = { 0: self.get_groupStats(l=0) }
            self.n_levels = len(self.groups)
        # Omit trivial levels: l=L-1 (single group), l=L-2 (bipartite)
        else:
            self.groups = { level: self.get_groupStats(l=level) for level in range(n_levels - 2) }
            self.n_levels = len(self.groups)
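
For context, a minimal sketch of the property maps this method expects on self.g; the construction is an assumption modeled on the comments above (including the document/word convention for 'kind'), not code from the project:

g = gt.Graph(directed=False)
g.vp["kind"] = g.new_vertex_property("int")      # assumed: 0 = document, 1 = word
g.ep["edgeType"] = g.new_edge_property("int")    # 0 = word-doc edge, 1 = hyperlink
g.ep["edgeCount"] = g.new_edge_property("int")   # edge multiplicities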
Code example #16
File: sbm.py  Project: NeuroDataDesign/lemur
    def fitSBM(self,
               deg_corr_1=False,
               verbose_1=False,
               wait_1=10,
               nbreaks_1=2,
               n=10,
               verbose_2=False):
        """
        Fit a nested SBM to graph
        Pass arguments to graph-tools minimize_nested_blockmodel_dl
        """

        state = gt.minimize_nested_blockmodel_dl(self.g,
                                                 deg_corr=deg_corr_1,
                                                 verbose=verbose_1,
                                                 mcmc_equilibrate_args=dict(
                                                     wait=wait_1,
                                                     nbreaks=nbreaks_1,
                                                     mcmc_args=dict(niter=n),
                                                     verbose=verbose_2))
        return state
Code example #17
File: graphtool.py  Project: sletkeman/practicum
def build_nest_block_model(useOnDemand, viewer_condition, content_condition,
                           size, use_deg_corr, use_edge_weights, savedDir):
    g, engagement_df, viewers, content = build_tree(useOnDemand,
                                                    viewer_condition,
                                                    content_condition, size,
                                                    savedDir)
    print("building model")
    state_args = dict(recs=[g.ep.engagement], rec_types=[
        "real-normal"
    ]) if use_edge_weights else dict()
    state = gt.minimize_nested_blockmodel_dl(g,
                                             state_args=state_args,
                                             deg_corr=use_deg_corr)

    # expand and improve the model
    # S1 = state.entropy()
    # state = state.copy(bs=state.get_bs() + [np.zeros(1)] * 4, sampling=True)
    # for i in range(100):
    #     ret = state.multiflip_mcmc_sweep(niter=10, beta=np.inf)
    # S2 = state.entropy()
    # print("Improvement:", S2 - S1)

    print("preparing results")
    levels = state.get_levels()
    blocks = []
    for level in levels:
        blocks.append(level.get_blocks())
    vertices = g.get_vertices()
    results = {
        "entropy": state.entropy(),
        "results": [],
        "edges": engagement_df.to_dict('records'),
        "viewers": viewers,
        "content": content
    }
    counter = {"count": 0}
    for i, v in enumerate(vertices):
        recurseUp(len(blocks), blocks, 0, i, v, results.get("results"),
                  counter, v)
    return results
Code example #18
    def fit(self,
            overlap=False,
            hierarchical=True,
            B_min=None,
            B_max=None,
            n_init=1,
            n_init_jobs=1,
            parallel=False,
            verbose=False,
            **kwds):
        '''
        Fit the sbm to the word-document network.
        - overlap, bool (default: False). Overlapping or Non-overlapping groups.
            Overlapping not implemented yet
        - hierarchical, bool (default: True). Hierarchical SBM or Flat SBM.
            Flat SBM not implemented yet.
        - B_min, int (default: None): passed to the graph-tool inference as the minimum number of blocks.
        - n_init, int (default: 1): number of random initializations, used to avoid local minima of the MDL.
        '''

        sequential = not parallel

        g = self.g
        if g is None:
            print('No data to fit the SBM. Load some data first (make_graph)')
        else:
            if overlap and "count" in g.ep:
                raise ValueError(
                    "When using overlapping SBMs, the graph must be constructed with 'counts=False'"
                )
            clabel = g.vp['kind']

            state_args = {'clabel': clabel, 'pclabel': clabel}
            if "count" in g.ep:
                state_args["eweight"] = g.ep.count
            ## the inference
            mdl = np.inf  ##

            if n_init_jobs == 1:
                for i_n_init in range(n_init):
                    state_tmp = gt.minimize_nested_blockmodel_dl(
                        g,
                        deg_corr=True,
                        overlap=overlap,
                        state_args=state_args,
                        mcmc_args={'sequential': sequential},
                        mcmc_equilibrate_args={
                            'mcmc_args': {
                                'sequential': sequential
                            }
                        },
                        mcmc_multilevel_args={
                            'mcmc_equilibrate_args': {
                                'mcmc_args': {
                                    'sequential': sequential
                                }
                            },
                            'anneal_args': {
                                'mcmc_equilibrate_args': {
                                    'mcmc_args': {
                                        'sequential': sequential
                                    }
                                }
                            }
                        },
                        B_min=B_min,
                        B_max=B_max,
                        verbose=verbose,
                        **kwds)
                    mdl_tmp = state_tmp.entropy()
                    if mdl_tmp < mdl:
                        mdl = 1.0 * mdl_tmp
                        state = state_tmp.copy()

            else:

                runs = Parallel(n_jobs=n_init_jobs)(delayed(
                    gt.minimize_nested_blockmodel_dl)(g,
                                                      deg_corr=True,
                                                      overlap=overlap,
                                                      B_min=B_min,
                                                      state_args=state_args,
                                                      verbose=verbose,
                                                      **kwds)
                                                    for _ in range(n_init))
                # compare every parallel run; the original performed this
                # comparison outside the loop, so only the last run counted
                for i_n_init in range(n_init):
                    state_tmp = runs[i_n_init]
                    mdl_tmp = state_tmp.entropy()
                    if mdl_tmp < mdl:
                        mdl = 1.0 * mdl_tmp
                        state = state_tmp.copy()

            self.mdl = mdl
            self.state = state
            ## minimum description length
            self.mdl = state.entropy()
            L = len(state.levels)
            if L == 2:
                self.L = 1
            else:
                self.L = L - 2
Code example #19
g.edge_properties['edge_color'] = edge_color
for e in g.edges():
    if plot_color[e.source()] != plot_color[e.target()]:
        if plot_color[e.source()] == (0, 0, 1, 1):
            #orange on dem -> rep
            edge_color[e] = (255.0 / 255.0, 102 / 255.0, 0 / 255.0, alpha)
        else:
            edge_color[e] = (102.0 / 255.0, 51 / 255.0, 153 / 255.0, alpha)
    #red on rep-rep edges
    elif plot_color[e.source()] == (1, 0, 0, 1):
        edge_color[e] = (1, 0, 0, alpha)
    #blue on dem-dem edges
    else:
        edge_color[e] = (0, 0, 1, alpha)

state = gt.minimize_nested_blockmodel_dl(g, deg_corr=True)
bstack = state.get_bstack()
t = gt.get_hierarchy_tree(bstack)[0]
tpos = pos = gt.radial_tree_layout(t,
                                   t.vertex(t.num_vertices() - 1),
                                   weighted=True)
cts = gt.get_hierarchy_control_points(g, t, tpos)
pos = g.own_property(tpos)
b = bstack[0].vp["b"]

#labels
text_rot = g.new_vertex_property('double')
g.vertex_properties['text_rot'] = text_rot
for v in g.vertices():
    if pos[v][0] > 0:
        text_rot[v] = math.atan(pos[v][1] / pos[v][0])
Code example #20
def amino_acid_circos(cmap='tab20', filetype="pdf", reverse=False):
    cm = plt.cm.get_cmap(cmap)
    cmappable = ScalarMappable(norm=Normalize(vmin=0, vmax=20), cmap=cm)

    g_aa = gt.Graph(directed=False)
    g_aa.vp.aa = g_aa.new_vertex_property("string")
    g_aa.vp.aa_color = g_aa.new_vertex_property("vector<float>")
    g_aa.vp.count = g_aa.new_vertex_property("float")
    g_aa.ep.count = g_aa.new_edge_property("float")
    g_aa.ep.grad = g_aa.new_edge_property("vector<float>")

    for aa_index, aa in enumerate(aa_order):
        if aa == "X": continue
        v = g_aa.add_vertex()
        g_aa.vp.aa[v] = aa
        g_aa.vp.aa_color[v] = cmappable.to_rgba(aa_index)
        g_aa.vp.count[v] = np.sqrt(len([k for k, v in codontable.items() if v == aa])) * 28

    adj = np.zeros((g_aa.num_vertices(), g_aa.num_vertices()))
    for ref_index, ref in enumerate(g_aa.vertices()):
        for alt_index, alt in enumerate(g_aa.vertices()):
            if alt <= ref: continue
            aa_ref, aa_alt = g_aa.vp.aa[ref], g_aa.vp.aa[alt]
            c_ref = [k for k, v in codontable.items() if v == aa_ref]
            c_alt = [k for k, v in codontable.items() if v == aa_alt]
            nei = [(r, a) for r, a in product(c_ref, c_alt) if distance_str(r, a) == 1]
            if len(nei) > 0:
                e_aa = g_aa.add_edge(ref, alt)
                x = cmappable.to_rgba(ref_index)[:3]
                y = cmappable.to_rgba(alt_index)[:3]
                if reverse: x, y = y, x
                g_aa.ep.grad[e_aa] = [0.0, *x, 0.75, 1.0, *y, 0.75]
                g_aa.ep.count[e_aa] = len(nei) * 2.0
                adj[ref_index, alt_index] = len(nei)

    table = open("aa-adjacency.tex", "w")
    table.writelines("\\begin{table}[H]\n\\centering\n")
    table.writelines("\\begin{tabular}{|c||" + "c|" * g_aa.num_vertices() + "}\n")
    table.writelines("\\hline & ")
    table.writelines(" & ".join(map(lambda x: "\\textbf{" + x + "}", g_aa.vp.aa)) + "\\\\\n")
    table.writelines("\\hline\n\\hline ")
    for i in range(adj.shape[0]):
        elts = ["\\textbf{" + g_aa.vp.aa[i] + "}"]
        for j in range(adj.shape[1]):
            if i < j:
                elts.append("{:d}".format(int(adj[i][j])))
            else:
                elts.append("-")
        table.writelines(" & ".join(elts) + "\\\\\n\\hline ")
    table.writelines("\\end{tabular}\n")
    table.writelines("\\caption[]{}\n")
    table.writelines("\\end{table}\n")
    table.close()

    assert g_aa.num_vertices() == 20
    dist = gt.shortest_distance(g_aa)
    r = max([max(dist[g_aa.vertex(i)].a) for i in g_aa.vertices()])
    print('Amino acids graph radius : {0}'.format(r))
    dict_distance = {1: [], 2: [], 3: []}
    for source in g_aa.vertices():
        for target in g_aa.vertices():
            if source <= target: continue
            dict_distance[int(gt.shortest_distance(g_aa, source, target))].append(
                "{0}-{1}".format(g_aa.vp.aa[source], g_aa.vp.aa[target]))

    for k, v in dict_distance.items():
        print("d={0}: {1} pairs".format(k, len(v)))
        print(", ".join(v))

    print('Amino acids: {0} transitions out of {1} possible'.format(g_aa.num_edges(), 20 * 19 / 2))
    state = gt.minimize_nested_blockmodel_dl(g_aa, deg_corr=True)
    t = gt.get_hierarchy_tree(state)[0]
    tpos = gt.radial_tree_layout(t, t.vertex(t.num_vertices() - 1), weighted=True)
    cts = gt.get_hierarchy_control_points(g_aa, t, tpos)
    pos = g_aa.own_property(tpos)
    gt.graph_draw(g_aa, pos=pos, edge_control_points=cts, vertex_anchor=0, vertex_text=g_aa.vp.aa,
                  vertex_fill_color=g_aa.vp.aa_color, vertex_size=g_aa.vp.count, vertex_font_size=16,
                  vertex_pen_width=3.2, vertex_color=(0.65, 0.65, 0.65, 1),
                  edge_gradient=g_aa.ep.grad, edge_pen_width=g_aa.ep.count,
                  output="gt-aa-{0}.{1}".format(cmap, filetype))
Code example #21
File: _flat_model.py  Project: stuarteberg/schist
def flat_model(
    adata: AnnData,
    max_iterations: int = 1000000,
    epsilon: float = 0,
    equilibrate: bool = False,
    wait: int = 1000,
    nbreaks: int = 2,
    collect_marginals: bool = False,
    niter_collect: int = 10000,
    deg_corr: bool = True,
    multiflip: bool = True,
    fast_model: bool = False,
    n_init: int = 1,
    beta_range: Tuple[float] = (1., 100.),
    steps_anneal: int = 5,
    resume: bool = False,
    *,
    restrict_to: Optional[Tuple[str, Sequence[str]]] = None,
    random_seed: Optional[int] = None,
    key_added: str = 'sbm',
    adjacency: Optional[sparse.spmatrix] = None,
    neighbors_key: Optional[str] = 'neighbors',
    directed: bool = False,
    use_weights: bool = False,
    copy: bool = False,
    minimize_args: Optional[Dict] = {},
    equilibrate_args: Optional[Dict] = {},    
) -> Optional[AnnData]:
    """\
    Cluster cells into subgroups [Peixoto14]_.

    Cluster cells using the Stochastic Block Model [Peixoto14]_, performing
    Bayesian inference on node groups.

    This requires having run :func:`~scanpy.pp.neighbors` or
    :func:`~scanpy.external.pp.bbknn` first.

    Parameters
    ----------
    adata
        The annotated data matrix.
    max_iterations
        Maximal number of iterations to be performed by the equilibrate step.
    epsilon
        Relative changes in entropy smaller than epsilon will
        not be considered as record-breaking.
    equilibrate
        Whether or not to perform the mcmc_equilibrate step.
        Equilibration should always be performed. Note, also, that without
        equilibration it won't be possible to collect marginals.
    collect_marginals
        Whether or not to collect each node's probability of belonging
        to a specific partition.
    niter_collect
        Number of iterations to force when collecting marginals. This will
        increase the precision when calculating probabilities.
    wait
        Number of iterations to wait for a record-breaking event.
        Higher values result in longer computations. Set it to small values
        when performing quick tests.
    nbreaks
        Number of iteration intervals (of size `wait`) without
        record-breaking events necessary to stop the algorithm.
    deg_corr
        Whether to use degree correction in the minimization step. In many
        real world networks this is the case, although this doesn't seem to be
        the case for KNN graphs used in scanpy.
    multiflip
        Whether to perform MCMC sweep with multiple simultaneous moves to sample
        network partitions. It may result in slightly longer runtimes, but under
        the hood it allows for a more efficient space exploration.
    fast_model
        Whether to skip the initial minimization step and let the MCMC find a
        solution. This approach tends to be faster and consumes less memory,
        but is less accurate.
    n_init
        Number of initial minimizations to be performed. The one with the
        smallest entropy is chosen.
    beta_range
        Inverse temperature at the beginning and the end of the equilibration
    steps_anneal
        Number of steps in which the simulated annealing is performed
    resume
        Start from a previously created model, if any, without initializing a
        new model.
    key_added
        `adata.obs` key under which to add the cluster labels.
    adjacency
        Sparse adjacency matrix of the graph, defaults to
        `adata.uns['neighbors']['connectivities']` in case of scanpy<=1.4.6 or
        `adata.obsp[neighbors_key][connectivity_key]` for scanpy>1.4.6
    neighbors_key
        The key passed to `sc.pp.neighbors`
    directed
        Whether to treat the graph as directed or undirected.
    use_weights
        If `True`, edge weights from the graph are used in the computation
        (placing more emphasis on stronger edges). Note that this
        increases computation times
    copy
        Whether to copy `adata` or modify it inplace.
    random_seed
        Random number to be used as seed for graph-tool

    Returns
    -------
    `adata.obs[key_added]`
        Array of dim (number of samples) that stores the subgroup id
        (`'0'`, `'1'`, ...) for each cell.
    `adata.uns['sbm']['params']`
        A dict with the values for the parameters `resolution`, `random_state`,
        and `n_iterations`.
    `adata.uns['sbm']['stats']`
        A dict with the values returned by mcmc_sweep
    `adata.uns['sbm']['cell_affinity']`
        A `np.ndarray` with cell probability of belonging to a specific group
    `adata.uns['sbm']['state']`
        The BlockModel state object
    """

    raise DeprecationWarning("""This function has been deprecated since version 
    0.5.0, please consider usage of planted_model instead.
    """)

    if fast_model or resume: 
        # if the fast_model is chosen perform equilibration anyway
        equilibrate=True
        
    if resume and ('sbm' not in adata.uns or 'state' not in adata.uns['sbm']):
        # let the model proceed as default
        logg.warning('Resuming has been specified but a state was not found\n'
                     'Will continue with default minimization step')

        resume=False
        fast_model=False

    if random_seed:
        np.random.seed(random_seed)
        gt.seed_rng(random_seed)

    if collect_marginals:
        logg.warning('Collecting marginals has a large impact on running time')
        if not equilibrate:
            raise ValueError(
                "You can't collect marginals without MCMC equilibrate "
                "step. Either set `equlibrate` to `True` or "
                "`collect_marginals` to `False`"
            )

    start = logg.info('minimizing the Stochastic Block Model')
    adata = adata.copy() if copy else adata
    # are we clustering a user-provided graph or the default AnnData one?
    if adjacency is None:
        if neighbors_key not in adata.uns:
            raise ValueError(
                'You need to run `pp.neighbors` first '
                'to compute a neighborhood graph.'
            )
        elif 'connectivities_key' in adata.uns[neighbors_key]:
            # scanpy>1.4.6 has matrix in another slot
            conn_key = adata.uns[neighbors_key]['connectivities_key']
            adjacency = adata.obsp[conn_key]
        else:
            # scanpy<=1.4.6 has sparse matrix here
            adjacency = adata.uns[neighbors_key]['connectivities']
    if restrict_to is not None:
        restrict_key, restrict_categories = restrict_to
        adjacency, restrict_indices = restrict_adjacency(
            adata,
            restrict_key,
            restrict_categories,
            adjacency,
        )
    # convert the adjacency matrix to a graph-tool graph
    g = get_graph_tool_from_adjacency(adjacency, directed=directed)

    recs=[]
    rec_types=[]
    if use_weights:
        # this is not ideal to me, possibly we may need to transform
        # weights. More tests needed.
        recs=[g.ep.weight]
        rec_types=['real-normal']

    if fast_model:
        # do not minimize, start with a dummy state and perform only equilibrate
        state = gt.BlockState(g=g, B=1, sampling=True,
                              state_args=dict(deg_corr=deg_corr,
                              recs=recs,
                              rec_types=rec_types
                              ))
    elif resume:
        # create the state and make sure sampling is performed
        state = adata.uns['sbm']['state'].copy(sampling=True)
        g = state.g
    else:
        if n_init < 1:
            n_init = 1
        
        states = [gt.minimize_nested_blockmodel_dl(g, deg_corr=deg_corr, 
                  state_args=dict(recs=recs,  rec_types=rec_types), 
                  **minimize_args) for n in range(n_init)]
                  
        state = states[np.argmin([s.entropy() for s in states])]    

        logg.info('    done', time=start)
        state = state.copy(B=g.num_vertices())
    
    # equilibrate the Markov chain
    if equilibrate:
        logg.info('running MCMC equilibration step')
        equilibrate_args['wait'] = wait
        equilibrate_args['nbreaks'] = nbreaks
        equilibrate_args['max_niter'] = max_iterations
        equilibrate_args['multiflip'] = multiflip
        equilibrate_args['mcmc_args'] = {'niter':10}
        
        dS, nattempts, nmoves = gt.mcmc_anneal(state, 
                                               mcmc_equilibrate_args=equilibrate_args,
                                               niter=steps_anneal,
                                               beta_range=beta_range)

    if collect_marginals and equilibrate:
        # we only retain level_0 counts here, until I can figure out
        # how to propagate counts correctly to higher levels
        # I wonder if this should be placed after group definition or not
        logg.info('    collecting marginals')
        group_marginals = np.zeros(g.num_vertices() + 1)
        def _collect_marginals(s):
            group_marginals[s.get_nonempty_B()] += 1

        gt.mcmc_equilibrate(state, wait=wait, nbreaks=nbreaks, epsilon=epsilon,
                            max_niter=max_iterations, multiflip=False,
                            force_niter=niter_collect, mcmc_args=dict(niter=10),
                            callback=_collect_marginals)
        logg.info('    done', time=start)

    # everything is in place, we need to fill all slots
    # first build an array with
    groups = pd.Series(state.get_blocks().get_array()).astype('category')
    new_cat_names = dict([(cx, u'%s' % cn) for cn, cx in enumerate(groups.cat.categories)])
    groups.cat.rename_categories(new_cat_names, inplace=True)

    if restrict_to is not None:
        groups.index = adata.obs[restrict_key].index
    else:
        groups.index = adata.obs_names

    # add column names
    adata.obs.loc[:, key_added] = groups

    # add some unstructured info

    adata.uns['sbm'] = {}
    adata.uns['sbm']['stats'] = dict(
    dS=dS,
    nattempts=nattempts,
    nmoves=nmoves,
    modularity=gt.modularity(g, state.get_blocks())
    )
    adata.uns['sbm']['state'] = state

    # now add marginal probabilities.

    if collect_marginals:
        # cell marginals will be a list of arrays with probabilities
        # of belonging to a specific group
        adata.uns['sbm']['group_marginals'] = group_marginals

    # calculate log-likelihood of cell moves over the remaining levels
    
    adata.uns['sbm']['cell_affinity'] = {'1':get_cell_loglikelihood(state, as_prob=True)}
    
    # last step is recording some parameters used in this analysis
    adata.uns['sbm']['params'] = dict(
        epsilon=epsilon,
        wait=wait,
        nbreaks=nbreaks,
        equilibrate=equilibrate,
        fast_model=fast_model,
        collect_marginals=collect_marginals,
        random_seed=random_seed
    )


    logg.info(
        '    finished',
        time=start,
        deep=(
            f'found {state.get_nonempty_B()} clusters and added\n'
            f'    {key_added!r}, the cluster labels (adata.obs, categorical)'
        ),
    )
    return adata if copy else None
Code example #22
def nested_model(
    adata: AnnData,
    max_iterations: int = 1000000,
    epsilon: float = 0,
    equilibrate: bool = False,
    wait: int = 1000,
    nbreaks: int = 2,
    collect_marginals: bool = False,
    niter_collect: int = 10000,
    hierarchy_length: int = 10,
    deg_corr: bool = True,
    multiflip: bool = True,
    fast_model: bool = False,
    fast_tol: float = 1e-6,
    n_sweep: int = 10,
    beta: float = np.inf,
    n_init: int = 1,
    beta_range: Tuple[float] = (1., 1000.),
    steps_anneal: int = 3,
    resume: bool = False,
    *,
    restrict_to: Optional[Tuple[str, Sequence[str]]] = None,
    random_seed: Optional[int] = None,
    key_added: str = 'nsbm',
    adjacency: Optional[sparse.spmatrix] = None,
    neighbors_key: Optional[str] = 'neighbors',
    directed: bool = False,
    use_weights: bool = False,
    prune: bool = False,
    return_low: bool = False,
    copy: bool = False,
    minimize_args: Optional[Dict] = {},
    equilibrate_args: Optional[Dict] = {},
) -> Optional[AnnData]:
    """\
    Cluster cells into subgroups [Peixoto14]_.

    Cluster cells using the nested Stochastic Block Model [Peixoto14]_,
    a hierarchical version of Stochastic Block Model [Holland83]_, performing
    Bayesian inference on node groups. NSBM should circumvent classical
    limitations of SBM in detecting small groups in large graphs,
    replacing the noninformative priors with a hierarchy of priors
    and hyperpriors.

    This requires having run :func:`~scanpy.pp.neighbors` or
    :func:`~scanpy.external.pp.bbknn` first.

    Parameters
    ----------
    adata
        The annotated data matrix.
    max_iterations
        Maximal number of iterations to be performed by the equilibrate step.
    epsilon
        Relative changes in entropy smaller than epsilon will
        not be considered as record-breaking.
    equilibrate
        Whether or not to perform the mcmc_equilibrate step.
        Equilibration should always be performed. Note, also, that without
        equilibration it won't be possible to collect marginals.
    collect_marginals
        Whether or not to collect each node's probability of belonging
        to a specific partition.
    niter_collect
        Number of iterations to force when collecting marginals. This will
        increase the precision when calculating probabilities.
    wait
        Number of iterations to wait for a record-breaking event.
        Higher values result in longer computations. Set it to small values
        when performing quick tests.
    nbreaks
        Number of iteration intervals (of size `wait`) without
        record-breaking events necessary to stop the algorithm.
    hierarchy_length
        Initial length of the hierarchy. When large values are
        passed, the top-most levels will be uninformative, as they
        will likely contain the very same groups. Increase this value
        if a very large number of cells is analyzed (>100,000).
    deg_corr
        Whether to use degree correction in the minimization step. In many
        real world networks this is the case, although this doesn't seem to be
        the case for KNN graphs used in scanpy.
    multiflip
        Whether to perform MCMC sweep with multiple simultaneous moves to sample
        network partitions. It may result in slightly longer runtimes, but under
        the hood it allows for a more efficient space exploration.
    fast_model
        Whether to skip the initial minimization step and let the MCMC find a
        solution. This approach tends to be faster and consumes less memory,
        but may be less accurate.
    fast_tol
        Tolerance for fast model convergence.
    n_sweep 
        Number of iterations to be performed in the fast model MCMC greedy approach
    beta
        Inverse temperature for MCMC greedy approach    
    n_init
        Number of initial minimizations to be performed. The one with the
        smallest entropy is chosen.
    beta_range
        Inverse temperature at the beginning and the end of the equilibration
    steps_anneal
        Number of steps in which the simulated annealing is performed
    resume
        Start from a previously created model, if any, without initializing a
        new model.
    key_added
        `adata.obs` key under which to add the cluster labels.
    adjacency
        Sparse adjacency matrix of the graph, defaults to
        `adata.uns['neighbors']['connectivities']` in case of scanpy<=1.4.6 or
        `adata.obsp[neighbors_key][connectivity_key]` for scanpy>1.4.6
    neighbors_key
        The key passed to `sc.pp.neighbors`
    directed
        Whether to treat the graph as directed or undirected.
    use_weights
        If `True`, edge weights from the graph are used in the computation
        (placing more emphasis on stronger edges). Note that this
        increases computation times
    prune
        Some high levels in the hierarchy may contain the same information in terms of
        cell assignments, even if they apparently have different group names. When this
        option is set to `True`, the function only returns informative levels.
        Note, however, that cell affinities are still reported for all levels. Pruning
        does not rename group levels
    return_low
        Whether or not to return nsbm_level_0 in adata.obs. This level usually contains
        so many groups that it cannot be plotted anyway, but it may be useful for particular
        analyses. By default it is not returned.
    copy
        Whether to copy `adata` or modify it inplace.
    random_seed
        Random number to be used as seed for graph-tool

    Returns
    -------
    `adata.obs[key_added]`
        Array of dim (number of samples) that stores the subgroup id
        (`'0'`, `'1'`, ...) for each cell. 
    `adata.uns['nsbm']['params']`
        A dict with the values for the parameters `resolution`, `random_state`,
        and `n_iterations`.
    `adata.uns['nsbm']['stats']`
        A dict with the values returned by mcmc_sweep
    `adata.uns['nsbm']['cell_affinity']`
        A `np.ndarray` with cell probability of belonging to a specific group
    `adata.uns['nsbm']['state']`
        The NestedBlockModel state object
    """

    if resume:
        # if the fast_model is chosen perform equilibration anyway
        # also if a model has previously created
        equilibrate = True

    if resume and ('nsbm' not in adata.uns
                   or 'state' not in adata.uns['nsbm']):
        # let the model proceed as default
        logg.warning('Resuming has been specified but a state was not found\n'
                     'Will continue with default minimization step')

        resume = False

    if random_seed:
        np.random.seed(random_seed)
        gt.seed_rng(random_seed)

    if collect_marginals:
        logg.warning('Collecting marginals has a large impact on running time')
        if not equilibrate:
            raise ValueError(
                "You can't collect marginals without MCMC equilibrate "
                "step. Either set `equlibrate` to `True` or "
                "`collect_marginals` to `False`")

    start = logg.info('minimizing the nested Stochastic Block Model')
    adata = adata.copy() if copy else adata
    # are we clustering a user-provided graph or the default AnnData one?
    if adjacency is None:
        if neighbors_key not in adata.uns:
            raise ValueError('You need to run `pp.neighbors` first '
                             'to compute a neighborhood graph.')
        elif 'connectivities_key' in adata.uns[neighbors_key]:
            # scanpy>1.4.6 has matrix in another slot
            conn_key = adata.uns[neighbors_key]['connectivities_key']
            adjacency = adata.obsp[conn_key]
        else:
            # scanpy<=1.4.6 has sparse matrix here
            adjacency = adata.uns[neighbors_key]['connectivities']
    if restrict_to is not None:
        restrict_key, restrict_categories = restrict_to
        adjacency, restrict_indices = restrict_adjacency(
            adata,
            restrict_key,
            restrict_categories,
            adjacency,
        )
    # convert the adjacency matrix to a graph-tool graph
    g = get_graph_tool_from_adjacency(adjacency, directed=directed)

    recs = []
    rec_types = []
    if use_weights:
        # this is not ideal to me, possibly we may need to transform
        # weights. More tests needed.
        recs = [g.ep.weight]
        rec_types = ['real-normal']

    if n_init < 1:
        n_init = 1

    if fast_model:
        # do not minimize, start with a dummy state and perform only equilibrate

        states = [
            gt.NestedBlockState(g=g,
                                state_args=dict(deg_corr=deg_corr,
                                                recs=recs,
                                                rec_types=rec_types))
            for n in range(n_init)
        ]
        for x in range(n_init):
            dS = 1
            while np.abs(dS) > fast_tol:
                # perform sweep until a tolerance is reached
                dS, _, _ = states[x].multiflip_mcmc_sweep(beta=beta,
                                                          niter=n_sweep)

        _amin = np.argmin([s.entropy() for s in states])
        state = states[_amin]

        #        dS = 1
        #        while np.abs(dS) > fast_tol:
        #            dS, nattempts, nmoves = state.multiflip_mcmc_sweep(niter=10, beta=np.inf)
        bs = state.get_bs()
        logg.info('    done', time=start)

    elif resume:
        # create the state and make sure sampling is performed
        state = adata.uns['nsbm']['state'].copy(sampling=True)
        bs = state.get_bs()
        # get the graph from state
        g = state.g
    else:

        states = [
            gt.minimize_nested_blockmodel_dl(
                g,
                deg_corr=deg_corr,
                state_args=dict(recs=recs, rec_types=rec_types),
                **minimize_args) for n in range(n_init)
        ]

        state = states[np.argmin([s.entropy() for s in states])]
        #        state = gt.minimize_nested_blockmodel_dl(g, deg_corr=deg_corr,
        #                                                 state_args=dict(recs=recs,
        #                                                 rec_types=rec_types),
        #                                                 **minimize_args)
        logg.info('    done', time=start)
        bs = state.get_bs()
        if len(bs) <= hierarchy_length:
            # increase hierarchy length up to the specified value;
            # according to Tiago Peixoto, 10 is reasonably large, as the number
            # of groups decays exponentially across levels
            bs += [np.zeros(1)] * (hierarchy_length - len(bs))
        else:
            logg.warning(
                f'A hierarchy length of {hierarchy_length} has been specified\n'
                f'but the minimized model contains {len(bs)} levels')
            pass
        # create a new state with inferred blocks
        state = gt.NestedBlockState(g,
                                    bs,
                                    state_args=dict(recs=recs,
                                                    rec_types=rec_types),
                                    sampling=True)

    # equilibrate the Markov chain
    if equilibrate:
        logg.info('running MCMC equilibration step')
        # equlibration done by simulated annealing

        equilibrate_args['wait'] = wait
        equilibrate_args['nbreaks'] = nbreaks
        equilibrate_args['max_niter'] = max_iterations
        equilibrate_args['multiflip'] = multiflip
        equilibrate_args['mcmc_args'] = {'niter': 10}

        dS, nattempts, nmoves = gt.mcmc_anneal(
            state,
            mcmc_equilibrate_args=equilibrate_args,
            niter=steps_anneal,
            beta_range=beta_range)
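        # note: mcmc_anneal sweeps the inverse temperature beta across
        # beta_range over niter iterations, so proposed moves become
        # increasingly greedy as the chain cools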
    if collect_marginals and equilibrate:
        # here we only retain level_0 counts until we figure out how to
        # correctly propagate counts to higher levels; it is also unclear
        # whether this should be placed after group definition or not
        logg.info('    collecting marginals')
        group_marginals = [
            np.zeros(g.num_vertices() + 1) for s in state.get_levels()
        ]

        def _collect_marginals(s):
            levels = s.get_levels()
            for l, sl in enumerate(levels):
                group_marginals[l][sl.get_nonempty_B()] += 1

        gt.mcmc_equilibrate(state,
                            wait=wait,
                            nbreaks=nbreaks,
                            epsilon=epsilon,
                            max_niter=max_iterations,
                            multiflip=True,
                            force_niter=niter_collect,
                            mcmc_args=dict(niter=10),
                            callback=_collect_marginals)
        logg.info('    done', time=start)

    # everything is in place, we need to fill all slots
    # first build an array with the group label of every cell at each level
    groups = np.zeros((g.num_vertices(), len(bs)), dtype=int)

    for x in range(len(bs)):
        # for each level, project labels to the vertex level
        # so that every cell has a name. Note that at this level
        # the labels are not necessarily consecutive
        groups[:, x] = state.project_partition(x, 0).get_array()

    groups = pd.DataFrame(groups).astype('category')

    # rename categories from 0 to n
    for c in groups.columns:
        new_cat_names = {
            cx: str(cn)
            for cn, cx in enumerate(groups.loc[:, c].cat.categories)
        }
        groups.loc[:, c].cat.rename_categories(new_cat_names, inplace=True)

    if restrict_to is not None:
        groups.index = adata.obs[restrict_key].index
    else:
        groups.index = adata.obs_names

    # add column names
    groups.columns = [
        "%s_level_%d" % (key_added, level) for level in range(len(bs))
    ]

    # remove any column with the same key
    keep_columns = [
        x for x in adata.obs.columns
        if not x.startswith('%s_level_' % key_added)
    ]
    adata.obs = adata.obs.loc[:, keep_columns]
    # concatenate obs with new data, skipping level_0 which is usually
    # uninformative. In the future it may be useful to reintegrate it;
    # we need it in this function anyway, to match groups with node marginals
    if return_low:
        adata.obs = pd.concat([adata.obs, groups], axis=1)
    else:
        adata.obs = pd.concat([adata.obs, groups.iloc[:, 1:]], axis=1)

    # add some unstructured info

    adata.uns['nsbm'] = {}
    adata.uns['nsbm']['stats'] = dict(
        level_entropy=np.array(
            [state.level_entropy(x) for x in range(len(state.levels))]),
        modularity=np.array([
            gt.modularity(g, state.project_partition(x, 0))
            for x in range(len(state.levels))
        ]))
    if equilibrate:
        adata.uns['nsbm']['stats']['dS'] = dS
        adata.uns['nsbm']['stats']['nattempts'] = nattempts
        adata.uns['nsbm']['stats']['nmoves'] = nmoves

    adata.uns['nsbm']['state'] = state

    # now add marginal probabilities.

    if collect_marginals:
        # trim group marginals: we collected counts in vectors as long as
        # the number of cells; cut them to the appropriate length
        adata.uns['nsbm']['group_marginals'] = {}
        for nl, level_marginals in enumerate(group_marginals):
            idx = np.where(level_marginals > 0)[0] + 1
            adata.uns['nsbm']['group_marginals'][nl] = np.array(
                level_marginals[:np.max(idx)])

    # prune uninformative levels, if any
    if prune:
        to_remove = prune_groups(groups)
        logg.info(f'    Removing levels {to_remove}')
        adata.obs.drop(to_remove, axis='columns', inplace=True)

    # calculate log-likelihood of cell moves over the remaining levels
    # we have to calculate events at level 0 and propagate to upper levels
    logg.info('    calculating cell affinity to groups')
    levels = [
        int(x.split('_')[-1]) for x in adata.obs.columns
        if x.startswith(f'{key_added}_level')
    ]
    adata.uns['nsbm']['cell_affinity'] = dict.fromkeys(
        [str(x) for x in levels])
    p0 = get_cell_loglikelihood(state, level=0, as_prob=True)

    adata.uns['nsbm']['cell_affinity']['0'] = p0
    l0 = "%s_level_0" % key_added
    for nl, level in enumerate(groups.columns[1:]):
        cross_tab = pd.crosstab(groups.loc[:, l0], groups.loc[:, level])
        cl = np.zeros((p0.shape[0], cross_tab.shape[1]), dtype=p0.dtype)
        for x in range(cl.shape[1]):
            # sum counts of level_0 groups corresponding to
            # this group at current level
            cl[:, x] = p0[:, np.where(cross_tab.iloc[:, x] > 0)[0]].sum(axis=1)
        adata.uns['nsbm']['cell_affinity'][str(nl + 1)] = cl / np.sum(
            cl, axis=1)[:, None]
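    # toy illustration of the loop above (assumed numbers): if level_0
    # groups {0, 1} merge into a single group at a given level while group
    # {2} stays separate, a cell with p0 = [0.5, 0.3, 0.2] gets affinities
    # [0.8, 0.2] after the column sums and renormalization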

    # last step is recording some parameters used in this analysis
    adata.uns['nsbm']['params'] = dict(
        epsilon=epsilon,
        wait=wait,
        nbreaks=nbreaks,
        equilibrate=equilibrate,
        fast_model=fast_model,
        collect_marginals=collect_marginals,
        hierarchy_length=hierarchy_length,
        random_seed=random_seed,
        prune=prune,
    )

    logg.info(
        '    finished',
        time=start,
        deep=
        (f'found {state.get_levels()[1].get_nonempty_B()} clusters at level_1, and added\n'
         f'    {key_added!r}, the cluster labels (adata.obs, categorical)'),
    )
    return adata if copy else None
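
A minimal usage sketch of the function above (hedged: the name nested_model,
the default key_added='nsbm' and the scanpy-based workflow are assumptions
inferred from the slots it fills):

import scanpy as sc

adata = sc.datasets.pbmc68k_reduced()    # small bundled example dataset
sc.pp.neighbors(adata)                   # kNN graph used as the adjacency
nested_model(adata, equilibrate=False)   # assumed entry point defined above
print(adata.obs.filter(like='_level_').head())
print(adata.uns['nsbm']['stats']['level_entropy'])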
Code example #23
def Stochastic():

    import pandas as pd
    import numpy as np
    import pprint as pp
    import locale
    import matplotlib.pyplot as plt
    import matplotlib.ticker as tkr
    import graph_tool.all as gt
    import math

    # TODO: move this import (and its dependency) up to module level
    from GAC_Graph_Builder import findEdges

    t = gt.Graph(directed=True)

    tprop_label = t.new_vertex_property("string")
    tprop_instType = t.new_vertex_property("string")

    linkDict, instSet = findEdges()

    # ingest our university checking lists [this is sloppy, to be improved]

    foreignUniTxt = open('Workaround txts/Foreign Unis.txt', 'r')
    UKUniTxt = open('Workaround txts/UK Unis.txt', 'r')

    foreignUniVals = foreignUniTxt.read().splitlines()
    UKUniVals = UKUniTxt.read().splitlines()

    # add vertices and label them based on their names.

    ######## FILTERING BASED ON CORDIS RESIDENCY ##########

    dfCordisNames = pd.read_pickle('Pickles/CORDIS_Countries.pickle')

    eligiblenames = dfCordisNames.name.values.tolist()

    veryDirtyWorkaround = ['FOCUS', 'FLUOR', 'GE', 'NI', 'OTE', 'ROKE']

    for inst in instSet:

        nameCheck = inst.upper()
        firstFound = next((x for x in eligiblenames if nameCheck in x), None)
        if inst in foreignUniVals:
            del linkDict[inst]
        elif nameCheck in veryDirtyWorkaround:
            del linkDict[inst]
        elif firstFound is None:
            del linkDict[inst]
        else:
            vert = t.add_vertex()
            tprop_label[vert] = str(inst)

    del linkDict['']

    # internalise property map
    t.vertex_properties["label"] = tprop_label

    # explicitly declare the hierarchy defining vertices and edges, the sequencing here matters.
    for_uni = t.add_vertex()
    UK_uni = t.add_vertex()
    other = t.add_vertex()
    root = t.add_vertex()

    edgeList = [(root, for_uni), (root, UK_uni), (root, other)]
    t.add_edge_list(edgeList)

    # use label name to add edges to hierarchy
    for i in range(t.num_vertices() - 4):  # skip the 4 hierarchy vertices
        if tprop_label[i] in foreignUniVals:
            t.add_edge(for_uni, t.vertex(i))
            tprop_instType[i] = "Foreign Uni"
        elif tprop_label[i] in UKUniVals:
            t.add_edge(UK_uni, t.vertex(i))
            tprop_instType[i] = "UK Uni"
        else:
            t.add_edge(other, t.vertex(i))
            tprop_instType[i] = "Other Institution"

    t.vertex_properties["instType"] = tprop_instType
    tpos = gt.radial_tree_layout(t,
                                 t.vertex(t.num_vertices() - 1),
                                 rel_order_leaf=True)

    ######### MAIN GRAPH DRAWING ################

    g = gt.Graph(directed=False)
    # creates graph g, using the same nodes (with the same index!)

    for v in t.vertices():
        gv = g.add_vertex()

    # we remove: root, for_uni, uk_uni or 'other' vertices

    lower = g.num_vertices() - 5
    current = g.num_vertices() - 1

    while current > lower:
        g.remove_vertex(current)
        current -= 1

    # Pull vertex properties from t

    labelDict = t.vertex_properties["label"]
    instTypeDict = t.vertex_properties["instType"]

    # create properties for g vertices

    gprop_label = g.new_vertex_property("string")
    gprop_instType = g.new_vertex_property("string")

    # match labels between g and t

    for v in g.vertices():
        gprop_label[v] = labelDict[v]
        gprop_instType[v] = instTypeDict[v]

    # make property map internal to graph g
    g.vertex_properties["label"] = gprop_label
    g.vertex_properties["instType"] = gprop_instType

    ###### COLOUR VERTICES #########

    # reuse the gprop_ naming convention from above

    gprop_vcolour = g.new_vertex_property("string")

    for v in g.vertices():

        if gprop_instType[v] == "Foreign Uni":
            gprop_vcolour[v] = "red"
        elif gprop_instType[v] == "UK Uni":
            gprop_vcolour[v] = "blue"
        else:
            gprop_vcolour[v] = "white"

    g.vertex_properties["vcolour"] = gprop_vcolour

    # create numLinks edge property for g edges

    eprop_numLinks = g.new_edge_property("int")

    # creates the edges between nodes

    for i in linkDict:
        for n in linkDict[i]:
            #print(i)
            vertex_i = gt.find_vertex(g, gprop_label, i)[0]
            #print(n)
            try:
                vertex_n = gt.find_vertex(g, gprop_label, n)[0]
                e = g.add_edge(vertex_i, vertex_n)
                eprop_numLinks[e] = linkDict[i][n]
            except IndexError:
                # target institution was filtered out above; skip this edge
                pass

    ##### EXPERIMENTAL SIZE THINGS ######

    #gvprop_size = g.new_vertex_property('float')

    deleteList = []

    for v in g.vertices():

        # sum the num edges and the number of links they correspond to
        # use this to find a ratio and scale size off of this.

        numEdges = sum(1 for _ in v.all_edges())
        numLinks = 0

        for e in v.all_edges():

            numLinks += eprop_numLinks[e]

        #print(gprop_label[v])
        print("NumEdges = " + str(numEdges) + " NumLinks = " + str(numLinks))
        # create a delete list

        try:
            ratio = (numLinks / numEdges) * 5 * 2
        except ZeroDivisionError:
            # vertex has no edges; mark it for deletion
            deleteList.append(v)

        #gvprop_size[v] = ratio

    #g.vertex_properties['size'] = gvprop_size

    #### Delete linkless vertices #######

    for v in reversed(sorted(deleteList)):
        g.remove_vertex(v)

    for v in reversed(sorted(deleteList)):
        t.remove_vertex(v)

    tpos = gt.radial_tree_layout(t,
                                 t.vertex(t.num_vertices() - 1),
                                 rel_order_leaf=True)

    #######

    ############ stochastic BLOCK MODEL ####################

    state = gt.minimize_nested_blockmodel_dl(g, deg_corr=True, verbose=True)
    t = gt.get_hierarchy_tree(state)[0]
    tpos = pos = gt.radial_tree_layout(t,
                                       t.vertex(t.num_vertices() - 1),
                                       weighted=True)

    # in order to make sure labels fit in the image we have to manually adjust the
    # co-ordinates of each vertex.

    x, y = gt.ungroup_vector_property(tpos, [0, 1])
    x.a = (x.a - x.a.min()) / (x.a.max() - x.a.min()) * 1400 + 400
    y.a = (y.a - y.a.min()) / (y.a.max() - y.a.min()) * 1400 + 400
    tpos = gt.group_vector_property([x, y])

    # This draws the 'Bezier spline control points' for edges
    # it draws the edges directed in graph g, but uses the hierarchy / positioning of graph t.
    cts = gt.get_hierarchy_control_points(g, t, tpos)

    pos = g.own_property(tpos)

    gt.graph_draw(
        g,
        vertex_text_position="centered",
        vertex_text=g.vertex_properties["label"],
        vertex_font_size=14,
        vertex_anchor=0,
        vertex_aspect=1,
        vertex_shape="square",
        vertex_fill_color=g.vertex_properties["vcolour"],
        vertex_size=10,
        fit_view=False,
        # edge_color=g.edge_properties["colour"],
        # edge_pen_width=g.edge_properties["thickness"],
        edge_end_marker="none",
        edge_pen_width=0.2,
        edge_color="white",
        bg_color=[0, 0, 0, 1],
        output_size=[2000, 2000],
        output='UK_ONLY_RELATIONSHIPS_stochastic.png',
        pos=pos,
        edge_control_points=cts)

if __name__ == '__main__':
    Stochastic()
Code example #24
        Y = fct.mds_shortest_paths(G, dimension)
        D = fct.comp_clusters_communities(
            Y,
            G.graph['labels_communities'],
            algo=False,
            n_clusters=G.graph['number_communities'])
        zmds.append(np.mean(D))

        g = gt.load_graph_from_csv(G.graph['edgelist'],
                                   directed=isDirected,
                                   csv_options={
                                       "delimiter": " ",
                                       "quotechar": '"'
                                   })
        block = gt.minimize_nested_blockmodel_dl(
            g,
            B_min=G.graph['number_communities'],
            B_max=G.graph['number_communities'])
        num_block = block.levels[0].get_B()
        block = block.levels[0].get_blocks()
        # read each node's block assignment into a flat partition list
        partition = [block[i] for i in range(G.number_of_nodes())]
        zsbm.append(ami(partition, G.graph['labels_communities']))

        igraph = ig.Read_Edgelist(G.graph['edgelist'])
        part = igraph.community_infomap()
        partition = [0 for i in range(G.number_of_nodes())]
        for i in range(G.number_of_nodes()):
            for j in range(len(part)):
                if i in part[j]:
                    partition[i] = j
Code example #25
File: communities_mcmc.py Project: bio16/TP_especial
import numpy as np
import argparse as arg
import graph_tool.all as gt

argparser = arg.ArgumentParser(description='')
argparser.add_argument('file', help='graph file')
argparser.add_argument('--overlap', action='store_true')
argparser.add_argument('--plot', action='store_true')

args = argparser.parse_args()

g = gt.load_graph(args.file)

weight = g.ep['weight'] if 'weight' in g.ep else None  # NOTE: unused below

nested_state = gt.minimize_nested_blockmodel_dl(g,
                                                deg_corr=True,
                                                overlap=args.overlap)

levels = nested_state.get_levels()
partitions = [np.array(list(level.get_blocks())) for level in levels]

filename = 'communities_mcmc_' + '.'.join(
    args.file.split('/')[-1].split('.')[:-1]) + '_hierarchy'
if args.overlap:
    filename += '-overlap'

np.save('./partitions/' + filename + '_partition-level-0.npy', partitions[0])
np.save('./partitions/' + filename + '_partition-level-1.npy', partitions[1])

if args.plot:
    nested_state.draw(output='./schemes/' + filename + ".pdf")
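# Example invocation (paths are assumptions; the './partitions' and
# './schemes' directories must already exist):
#   python communities_mcmc.py graphs/network.xml.gz --overlap --plot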
Code example #26
        else:
            ptu_hrange[i] = 'I'

    v_HRange = graph.new_vertex_property('string')
    for v in graph.vertices():
        v_HRange[v] = ptu_hrange[graph.vp.Ptu[v]]

    return v_HRange


state_list, entropy_list = [], []
for k in range(args.nToss):
    if args.hierarchical:
        # Nested stochastic block model (hierarchical SBM)
        if args.weight_model == 'None':
            state = gt.minimize_nested_blockmodel_dl(g, deg_corr=args.deg_corr)
        elif args.weight_model == 'Exponential':
            state = gt.minimize_nested_blockmodel_dl(
                g,
                deg_corr=args.deg_corr,
                state_args=dict(recs=[y], rec_types=['real-exponential']))
        else:
            state = gt.minimize_nested_blockmodel_dl(
                g,
                deg_corr=args.deg_corr,
                state_args=dict(recs=[y], rec_types=['real-normal']))
        state_0 = state.get_levels()[0]
        nClass = len(np.unique(state_0.get_blocks().a))
    else:
        # Flat stochastic block model (SBM)
        if args.weight_model == 'None':
Code example #27
File: simulation.py Project: santirdnd/PTU_paper
        print("  NMI_SBMstd = %.5f\tNMI_SBMCCstd = %.5f" % (np.std(np.asarray(nmi_sbm), 0, ddof=1)[0], np.std(np.asarray(nmi_sbm), 0, ddof=1)[1]), flush=True)
    print(flush=True)
    ami_sbm.append([adjusted_mutual_info_score(g.vp.RealClass.a, blocks.a), adjusted_mutual_info_score(g.vp.RealClass.a, list(preds))])
    print("  AMI_SBM = %.5f\tAMI_SBMCC = %.5f" % (ami_sbm[i][0], ami_sbm[i][1]), flush=True)
    print("  AMI_SBMavg = %.5f\tAMI_SBMCCavg = %.5f" % (np.mean(np.asarray(ami_sbm), 0)[0], np.mean(np.asarray(ami_sbm), 0)[1]), flush=True)
    if i > 2:
        print("  AMI_SBMstd = %.5f\tAMI_SBMCCstd = %.5f" % (np.std(np.asarray(ami_sbm), 0, ddof=1)[0], np.std(np.asarray(ami_sbm), 0, ddof=1)[1]), flush=True)
    print(flush=True)
    ar_sbm.append([adjusted_rand_score(g.vp.RealClass.a, blocks.a), adjusted_rand_score(g.vp.RealClass.a, list(preds))])
    print("  AR_SBM = %.5f\tAR_SBMCC = %.5f" % (ar_sbm[i][0], ar_sbm[i][1]), flush=True)
    print("  AR_SBMavg = %.5f\tAR_SBMCCavg = %.5f" % (np.mean(np.asarray(ar_sbm), 0)[0], np.mean(np.asarray(ar_sbm), 0)[1]), flush=True)
    if i > 2:
        print("  AR_SBMstd = %.5f\tAR_SBMCCstd = %.5f" % (np.std(np.asarray(ar_sbm), 0, ddof=1)[0], np.std(np.asarray(ar_sbm), 0, ddof=1)[1]), flush=True)
    print(flush=True)

    state_nested = gt.minimize_nested_blockmodel_dl(g, deg_corr=False)
    #write_classes_hierarchical('sim/sim_NSBM.tsv', g, state_nested)
    state_nested_l0 = state_nested.get_levels()[0]
    #state_nested_l0.draw(output="sim/sim_NSBM.png")
    blocks_n = state_nested_l0.get_blocks()
    preds_n = get_blocksCC(g, blocks_n)
    nmi_nsbm.append([normalized_mutual_info_score(g.vp.RealClass.a, blocks_n.a), normalized_mutual_info_score(g.vp.RealClass.a, list(preds_n))])
    print("  NMI_NSBM = %.5f\tNMI_NSBMCC = %.5f" % (nmi_nsbm[i][0], nmi_nsbm[i][1]), flush=True)
    print("  NMI_NSBMavg = %.5f\tNMI_NSBMCCavg = %.5f" % (np.mean(np.asarray(nmi_nsbm), 0)[0], np.mean(np.asarray(nmi_nsbm), 0)[1]), flush=True)
    if i > 2:
        print("  NMI_NSBMstd = %.5f\tNMI_NSBMCCstd = %.5f" % (np.std(np.asarray(nmi_nsbm), 0, ddof=1)[0], np.std(np.asarray(nmi_nsbm), 0, ddof=1)[1]), flush=True)
    print(flush=True)
    ami_nsbm.append([adjusted_mutual_info_score(g.vp.RealClass.a, blocks_n.a), adjusted_mutual_info_score(g.vp.RealClass.a, list(preds_n))])
    print("  AMI_NSBM = %.5f\tAMI_NSBMCC = %.5f" % (ami_nsbm[i][0], ami_nsbm[i][1]), flush=True)
    print("  AMI_NSBMavg = %.5f\tAMI_NSBMCCavg = %.5f" % (np.mean(np.asarray(ami_nsbm), 0)[0], np.mean(np.asarray(ami_nsbm), 0)[1]), flush=True)
    if i > 2:
Code example #28
File: testgraphtool.py Project: dimbi/Sloth
import math
import time

import graph_tool.all as gt
from numpy.random import poisson, randint

start = time.time()

def corr(a, b):
    # strong within-block, weak between-block connection probability
    if a == b:
        return 0.999
    else:
        return 0.001

g, bm = gt.random_graph(100000, 
                        lambda: poisson(20), 
                        directed=False,
                        block_membership=lambda: randint(50),
                        vertex_corr=corr)
                        
                        
print(g.num_vertices(), g.num_edges())
state = gt.minimize_nested_blockmodel_dl(g, deg_corr=True)


diff = time.time() - start

nodes = g.num_vertices()
edges = g.num_edges()

filename = 'undirected_%dkN_%dkE' % (nodes/1000,edges/1000)
with open(filename+'.txt','w+') as f:
    f.write("test: %d nodes %d edges\n" % (nodes,edges))

    hours = math.floor(diff/3600.0)
    min = math.floor((diff % 3600)/60.0)
    sec = math.floor((diff % 60))
    
Code example #29
File: plotting.py Project: TheChymera/DeGraVi
import math
import os

import graph_tool.all as gt


def circular_depgraph(g,
	plot_type="graph",
	save_as="~/depgraph.png",
	):

	save_as = os.path.abspath(os.path.expanduser(save_as))

	state = gt.minimize_nested_blockmodel_dl(g, deg_corr=True)
	t = gt.get_hierarchy_tree(state)[0]
	tpos = pos = gt.radial_tree_layout(t, t.vertex(t.num_vertices() - 1), weighted=True)
	cts = gt.get_hierarchy_control_points(g, t, tpos)
	pos = g.own_property(tpos)

	vtext_rotation = g.new_vertex_property('double')
	g.vertex_properties['vtext_rotation'] = vtext_rotation

	for v in g.vertices():
		#set vtext_rotation
		if pos[v][0] >= 0:
			try:
				vtext_rotation[v] = math.atan(pos[v][1]/pos[v][0])
			except ZeroDivisionError:
				vtext_rotation[v] = 0
		else:
			vtext_rotation[v] = math.pi + math.atan(pos[v][1]/pos[v][0])

	#here we do black magic to get proper output size (controls vertex spacing) and scaling
	vertex_number = g.num_vertices()
	view_zoom = (vertex_number*36.0485)**(-10.068/vertex_number)+0.017037
	output_size = int(vertex_number*5.9+400)
	dpi=300
	if output_size >= 18000:
		print("WARNING: You are exceding the maximal printable size - 150cm in one dimension at 300dpi")
	print("Plotting dependency graph containing {0} packages, at a resolution of {1} pixels by {1} pixels".format(vertex_number, output_size))

	if plot_type == "graph":
		gt.graph_draw(g, pos=pos,
				edge_control_points=cts,
				vertex_anchor=0,
				vertex_color=g.vertex_properties['vcolor'],
				vertex_fill_color=g.vertex_properties['vcolor'],
				vertex_font_size=14,
				vertex_text=g.vertex_properties['vlabel'],
				vertex_text_position=6.2,
				vertex_text_rotation=g.vertex_properties['vtext_rotation'],
				vertex_text_color=g.vertex_properties['vtext_color'],
				vertex_size=16,
				edge_start_marker="none",
				edge_mid_marker="none",
				edge_end_marker="none",
				edge_gradient=g.edge_properties["egradient"],
				eorder=g.edge_properties["eorder"],
				bg_color=[1,1,1,1],
				output_size=[output_size,output_size],
				output=save_as,
				fit_view=view_zoom,
				)
	elif plot_type == "state":
		gt.draw_hierarchy(state,
			vertex_text_position=1,
			vertex_font_size=12,
			vertex_text=g.vertex_properties['label'],
			vertex_text_rotation=g.vertex_properties['text_rotation'],
			vertex_anchor=0,
			bg_color=[1,1,1,1],
			output_size=[output_size,output_size],
			output=save_as,
			)
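# Hedged usage sketch: assumes a graph built elsewhere that carries the
# vertex/edge properties referenced above ('vcolor', 'vlabel', 'vtext_color',
# 'egradient', 'eorder'):
#   g = gt.load_graph('depgraph.gt')
#   circular_depgraph(g, plot_type="graph", save_as="~/depgraph.png")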
Code example #30
File: 02.py Project: Ziaeemehr/miscellaneous
import graph_tool.all as gt
import numpy as np

adj = np.genfromtxt('C.dat')
g = gt.Graph(directed=False)
# g.add_edge_list(adj.nonzero())

g.add_vertex(len(adj))
edge_weights = g.new_edge_property('double')
num_vertices = adj.shape[0]
for i in range(num_vertices - 1):
    for j in range(i + 1, num_vertices):
        if adj[i, j] != 0:
            e = g.add_edge(i, j)
            edge_weights[e] = adj[i, j]

# pos = gt.arf_layout(g, max_iter=0)
pos = gt.radial_tree_layout(g, g.vertex(0))
gt.graph_draw(g, pos=pos, output="radial_tree_layout.pdf")

state = gt.minimize_nested_blockmodel_dl(g)
gt.draw_hierarchy(state, output="celegansneural_nested_mdl.pdf")
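# Note: the edge weights built above are never passed to the fit. A hedged
# sketch of a weighted variant, using graph-tool's edge covariate mechanism:
#   g.edge_properties['weight'] = edge_weights
#   state = gt.minimize_nested_blockmodel_dl(
#       g, state_args=dict(recs=[edge_weights],
#                          rec_types=['real-exponential']))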
Code example #31
File: detecting.py Project: kkonevets/ego
def find_communities(nnodes, edges, alg, params=None):
    def membership2cs(membership):
        cs = {}
        for i, m in enumerate(membership):
            cs.setdefault(m, []).append(i)
        return cs.values()

    def connected_subgraphs(G: nx.Graph):
        for comp in nx.connected_components(G):
            sub = nx.induced_subgraph(G, comp)
            sub = nx.convert_node_labels_to_integers(sub,
                                                     label_attribute='old')
            yield sub

    def apply_subgraphs(algorithm, **params):
        cs = []
        for sub in connected_subgraphs(G):
            if len(sub.nodes) <= 3:
                coms = [sub.nodes]  # let it be a cluster
            else:
                coms = algorithm(sub, **params)
                if hasattr(coms, 'communities'):
                    coms = coms.communities

            for com in coms:
                cs.append([sub.nodes[i]['old'] for i in set(com)])
        return cs

    def karate_apply(algorithm, graph, **params):
        model = algorithm(**params)
        model.fit(graph)
        return membership2cs(model.get_memberships().values())

    if alg == 'big_clam':
        c = -1 if params['c'] == 'auto' else int(params['c'])
        cs = BigClam('../../snap').run(edges, c=c, xc=int(params['xc']))
    elif alg in ('gmm', 'kclique', 'lprop', 'lprop_async', 'fluid',
                 'girvan_newman', 'angel', 'congo', 'danmf', 'egonet_splitter',
                 'lfm', 'multicom', 'nmnf', 'nnsed', 'node_perception', 'slpa',
                 'GEMSEC', 'EdMot', 'demon'):
        G = nx.Graph()
        G.add_edges_from(edges)

        if alg == 'gmm':
            cs = community.greedy_modularity_communities(G)
        elif alg == 'kclique':
            params = {k: float(v) for k, v in params.items()}
            cs = community.k_clique_communities(G, **params)
        elif alg == 'lprop':
            cs = community.label_propagation_communities(G)
        elif alg == 'lprop_async':
            cs = community.asyn_lpa_communities(G, seed=0)
        elif alg == 'fluid':
            params = {k: int(v) for k, v in params.items()}
            params['seed'] = 0
            cs = apply_subgraphs(community.asyn_fluidc, **params)
        elif alg == 'girvan_newman':
            comp = community.girvan_newman(G)
            for cs in itertools.islice(comp, int(params['k'])):
                pass
        elif alg == 'angel':
            params = {k: float(v) for k, v in params.items()}
            cs = cdlib.angel(G, **params).communities
        elif alg == 'congo':  # too slow
            ncoms = int(params['number_communities'])
            cs = []
            for sub in connected_subgraphs(G):
                if len(sub.nodes) <= max(3, ncoms):
                    cs.append(sub.nodes)  # let it be a cluster
                else:
                    coms = cdlib.congo(sub,
                                       number_communities=ncoms,
                                       height=int(params['height']))
                    for com in coms.communities:
                        cs.append([sub.nodes[i]['old'] for i in set(com)])
        elif alg == 'danmf':  # no overlapping
            cs = apply_subgraphs(cdlib.danmf)
        elif alg == 'egonet_splitter':
            params['resolution'] = float(params['resolution'])
            cs = apply_subgraphs(cdlib.egonet_splitter, **params)
        elif alg == 'lfm':
            coms = cdlib.lfm(G, float(params['alpha']))
            cs = coms.communities
        elif alg == 'multicom':
            cs = cdlib.multicom(G, seed_node=0).communities
        elif alg == 'nmnf':
            params = {k: int(v) for k, v in params.items()}
            cs = apply_subgraphs(cdlib.nmnf, **params)
        elif alg == 'nnsed':
            cs = apply_subgraphs(cdlib.nnsed)
        elif alg == 'node_perception':  # not usable
            params = {k: float(v) for k, v in params.items()}
            cs = cdlib.node_perception(G, **params).communities
        elif alg == 'slpa':
            params["t"] = int(params["t"])
            params["r"] = float(params["r"])
            cs = cdlib.slpa(G, **params).communities
        elif alg == 'demon':
            params = {k: float(v) for k, v in params.items()}
            cs = cdlib.demon(G, **params).communities
        elif alg == 'GEMSEC':
            # gamma = float(params.pop('gamma'))
            params = {k: int(v) for k, v in params.items()}
            # params['gamma'] = gamma
            params['seed'] = 0
            _wrap = partial(karate_apply, karateclub.GEMSEC)
            cs = apply_subgraphs(_wrap, **params)
        elif alg == 'EdMot':
            params = {k: int(v) for k, v in params.items()}
            _wrap = partial(karate_apply, karateclub.EdMot)
            cs = apply_subgraphs(_wrap, **params)

    elif alg in ('infomap', 'community_leading_eigenvector', 'leig',
                 'multilevel', 'optmod', 'edge_betweenness', 'spinglass',
                 'walktrap', 'leiden', 'hlc'):
        G = igraph.Graph()
        G.add_vertices(nnodes)
        G.add_edges(edges)

        if alg == 'infomap':
            vcl = G.community_infomap(trials=int(params['trials']))
            cs = membership2cs(vcl.membership)
        elif alg == 'leig':
            clusters = None if params['clusters'] == 'auto' else int(
                params['clusters'])
            vcl = G.community_leading_eigenvector(clusters=clusters)
            cs = membership2cs(vcl.membership)
        elif alg == 'multilevel':
            vcl = G.community_multilevel()
            cs = membership2cs(vcl.membership)
        elif alg == 'optmod':  # too long
            membership, modularity = G.community_optimal_modularity()
            cs = membership2cs(membership)
        elif alg == 'edge_betweenness':
            clusters = None if params['clusters'] == 'auto' else int(
                params['clusters'])
            dendrogram = G.community_edge_betweenness(clusters, directed=False)
            try:
                clusters = dendrogram.as_clustering()
            except Exception:
                return []
            cs = membership2cs(clusters.membership)
        elif alg == 'spinglass':  # only for connected graph
            vcl = G.community_spinglass(parupdate=True,
                                        update_rule=params['update_rule'],
                                        start_temp=float(params['start_temp']),
                                        stop_temp=float(params['stop_temp']))
            cs = membership2cs(vcl.membership)
        elif alg == 'walktrap':
            dendrogram = G.community_walktrap(steps=int(params['steps']))
            try:
                clusters = dendrogram.as_clustering()
            except Exception:
                return []
            cs = membership2cs(clusters.membership)
        elif alg == 'leiden':
            vcl = G.community_leiden(
                objective_function=params['objective_function'],
                resolution_parameter=float(params['resolution_parameter']),
                n_iterations=int(params['n_iterations']))
            cs = membership2cs(vcl.membership)
        elif alg == 'hlc':
            algorithm = HLC(G, min_size=int(params['min_size']))
            cs = algorithm.run(None)

    elif alg in ("sbm", "sbm_nested"):
        np.random.seed(42)
        gt.seed_rng(42)

        G = gt.Graph(directed=False)
        G.add_edge_list(edges)

        deg_corr = bool(params['deg_corr'])
        B_min = None if params['B_min'] == 'auto' else int(params['B_min'])
        B_max = None if params['B_max'] == 'auto' else int(params['B_max'])

        if alg == "sbm":
            state = gt.minimize_blockmodel_dl(G,
                                              deg_corr=deg_corr,
                                              B_min=B_min,
                                              B_max=B_max)

            membership = state.get_blocks()
            cs = membership2cs(membership)
        if alg == "sbm_nested":
            state = gt.minimize_nested_blockmodel_dl(G,
                                                     deg_corr=deg_corr,
                                                     B_min=B_min,
                                                     B_max=B_max)
            levels = state.get_bs()
            level_max = int(params['level'])

            membership = {}
            for nid in range(nnodes):
                cid = nid
                level_i = len(levels)
                for level in levels:
                    cid = level[cid]
                    if level_i == level_max:
                        membership.setdefault(cid, []).append(nid)
                        break
                    level_i -= 1

            cs = membership.values()

    else:
        return None

    return list(cs)
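
A minimal usage sketch of find_communities (the toy edge list is an
assumption; it relies on the module-level imports of the original file):

edges = [(0, 1), (1, 2), (2, 0), (0, 3), (3, 4), (4, 5), (5, 3)]
cs = find_communities(
    nnodes=6, edges=edges, alg='sbm_nested',
    params={'deg_corr': True, 'B_min': 'auto', 'B_max': 'auto', 'level': 1})
print(cs)  # a list of node-id lists, one per detected block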
Code example #32
        print('Loading the nested stochastic block model...')
        import pickle
        with open('data/m1_500k_state.pkl', 'rb') as f:
            state = pickle.load(f)['state']
    else:
        print(
            'Fitting the nested stochastic block model, by minimizing its description length...'
        )
        #state = gt.minimize_blockmodel_dl(g)
        #state = gt.minimize_nested_blockmodel_dl(g)
        #state_ndc = gt.minimize_nested_blockmodel_dl(g, deg_corr=False)
        #state_dc  = gt.minimize_nested_blockmodel_dl(g, deg_corr=True)
        #state_dc  = gt.minimize_nested_blockmodel_dl(g, deg_corr=True, mcmc_equilibrate_args={'force_niter':100, 'mcmc_args':dict(niter=5)})
        state_dc_w = gt.minimize_nested_blockmodel_dl(
            g,
            deg_corr=True,
            state_args=dict(recs=[g.ep.weights],
                            rec_types=['real-exponential']))

        # print("Non-degree-corrected DL:\t", state_ndc.entropy())
        # print("Degree-corrected DL:\t", state_dc.entropy())
        # print(u"ln \u039b: ", state_dc.entropy() - state_ndc.entropy())

        # state analysis
        print('  elapsed time: %d s' % (time() - start))
        print('Analysing states of nested block model ...')
        state = state_dc_w

    state.print_summary()

    # plot nested SBM
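    # a hedged sketch of what typically follows (graph-tool's documented
    # drawing API; the output filename is an assumption):
    #   state.draw(output='nested_sbm_hierarchy.pdf')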