Example #1
 def compute_transitions_old(self):
     import igraph
     g = utils.get_igraph_from_adjacency(
         self._adata.uns['velocyto_transitions'], directed=True)
     vc = igraph.VertexClustering(
         g, membership=self._adata.obs[self._groups_key].cat.codes.values)
     # this stores all single-cell edges in the cluster graph
     cg_full = vc.cluster_graph(combine_edges=False)
     # this is the boolean version that simply counts edges in the clustered graph
     g_bool = utils.get_igraph_from_adjacency(
         self._adata.uns['velocyto_transitions'].astype('bool'), directed=True)
     vc_bool = igraph.VertexClustering(
         g_bool, membership=self._adata.obs[self._groups_key].cat.codes.values)
     cg_bool = vc_bool.cluster_graph(combine_edges='sum')  # collapsed version
     transitions = utils.get_sparse_from_igraph(cg_bool, weight_attr='weight')
     total_n = self._neighbors.n_neighbors * np.array(vc_bool.sizes())
     transitions_ttest = transitions.copy()
     transitions_confidence = transitions.copy()
     from scipy.stats import ttest_1samp
     for i in range(transitions.shape[0]):
         neighbors = transitions[i].nonzero()[1]
         for j in neighbors:
             forward = cg_full.es.select(_source=i, _target=j)['weight']
             backward = cg_full.es.select(_source=j, _target=i)['weight']
             # backward direction: add minus sign
             values = np.array(list(forward) + list(-np.array(backward)))
             # require some minimal number of observations
             if len(values) < 5:
                 transitions_ttest[i, j] = 0
                 transitions_ttest[j, i] = 0
                 transitions_confidence[i, j] = 0
                 transitions_confidence[j, i] = 0
                 continue
             t, prob = ttest_1samp(values, 0.0)
             if t > 0:
                 # number of outgoing edges greater than number of ingoing edges
                 # i.e., transition from i to j
                 transitions_ttest[i, j] = -np.log10(max(prob, 1e-10))
                 transitions_ttest[j, i] = 0
             else:
                 transitions_ttest[j, i] = -np.log10(max(prob, 1e-10))
                 transitions_ttest[i, j] = 0
             # geom_mean
             geom_mean = np.sqrt(total_n[i] * total_n[j])
             diff = (len(forward) - len(backward)) / geom_mean
             if diff > 0:
                 transitions_confidence[i, j] = diff
                 transitions_confidence[j, i] = 0
             else:
                 transitions_confidence[j, i] = -diff
                 transitions_confidence[i, j] = 0
     transitions_ttest.eliminate_zeros()
     transitions_confidence.eliminate_zeros()
     # transpose in order to match convention of stochastic matrices
     # entry ij means transition from j to i
     self.transitions_ttest = transitions_ttest.T
     self.transitions_confidence = transitions_confidence.T
Example #2
 def clusters(self, value: Union[ig.VertexClustering, dict[int, list[int]], list[int]]) -> None:
     if isinstance(value, ig.VertexClustering):
         self._partition = value
     elif isinstance(value, dict):
         sorted_node_community_map = dict(sorted(value.items()))
         part = ig.VertexClustering(
             self.graph,
             membership=[i[0] for i in sorted_node_community_map.values()],
         )
         self._partition = part
     elif isinstance(value, list):
         part = ig.VertexClustering(self.graph, membership=value)
         self._partition = part
     else:
         raise TypeError(f"Unsupported clusters value of type {type(value)!r}")
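
A minimal sketch of the three accepted forms (obj is a hypothetical instance exposing this setter, whose graph attribute is g; the two-way split is arbitrary):

import igraph as ig

g = ig.Graph.Famous('Zachary')
# the three forms the setter accepts:
# obj.clusters = ig.VertexClustering(g, membership=[v % 2 for v in range(g.vcount())])
# obj.clusters = {v: [v % 2] for v in range(g.vcount())}  # dict: node -> list of communities
# obj.clusters = [v % 2 for v in range(g.vcount())]       # plain membership list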
Example #3
    def compute(self, two):
        g = self.build_graph()

        comms = igraph.Graph.community_multilevel(g,
                                                  weights="weight",
                                                  return_levels=False)
        memb = comms.membership

        # force dichotomy (horrible exponential time algo)
        if two:
            bestmod = -1
            best = None

            for i in range(2**len(comms)):
                memb2 = twocomms(memb, i)
                vc = igraph.VertexClustering(g, membership=memb2)
                vc.recalculate_modularity()
                m = vc.modularity
                if m >= bestmod:
                    bestmod = m
                    best = memb2

            memb = best

        mod = g.modularity(memb, weights="weight")
        return memb, mod
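
twocomms is not shown in this example. A plausible sketch consistent with the range(2**len(comms)) loop (an assumption about its contract, not the original helper): bit c of i decides which of the two groups original community c joins.

def twocomms(memb, i):
    # memb: membership list with community indices 0..k-1; i: bitmask over communities
    return [(i >> c) & 1 for c in memb]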
Example #4
def gan_community_detection():
    gan = load_gan()
    graph = ig.Graph(directed=True)
    graph.add_vertices(load_apps())

    elist = []
    for e in gan.edges():
        u, v = e
        w = float(gan[u][v]['weight'])
        elist.append((u, v, w))

    # write networkx to file
    graph = nx.DiGraph()
    graph.add_weighted_edges_from(elist)
    nx.write_graphml(graph, GRAPHML_PATH)

    # read file to construct igraph
    graph = ig.Graph.Read_GraphML(GRAPHML_PATH)
    graph.vs['size'] = [10 for i in range(len(graph.vs))]

    clusters = graph.community_spinglass()
    membership = clusters.membership
    vc = ig.VertexClustering(graph, membership)

    result = []
    for c in vc:
        result.append(set([graph.vs[i]['id'] for i in c]))

    ig.plot(vc, bbox=(2400, 1400))
    return result, clusters.modularity
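
The GraphML round-trip above is one way to hand a networkx edge list to igraph. A shorter sketch uses igraph's Graph.TupleList, where weights=True maps the third tuple element to the 'weight' edge attribute:

import igraph as ig

def igraph_from_weighted_edges(elist):
    # elist: iterable of (u, v, weight) tuples, like `elist` built above
    return ig.Graph.TupleList(elist, directed=True, weights=True)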
Example #5
def plot(drama, caption=True):

    plot = ig.Plot(outputfolder + drama.get("title") + ".png",
                   bbox=(600, 600),
                   background="white")

    try:
        graph = ig.VertexClustering(drama.get("graph")).giant()
        visual_style = {}
        visual_style["layout"] = graph.layout_fruchterman_reingold()
        visual_style["vertex_color"] = "#0000ff"
        visual_style["vertex_shape"] = "rectangle"
        visual_style["vertex_size"] = 8
        visual_style["vertex_label"] = graph.vs["name"]
        visual_style["vertex_label_size"] = 15
        visual_style["vertex_label_dist"] = 1.5
        visual_style["edge_color"] = "#6495ed"
        visual_style["edge_width"] = graph.es["weight"]
        visual_style["bbox"] = (600, 600)
        visual_style["margin"] = 50
        plot.add(graph, **visual_style)
    except Exception:
        pass

    if caption:
        # Make the plot draw itself on the Cairo surface.
        plot.redraw()

        # Grab the surface, construct a drawing context and a TextDrawer.
        ctx = cairo.Context(plot.surface)
        ctx.set_font_size(15)
        drawer = TextDrawer(ctx, drama.get("title"), halign=TextDrawer.CENTER)
        drawer.draw_at(0, 597, width=600)

    plot.save()
Example #6
 def _compute_connectivities_v1_2(self):
     import igraph
     ones = self._neighbors.distances.copy()
     ones.data = np.ones(len(ones.data))
     # should be directed if we deal with distances
     g = utils.get_igraph_from_adjacency(ones, directed=True)
     vc = igraph.VertexClustering(
         g, membership=self._adata.obs[self._groups_key].cat.codes.values)
     ns = vc.sizes()
     n = sum(ns)
     es_inner_cluster = [vc.subgraph(i).ecount() for i in range(len(ns))]
     cg = vc.cluster_graph(combine_edges='sum')
     inter_es = utils.get_sparse_from_igraph(cg, weight_attr='weight')
     es = np.array(es_inner_cluster) + inter_es.sum(axis=1).A1
     inter_es = inter_es + inter_es.T  # \epsilon_i + \epsilon_j
     connectivities = inter_es.copy()
     expected_n_edges = inter_es.copy()
     inter_es = inter_es.tocoo()
     for i, j, v in zip(inter_es.row, inter_es.col, inter_es.data):
         expected_random_null = (es[i]*ns[j] + es[j]*ns[i])/(n - 1)
         if expected_random_null != 0:
             scaled_value = v / expected_random_null
         else:
             scaled_value = 1
         if scaled_value > 1:
             scaled_value = 1
         connectivities[i, j] = scaled_value
         expected_n_edges[i, j] = expected_random_null
     # set attributes
     self.ns = ns
     self.expected_n_edges_random = expected_n_edges
     self.connectivities = connectivities
     self.connectivities_tree = self._get_connectivities_tree_v1_2()
     return inter_es.tocsr(), connectivities
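
An illustrative check of the scaling above (numbers made up for the example): with cluster sizes ns = [3, 2], es = [4, 3] edges incident to each cluster, and n = 5 nodes, an observed inter-cluster edge count is divided by the random-null expectation and clipped at 1.

es, ns, n = [4, 3], [3, 2], 5
expected_random_null = (es[0]*ns[1] + es[1]*ns[0]) / (n - 1)  # (8 + 9) / 4 = 4.25
v = 3                                            # observed inter-cluster edge count
scaled_value = min(v / expected_random_null, 1)  # ~0.71, clipped at 1
print(expected_random_null, scaled_value)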
Example #7
 def _compute_connectivities_v1_0(self):
     import igraph
     ones = self._neighbors.connectivities.copy()
     ones.data = np.ones(len(ones.data))
     g = utils.get_igraph_from_adjacency(ones)
     vc = igraph.VertexClustering(
         g, membership=self._adata.obs[self._groups_key].cat.codes.values)
     ns = vc.sizes()
     cg = vc.cluster_graph(combine_edges='sum')
     inter_es = utils.get_sparse_from_igraph(cg, weight_attr='weight')/2
     connectivities = inter_es.copy()
     inter_es = inter_es.tocoo()
     n_neighbors_sq = self._neighbors.n_neighbors**2
     for i, j, v in zip(inter_es.row, inter_es.col, inter_es.data):
         # have n_neighbors**2 inside sqrt for backwards compat
         geom_mean_approx_knn = np.sqrt(
             n_neighbors_sq * ns[i] * ns[j])
         if geom_mean_approx_knn != 0:
             scaled_value = v / geom_mean_approx_knn
         else:
             scaled_value = 1
         connectivities[i, j] = scaled_value
     # set attributes
     self.ns = ns
     self.connectivities = connectivities
     self.connectivities_tree = self._get_connectivities_tree_v1_0(inter_es)
     return inter_es.tocsr(), connectivities
Example #8
def get_communities(g, n, path, filename, algorithm='label_propagation'):
    """ Gets a number of igraph.VertexClustering objects.

        These objects are loaded from file if possible, otherwise they are
        found using the given algorithm.

        :param g: The graph to find communities in.
        :param n: The number of communities to find.
        :param path: The path to the base folder for the graph.
        :param filename: The filename of the graph to use.
        :param algorithm: The name of the clustering algorithm to use.

        The filename and path arguments are used to find clusters stored on
        disk. Any new clusters are stored along with the ones already present
        for future use.

        :return: A list of VertexClustering objects

        Examples
        --------
        >>> path = 'data/testing'
        >>> filename = 'test1'
        >>> g = load_network(path, filename)
        >>> comms = get_communities(g, 10, path, filename, algorithm='random_walk')
        >>> len(comms)
        10
    """
    # load any preexisting clusters
    cluster_path = '{}/communities/{}/{}.json'.format(path, algorithm,
                                                      filename)
    ensure_folder(cluster_path)
    h = open(cluster_path, 'a')
    try:
        multithreading.lock_file_handle(h)
        try:
            cluster_sets = json.load(open(cluster_path, 'r'))
        except ValueError:
            # the file is probably empty because we just made it
            cluster_sets = []
        logger.info('Loaded {} communities'.format(len(cluster_sets)))
        # add new clusters if needed
        while len(cluster_sets) < n:
            logger.debug('{} / {} communities'.format(len(cluster_sets), n))
            clustering = _algorithms[algorithm](g)
            cluster_sets.append({
                'membership':
                clustering.membership,
                'modularity_params':
                clustering._modularity_params
            })
        # save the cluster sets
        json.dump(cluster_sets, open(cluster_path, 'w'), indent=2)
    finally:
        multithreading.unlock_file_handle(h)
        h.close()

    # construct a list of objects
    clusters = [igraph.VertexClustering(g, **c) for c in cluster_sets]
    return clusters[:n]  # return only the first n
Example #9
 def compute_transitions_coarse(self):
     import igraph
     g = utils.get_igraph_from_adjacency(
         self._adata.uns['rna_velocity']['graph'], directed=True)
     self.vc = igraph.VertexClustering(
         g, membership=self._adata.obs[self._groups].cat.codes.values)
     cg = self.vc.cluster_graph(combine_edges='sum')
     self.transitions_coarse = utils.get_sparse_from_igraph(cg, weight_attr='weight')
Example #10
def modularity(p_list, g):
    """ Calculates the modularity of a partition of g without accouting for
        edge weight.

        Uses the constraints implied in the publication as well as constraints
        necessary to partition a graph that is disconnnected. the given
        constraints include: returning a modularity of one to paritions of a
        single modules containing a single isolated node, returning a
        modularity of zero to partitions of multiple isolated modules,
        incrementing the modularity by 1/# of modules for a module that
        contains a single isolatednode. Otherwise, the modularity is calculated
        as proposed in equation 1.

        Note: This modularity calculation is much faster than
        modularity_weights but is less accurate.

        Parameters
        ----------
        p_list: list
            Membership list of interest.
        g: igraph.Graph
            Graph of interest.

        Returns
        -------
        m: float
            Modularity of partition. The value will be between 0 and 1.

        References
        ----------
        R. Guimera, L. Amaral
    """
    p = igraph.VertexClustering(g, p_list)
    L = g.ecount()
    # Return modularity of 1 for module containing single isolated node
    if L == 0 and g.vcount() == 1:
        return 1
    # Return modularity of 0 for module containing multiple isolated nodes
    if L == 0 and g.vcount() > 1:
        return 0
    # Calculate modularity
    m = 0
    for i, mod in enumerate(p):
        # Skip if empty module
        if len(mod) == 0:
            continue
        # A module that contains a single isolated node adds 1/# of modules
        if len(mod) == 1 and g.degree(mod[0]) == 0:
            m += len(p)**-1
            continue
        # Create subgraph containing module of interest
        g_sub = g.subgraph(mod)
        ls = float(g_sub.ecount())
        ds = float(sum(g_sub.degree()))
        # Penalty applied is proportional to the number of components in subgraph
        m += ((ls / L) - (ds / L)**2)/len(g_sub.components())
    return m 
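
A quick usage sketch for the helper above (the two-block split of the karate club is arbitrary). Note that the helper computes degrees on each module's subgraph, so its values are not expected to match igraph's built-in Graph.modularity():

import igraph

g = igraph.Graph.Famous('Zachary')
p_list = [0 if v < 17 else 1 for v in range(g.vcount())]
print(modularity(p_list, g))  # the helper defined above
print(g.modularity(p_list))   # igraph's built-in, for comparison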
Example #11
 def Build_random_clustering(cls, vc):
     '''
     Build a random vertex clustering from the input clustering vc:
     the membership list is shuffled, so the output preserves the
     community sizes of the input.
     '''
     nodes_count = vc.n
     l = vc.membership
     random.shuffle(l)
     return igraph.VertexClustering(vc.graph, l)
Example #12
    def LocalMove(self, elligible):
        # Work on a copy to avoid prematurely altering the "true" communities
        comm = self.comm.copy()

        # number of communities; there must be no empty group
        num_comm = 0

        while num_comm != self.number_of_groups:
            node = np.random.choice(
                elligible
            )  # Take a random node from the community being passed in

            # Making the local move: whether or not all nodes share a single
            # community, the move is the same reassignment, so no branch is needed
            selected = comm[node]
            new_comms_list = [val for val in self.comms_set if val != selected]
            comm[node] = np.random.choice(new_comms_list)

            num_comm = len(list(set(comm.values())))

        # Getting modularity of post-local move partitions
        partition = ig.VertexClustering(self.ntwk,
                                        membership=list(comm.values()))
        ltemp_modularity = partition.modularity

        # If the move is better, adjust the community and modularity accordingly;
        # alternatively, accept a worse move if it passes the temperature test
        better = bool(ltemp_modularity >= self.modularity)
        temp_move = bool(
            np.random.rand() <= np.exp((ltemp_modularity - self.modularity) *
                                       (1 / self.temp)))

        if better or temp_move:
            self.comm = comm
            self.modularity = ltemp_modularity
            self.t_modularity_list.append(ltemp_modularity)
            self.temp *= 0.995

            return True
        else:
            self.temp *= 0.995
            return False
Example #13
 def compute_connectivities_coarse(self):
     import igraph
     ones = self.connectivities.copy()
     # graph where edges carry weight 1
     ones.data = np.ones(len(ones.data))
     g = utils.get_igraph_from_adjacency(ones)
     self.vc = igraph.VertexClustering(
         g, membership=self._adata.obs[self._groups].cat.codes.values)
     cg = self.vc.cluster_graph(combine_edges='sum')
     self.connectivities_coarse = utils.get_sparse_from_igraph(cg, weight_attr='weight')/2
Example #14
    def compute_transitions(self):
        vkey = 'velocity_graph'
        if vkey not in self._adata.uns:
            if 'velocyto_transitions' in self._adata.uns:
                self._adata.uns[vkey] = self._adata.uns['velocyto_transitions']
                logg.debug(
                    "The key 'velocyto_transitions' has been changed to 'velocity_graph'."
                )
            else:
                raise ValueError(
                    'The passed AnnData needs to have an `uns` annotation '
                    "with key 'velocity_graph' - a sparse matrix from RNA velocity."
                )
        if self._adata.uns[vkey].shape != (self._adata.n_obs,
                                           self._adata.n_obs):
            raise ValueError(
                f"The passed 'velocity_graph' have shape {self._adata.uns[vkey].shape} "
                f"but shoud have shape {(self._adata.n_obs, self._adata.n_obs)}"
            )
        # restore this at some point
        # if 'expected_n_edges_random' not in self._adata.uns['paga']:
        #     raise ValueError(
        #         'Before running PAGA with `use_rna_velocity=True`, run it with `False`.')
        import igraph

        g = _utils.get_igraph_from_adjacency(
            self._adata.uns[vkey].astype('bool'),
            directed=True,
        )
        vc = igraph.VertexClustering(
            g, membership=self._adata.obs[self._groups_key].cat.codes.values)
        # set combine_edges to False if you want self loops
        cg_full = vc.cluster_graph(combine_edges='sum')
        transitions = _utils.get_sparse_from_igraph(cg_full,
                                                    weight_attr='weight')
        transitions = transitions - transitions.T
        transitions_conf = transitions.copy()
        transitions = transitions.tocoo()
        total_n = self._neighbors.n_neighbors * np.array(vc.sizes())
        # total_n_sum = sum(total_n)
        # expected_n_edges_random = self._adata.uns['paga']['expected_n_edges_random']
        for i, j, v in zip(transitions.row, transitions.col, transitions.data):
            # if expected_n_edges_random[i, j] != 0:
            #     # factor 0.5 because of asymmetry
            #     reference = 0.5 * expected_n_edges_random[i, j]
            # else:
            #     # approximate
            #     reference = self._neighbors.n_neighbors * total_n[i] * total_n[j] / total_n_sum
            reference = np.sqrt(total_n[i] * total_n[j])
            transitions_conf[i, j] = 0 if v < 0 else v / reference
        transitions_conf.eliminate_zeros()
        # transpose in order to match convention of stochastic matrices
        # entry ij means transition from j to i
        self.transitions_confidence = transitions_conf.T
Example #15
    def GlobalMove(self, elligible):
        # Work on a copy to avoid prematurely altering the "true" communities
        comm = self.comm.copy()

        # number of communities; there must be no empty group
        num_comm = 0

        while num_comm != self.number_of_groups:
            # list of possible counts of nodes to move
            if len(elligible) == 2:
                node_choice = [i for i in range(2, len(elligible) + 1)]
            else:
                node_choice = [i for i in range(2, len(elligible))]

            node_num = np.random.choice(node_choice)
            # take several random nodes (2 up to the number of eligible nodes)
            node_list = list(set(np.random.choice(elligible, node_num, replace=False)))

            # select a single node whose community determines the move
            s_node = np.random.choice(node_list)
            selected = comm[s_node]
            new_comms_list = [val for val in self.comms_set if val != selected]

            # move all chosen nodes to one new community
            change_comm = np.random.choice(new_comms_list)
            for node in node_list:
                comm[node] = change_comm

            num_comm = len(set(comm.values()))

        partition = ig.VertexClustering(self.ntwk, membership=list(comm.values()))
        gtemp_modularity = partition.modularity

        # Accept if the move improves modularity, or probabilistically
        # according to the temperature schedule
        better = bool(gtemp_modularity >= self.modularity)
        temp_move = bool(np.random.rand() <= np.exp(
            (gtemp_modularity - self.modularity) * (1 / self.temp)))

        if better or temp_move:
            self.comm = comm
            self.modularity = gtemp_modularity
            self.t_modularity_list.append(gtemp_modularity)
            self.temp = self.temp / (1 + (self.cooling * self.temp))
            return True
        else:
            self.temp = self.temp / (1 + (self.cooling * self.temp))
            return False
Example #16
    def LocalMove(self, elligible):
        # Work on a copy to avoid prematurely altering the "true" communities
        comm = self.comm.copy()
        # number of communities; there must be no empty group
        num_comm = 0

        while num_comm != self.number_of_groups:
            node = np.random.choice(
                elligible
            )  # Take a random node from the community being passed in

            # Making the local move
            selected = comm[node]
            new_comms_list = [val for val in self.comms_set if val != selected]
            comm[node] = np.random.choice(new_comms_list)

            num_comm = len(set(comm.values()))

        # Getting modularity of post-local move partitions
        partition = ig.VertexClustering(self.ntwk,
                                        membership=list(comm.values()))
        ltemp_modularity = partition.modularity

        # If the move is better, adjust the community and modularity accordingly;
        # alternatively, accept a worse move if it passes the temperature test

        better = bool(ltemp_modularity >= self.modularity)
        print("L, Former mod {}, After mod {}".format(self.modularity,
                                                      ltemp_modularity))
        temp_move = bool(
            np.random.rand() <= np.exp((ltemp_modularity - self.modularity) *
                                       (1 / self.temp)))

        if better or temp_move:

            self.optim_partition = partition
            self.comm = comm
            self.modularity = ltemp_modularity
            self.t_modularity_list.append(ltemp_modularity)
            self.temp = self.temp / (1 + (self.cooling * self.temp))

            return True
        else:
            #temp* self.cooling = 0.995,0.999
            #self.temp = self.temp /(1+(self.cooling*self.temp))
            return False
Example #17
def export_word_graph(keyword, dictionary, modelname, model, num_topics,
                      num_words, threshold, depth):
    """
    Constructs a network of relations between words and topics.
    This can be seen as a bipartite network, which is then transformed
    into a unipartite network of word-word relations.
    Of this network the giant component is taken and visualized.
    """

    H = nx.Graph()
    for word in dictionary.token2id.items():
        H.add_node(word[1], text=word[0], partition=1)

    n = 0
    for topic in model.show_topics(num_topics, num_words, formatted=False):
        H.add_node(len(dictionary) + n + 1, partition=0)
        for word in range(num_words):
            if topic[word][0] > threshold:  #only positive weights
                H.add_edge(
                    len(dictionary) + n + 1,
                    dictionary.token2id[topic[word][1]])
        n += 1

    # construct bipartite graph with topics as 0 and words as 1
    word_nodes, topic_nodes = nx.algorithms.bipartite.sets(H)

    # create unipartite projection for words
    W = nx.algorithms.bipartite.weighted_projected_graph(H, word_nodes)

    # write to disk as GML
    nx.write_gml(
        W, "{0}_{1}_{2}x{3}.gml".format(keyword + modelname, depth, num_topics,
                                        num_words))

    # read from disk as GML and create as igraph.Graph
    G = ig.read(
        "{0}_{1}_{2}x{3}.gml".format(keyword + modelname, depth, num_topics,
                                     num_words), "gml")

    # filter to giant component
    gc = ig.VertexClustering(G).giant()
    visual_style = {}
    visual_style["layout"] = G.layout_fruchterman_reingold()
    visual_style["vertex_size"] = 8
    visual_style["vertex_label"] = G.vs["text"]
    visual_style["edge_width"] = 0.5
    visual_style["bbox"] = (1200, 1200)
    visual_style["margin"] = 50
    ig.plot(
        gc, "{0}_{1}_{2}x{3}_FR.svg".format(keyword + modelname, depth,
                                            num_topics, num_words),
        **visual_style)
Example #18
def average_odf_and_internal_density(
        graph: igraph.Graph, members: List[int]) -> Tuple[float, float]:
    """
    Calculates the average odf and the internal density of a solution in one go
    :param graph: graph to calculate the metric to
    :param members: community of each node
    :return: both measures
    """
    v_cluster = igraph.VertexClustering(graph, membership=members)
    edges_crossing = v_cluster.crossing()
    communities = v_cluster.subgraphs()

    n_communities = len(communities)
    average_odf = np.zeros(n_communities)
    internal_density = np.zeros(n_communities)

    comm_sizes = np.array([g.vcount() for g in communities])

    for e_index, crossing in enumerate(edges_crossing):
        if crossing:
            # edge between clusters: contributes to average ODF
            edge = graph.es[e_index]

            source_degree = graph.degree(edge.source)
            average_odf[members[edge.source]] += 1.0 / source_degree

            target_degree = graph.degree(edge.target)
            average_odf[members[edge.target]] += 1.0 / target_degree
        else:
            # edge inside a cluster: contributes to internal density
            edge = graph.es[e_index]
            internal_density[members[edge.source]] += 1

    # drop communities with fewer than two nodes (density is undefined there)
    non_empty_comms = np.where(comm_sizes > 1)
    comm_sizes = np.take(comm_sizes, non_empty_comms)
    average_odf = np.take(average_odf, non_empty_comms)
    internal_density = np.take(internal_density, non_empty_comms)

    # aggregate values
    average_odf = average_odf / comm_sizes
    internal_density = (2.0 * internal_density) / (comm_sizes *
                                                   (comm_sizes - 1.0))
    internal_density = 1.0 - internal_density

    average_odf = np.mean(average_odf)
    internal_density = np.mean(internal_density)

    return average_odf, internal_density
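
A usage sketch for average_odf_and_internal_density (assuming the imports the snippet presumes: igraph, numpy as np, and typing's List/Tuple; the membership split is arbitrary):

import igraph

g = igraph.Graph.Famous('Zachary')
members = [0 if v < 17 else 1 for v in range(g.vcount())]
avg_odf, internal_density = average_odf_and_internal_density(g, members)
print(avg_odf, internal_density)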
Example #19
def kmeans_clustering(h5_data, data_matrix, adjacency_matrix, k):
    centroids, membership_list, inertia = k_means(data_matrix, n_clusters=k)

    # Defaultdict with cluster membership as key and a list of all members as
    # value (built for inspection; the name is re-bound to a VertexClustering below)
    kmeans_communities = defaultdict(list)
    for x in range(len(membership_list)):
        kmeans_communities[membership_list[x]].append(x)

    # Creation of the actual graph
    kmeans_community_G = base_graph_structure(h5_data, adjacency_matrix)
    # Set communities based on the cluster found by k-means
    kmeans_communities = ig.VertexClustering(kmeans_community_G,
                                             membership=membership_list)

    return kmeans_community_G, kmeans_communities, membership_list, centroids
Example #20
def outqueue2res(g, nprocs, mod_res, memship_res):
    """Take output from outqueue and calculate the final results for the
    community detection.

    :param g: igraph.Graph object
    :param nprocs: number of processes running in parallel
    :type nprocs: int
    :param mod_res: highest modularity score
    :param memship_res: membership list corresponding to highest modularity
                            score
    """
    for work in range(nprocs):
        for val in iter(output_queue.get, "STOP"):
            modaux = val[0]
            if modaux > mod_res.value:
                mod_res.value = modaux
                loug = ig.VertexClustering(g, val[1])
                memship_res[:] = loug.membership
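
add2inqueue, comdet_worker, and the module-level queues are defined elsewhere in this project. A plausible sketch of the worker, consistent with outqueue2res consuming (modularity, membership) tuples and a "STOP" sentinel (an assumption, not the original code):

import multiprocessing as mp
import random
import igraph as ig

input_queue = mp.Queue()   # assumed module-level queues
output_queue = mp.Queue()

def comdet_worker(nodelist, edgelist):
    # shuffle the node order, run Louvain on the relabelled graph, and emit
    # (modularity, membership-in-original-order) for outqueue2res to compare
    for _ in iter(input_queue.get, 'STOP'):
        order = list(nodelist)
        random.shuffle(order)
        relabel = {old: new for new, old in enumerate(order)}
        g = ig.Graph([(relabel[u], relabel[v]) for u, v in edgelist])
        vc = g.community_multilevel()
        memb = [vc.membership[relabel[v]] for v in nodelist]
        output_queue.put((g.modularity(vc), memb))
    output_queue.put('STOP')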
Example #21
def radicchi(G, measure='weak'):
    """ Wrapper for execution of the Radicchi community-detection algorithm. Returns 
    covers of the graph, with metadata representing provenance - in essence, a "dendrogram"
    that represents splits into communities. """
    g = G.copy()
    g.vs['id'] = list(range(g.vcount()))

    if measure == 'weak':
        result = radicchi_internal(G, g, 0, measure=measure, clustering=4)
    elif measure == 'strong':
        result = radicchi_internal(G, g, 0, measure=measure, clustering=3)
    else:
        raise Exception('Other measures of community not yet supported')

    clustering = [0] * G.vcount()
    for i, l in enumerate(result):
        for v in l:
            clustering[v] = i

    return ig.VertexClustering(G, clustering)
Example #22
def pan_community_detection(uid):
    graph = convert_to_igraph(uid)
    # graph.vs['label'] = graph.vs['id']
    graph.vs['size'] = [30 for i in range(len(graph.vs))]

    # two kinds of methods for directed graph
    clusters = graph.community_spinglass()  # could get higher modularity
    # clusters = pan.community_edge_betweenness().as_clustering()

    membership = clusters.membership
    vc = ig.VertexClustering(graph, membership)

    result = []
    for c in vc:
        result.append([graph.vs[i]['id'] for i in c])

    scale = sum([len(c) for c in result]) / float(len(result))

    # draw communities
    # ig.plot(vc, bbox=(1000, 1000))

    return len(result), scale, clusters.modularity
Example #23
def get_expected_edges_ml(part_obj, layer_vec, weight='weight'):
    """
	Multilayer calculation of expected edges.  Breaks up partition object \
	by layer and calculated expected edges for each layer-subgraph seperately\
	thus getting the relative weights correct
	:param part_obj: ig.VertexPartition with the appropriate graph and membership vector.
	:param layer_vec: array with length equaling number of nodes specifying which layer each node is in.
	:param weight: weight attribute on network
	:return:
	"""
    P_tot = 0
    layers = np.unique(layer_vec)
    for layer in layers:
        cind = np.where(layer_vec == layer)[0]
        subgraph = part_obj.graph.subgraph(cind)
        submem = np.array(part_obj.membership)[cind]

        cpartobj = ig.VertexClustering(graph=subgraph, membership=submem)
        P_tot += get_expected_edges(cpartobj,
                                    weight=weight,
                                    directed=subgraph.is_directed())
    return P_tot
Example #24
    def GlobalMove(self, elligible):
        # list of possible counts of nodes to move
        node_choice = [i for i in range(2, len(elligible) + 1)]
        node_num = np.random.choice(node_choice)
        # take several random nodes (2 up to the number of eligible nodes)
        node_list = list(set(np.random.choice(elligible, node_num, replace=False)))

        # Work on a copy to avoid prematurely altering the "true" communities
        comm = self.comm.copy()

        # select a single node whose community determines the move
        s_node = np.random.choice(node_list)
        selected = comm[s_node]
        new_comms_list = [val for val in self.comms_set if val != selected]

        # move all chosen nodes to one new community
        change_comm = np.random.choice(new_comms_list)
        for node in node_list:
            comm[node] = change_comm

        partition = ig.VertexClustering(self.ntwk, membership=list(comm.values()))
        gtemp_modularity = partition.modularity

        # Accept if the move improves modularity, or probabilistically
        # according to the temperature schedule
        better = bool(gtemp_modularity >= self.modularity)
        temp_move = bool(np.random.rand() <= np.exp(
            (gtemp_modularity - self.modularity) * (1 / self.temp)))

        if better or temp_move:
            self.comm = comm
            self.modularity = gtemp_modularity
            self.t_modularity_list.append(gtemp_modularity)
            self.temp *= 0.995
            return True
        else:
            self.temp *= 0.995
            return False
Example #25
f = open(first_clustering_path, 'r')
first_clustering_mem_list = []
for line in f:
	split_string = line.split(" ")
	first_clustering_mem_list.append(int(split_string[1].replace("\n", "")))
f.close()

# do the same for second clustering file
f = open(second_clustering_path, 'r')
second_clustering_mem_list = []
for line in f:
	split_string = line.split(" ")
	second_clustering_mem_list.append(int(split_string[1].replace("\n", "")))
f.close()

# create corresponding Vertex Clusterings
first_clustering = igraph.VertexClustering(input_network, first_clustering_mem_list)
second_clustering = igraph.VertexClustering(input_network, second_clustering_mem_list)

print "done creating clusterings."
if verbosity:
	print first_clustering
	print second_clustering

############ COMPARE CLUSTERINGS ############
vi = igraph.compare_communities(first_clustering, second_clustering, method='vi', remove_none=False)
nmi = igraph.compare_communities(first_clustering, second_clustering, method='nmi', remove_none=False)
split_join = igraph.compare_communities(first_clustering, second_clustering, method='split-join', remove_none=False)
rand = igraph.compare_communities(first_clustering, second_clustering, method='rand', remove_none=False)
adj_rand = igraph.compare_communities(first_clustering, second_clustering, method='adjusted_rand', remove_none=False)

print "\nSeparated by tabs:"
Example #26
def community_sa(g, mod_calc, t0 = 2.5 *10**-4, C = 0.75, f = 0.5):
    """ Partitions the graph using the SA community detection algorithm
        proposed in Guimera and Amaral's publication.

        Assumptions made when implementing the algorithm include randomly
        selecting the node n for which to locally modify and using a 50% to
        determine whether a global split or merge is proposed. The
        splitalgorthim follows the detection algorith exactly.
        For each T, f * S**2 local changes are made.

        Parameters
        ----------
        g: igraph.Graph
            The graph of interest.
        mod_calc: lambda
            Function indicating which modularity measure to use.
        T0: float
            The intial temperature. The default is 2.5 * 10**-4, as proposed in
            Brockman's supplmental materials.
        c: float
            The cooling factor. The default is c = 0.75, as proposed in
            Brockman's supplemental materials.
        f: float
            The proportional of changes made. The default is 0, as proposed in
            Brockman's paper.

        Returns
        -------
        igraph.VertexClustering
            Returns a clustering of the vertex set of the graph.

        References
        ----------
        R. Guimera, L. Amaral

        Examples
        --------
        >>> mod_calc = lambda p, g: modularity(p, g)
        >>> parts = community_sa(g, mod_calc, f = 0.65)
        >>> type(parts)
        igraph.clustering.VertexClustering
    """
    t = float(t0)
    S = g.vcount()
    # Initialize p so that each of the S nodes is in its own module
    p = list(range(S))
    accept = False
    list_steps = []

    while(not accept):
        logger.info('temp: {} modularity: {}'.format(t, mod_calc(p, g)))

        # Propose fS**2 individual node movements
        for i in range(int((f * S)**2)):
            pnew = _local_update(list(p))
            # Accept new partition according to equation 2 from the publication
            if _accept_update(mod_calc, g, pnew, p, t):
                p = list(pnew)
            if i % 1000 == 0:
                logger.info('{} of {} local updates complete'.format(i, int((f * S)**2)))

        # Propose fS collective movements 
        # Change probability of merge given previous proposal rejections
        merge_prob = 2
        for i in range(int(f * S)):
            if i % 100 == 0:
                logger.info('{} of {} local updates complete'.format(i, int(f * S)))
            # With a changing probability, merge modules
            if randint(1, int(merge_prob)) != 1:
                pnew = _merge_update(list(p))
                # Accept new partition according to equation 2 from the publication
                if _accept_update(mod_calc, g, pnew, p, t):
                    p = list(pnew)
            # Otherwise split modules
            else:   
                # Split module using simplified SA community detection algorithm
                pnew = _split_update(igraph.VertexClustering(g, p), sample(list(p), 1)[0], t0, t, S, C, f, mod_calc)
                # Accept new partition according to equation 2 from the publication
                if _accept_update(mod_calc, g, pnew, p, t):
                    p = list(pnew) 
                else: 
                    # For every 1000th rejection reduce probability of split
                    merge_prob += 0.001

        # Append current modularity
        list_steps.append(mod_calc(p, g))
        # Check if modularity has improved within three last temp steps
        if len(list_steps) > 3:
            # Maintain length of 3
            list_steps.remove(list_steps[0])
            if (abs(list_steps[0] - list_steps[1]) + abs(list_steps[0] - list_steps[2])) < 2 * 10**-3: 
                # If M has seen no improvement, accept the partition and exit the while loop
                accept = True

        # Cool t
        t *= C
    return igraph.VertexClustering(g, p)
Example #27
reposCG = ig.Graph.Read_Pickle('reposCG.pickle')

#%%
'''
COMPUTE CLUSTERS WITH FAST GREEDY
reposcom is, I believe, a list of lists where each list is a community
'''
reposcom = reposCG.community_fastgreedy(weights=None)
#fgclust = reposcom.as_clustering()
fgclust2 = reposcom.as_clustering(n=20)
'''
Go to savecsv.py and save!
The fastgreedy clusters were computed on the giant component of the undirected graph
'''

#%%
'''
CREATE A VERTEX CLUSTERING OBJECT (which is what the community function returns)
From:
    - membership list
    - graph
'''

#udvc2 = ig.VertexClustering(udg, membership = membership)
rdvc = ig.VertexClustering(rdg, membership=membership)

#%%

#sizesu = sorted(list(udvc.sizes()))
sizesr = sorted(list(rdvc.sizes()))
Example #28
def shuffled_comdet(g, numiter, parallel=True):
    """Run Louvain community detections with shuffled node sequence.

    Perform ``numiter`` Louvain community detections of the input graph ``g``
    and return the VertexClustering object with the highest modularity score.
    The community detections are performed on randomly shuffled node sequence
    and can be run in parallel. The number of processes is determined by the
    number of CPU of the work station (see below).

    :param g: graph for community detection
    :type g: igraph.Graph
    :param numiter: number of reshuffled community detections to run
    :type numiter: int
    :param parallel: if ``True``, the numiter community detections are performed
                in parallel, otherwise in sequence
    :type parallel: bool
    :returns: VertexClustering with highest modularity score
    :rtype: igraph.VertexClustering     
    """
    # get edgelist and nodelist from input graph
    edgelist = [g.es[i].tuple for i in range(len(g.es))]
    nodelist = range(g.vcount())

    # run first community detection:
    loug = g.community_multilevel(return_levels=False)
    mod = g.modularity(loug)
    # print('Modularity of the original order = ', mod)

    # create variables which are shared among all the parallel workers
    mod_res = mp.Value('d', mod)
    memship_res = mp.Array('i', loug.membership)

    # set number of processes run in parallel as nummber of CPUs (+-1 usually)
    if parallel:
        numprocs = mp.cpu_count() - 1
    else:
        numprocs = 1

    # processes which add elements to input and output queue
    p_inqueue = mp.Process(target=add2inqueue, args=(numiter - 1, numprocs))
    p_outqueue = mp.Process(target=outqueue2res,
                            args=(g, numprocs, mod_res, memship_res))

    # create worker processes
    ps = [mp.Process(target=comdet_worker, args=(nodelist, edgelist))
          for i in range(numprocs)]

    # start queues
    p_inqueue.start()
    p_outqueue.start()

    # start workers
    for p in ps:
        p.start()

    # end processes once they are done
    p_inqueue.join()
    for p in ps:
        p.join()
    p_outqueue.join()

    print('Done.')
    return ig.VertexClustering(g, memship_res[:])
Example #29
def find_partition(graph,
                   method,
                   initial_membership=None,
                   weight=None,
                   resolution_parameter=1.0,
                   consider_comms=ALL_NEIGH_COMMS):
    """
  Method for detecting communities using the Louvain algorithm. This functions
  finds the optimal partition given the specified method. For the various possible
  methods see package documentation.

  Keyword arguments:

  graph
    The graph for which to find the optimal partition.

  method
    The type of partition which will be used during optimisation.

  initial_membership=None
    If provided, the optimisation will start with this initial membership.
    Should be a list that contains any unique identifier for a community, which
    is converted to a numeric representation. Since communities can never be
    split, the number of communities in this initial partition provides an upper
    bound.

  weight=None
    If provided, indicates the edge attribute to use as a weight. (N.B. note
    that Significance is not suited for weighted graphs).

  resolution_parameter=1.0
    For those methods that use a resolution parameter, this is indicated here.

  consider_comms=ALL_NEIGH_COMMS
    This parameter determines which communities to consider when moving a node.

    ALL_COMMS
      Consider all communities always.

    ALL_NEIGH_COMMS
      Consider all communities of the neighbours

    RAND_COMM
      Consider only a single random community

    RAND_NEIGH_COMM
      Consider only a single random community of the neighbours. Notice that
      this is sampled from the set of all neighbours so that the communities are
      sampled with respective frequency.

    In ordinary cases it is usually not necessary to alter this parameter. The
    default choice of considering all communities of the neighbours works
    relatively well, and is relatively fast. However, in the case of negative
    weights, it may be better to move a node to a community to which it is not
    connected, so that one would need to consider all communities.
    Alternatively, by only selecting a single random community from the
    neighbours to consider, one can considerably speed up the algorithm, without
    losing too much quality.

  The quality of the partition, as measured by the indicated method is
  provided in the returned partition as partition.quality.

  returns: optimized partition."""
    pygraph_t = __get_py_capsule(graph)
    if weight is not None:
        if isinstance(weight, str):
            weight = graph.es[weight]
        else:
            # Make sure it is a list
            weight = list(weight)
    if initial_membership is not None:
        gen = _ig.UniqueIdGenerator()
        initial_membership = [gen[m] for m in initial_membership]
    membership, quality = _c_louvain._find_partition(pygraph_t, method,
                                                     initial_membership,
                                                     weight,
                                                     resolution_parameter,
                                                     consider_comms)
    partition = _ig.VertexClustering(graph, membership)
    partition.quality = quality
    return partition
Example #30
    def compute_transitions(self):
        try:
            import igraph
        except ImportError:
            raise ImportError(
                "To run paga, you need to install `pip install python-igraph`"
            )
        vkey = f"{self.vkey}_graph"
        if vkey not in self._adata.uns:
            raise ValueError(
                "The passed AnnData needs to have an `uns` annotation "
                "with key 'velocity_graph' - a sparse matrix from RNA velocity."
            )
        if self._adata.uns[vkey].shape != (self._adata.n_obs, self._adata.n_obs):
            raise ValueError(
                f"The passed 'velocity_graph' has shape {self._adata.uns[vkey].shape} "
                f"but shoud have shape {(self._adata.n_obs, self._adata.n_obs)}"
            )

        clusters = self._adata.obs[self.groups]
        cats = clusters.cat.categories
        vgraph = self._adata.uns[vkey] > 0.1
        time_prior = self.use_time_prior

        if isinstance(time_prior, str) and time_prior in self._adata.obs.keys():
            vpt = self._adata.obs[time_prior].values
            vpt_mean = self._adata.obs.groupby(self.groups)[time_prior].mean()
            vpt_means = np.array([vpt_mean[cat] for cat in clusters])
            rows, cols, vals = [], [], []
            for i in range(vgraph.shape[0]):
                indices = vgraph[i].indices
                idx_bool = vpt[i] < vpt[indices]
                idx_bool &= vpt_means[indices] > vpt_means[i] - 0.1
                cols.extend(indices[idx_bool])
                vals.extend(vgraph[i].data[idx_bool])
                rows.extend([i] * np.sum(idx_bool))
            vgraph = vals_to_csr(vals, rows, cols, shape=vgraph.shape)

        if isinstance(self.end_key, str) and self.end_key in self._adata.obs.keys():
            set_row_csr(vgraph, rows=np.where(self._adata.obs[self.end_key] > 0.7)[0])

        if isinstance(self.root_key, str) and self.root_key in self._adata.obs.keys():
            vgraph[:, np.where(self._adata.obs[self.root_key] > 0.7)[0]] = 0
            vgraph.eliminate_zeros()

        membership = self._adata.obs[self.groups].cat.codes.values
        g = get_igraph_from_adjacency(vgraph, directed=True)
        vc = igraph.VertexClustering(g, membership=membership)
        cg_full = vc.cluster_graph(combine_edges="sum")
        transitions = get_sparse_from_igraph(cg_full, weight_attr="weight")
        transitions = transitions - transitions.T
        transitions_conf = transitions.copy()
        transitions = transitions.tocoo()
        total_n = self._neighbors.n_neighbors * np.array(vc.sizes())
        for i, j, v in zip(transitions.row, transitions.col, transitions.data):
            reference = np.sqrt(total_n[i] * total_n[j])
            transitions_conf[i, j] = 0 if v < 0 else v / reference
        transitions_conf.eliminate_zeros()

        # remove non-confident direct paths if more confident indirect path is found.
        T = transitions_conf.A
        threshold = max(np.nanmin(np.nanmax(T / (T > 0), axis=0)) - 1e-6, 0.01)
        T *= T > threshold
        for i in range(len(T)):
            idx = T[i] > 0
            if np.any(idx):
                indirect = np.clip(T[idx], None, T[i][idx][:, None]).max(0)
                T[i, T[i] < indirect] = 0

        if self.minimum_spanning_tree:
            T_tmp = T.copy()
            T_num = T > 0
            T_sum = np.sum(T_num, 0)
            T_max = np.max(T_tmp)
            for i in range(len(T_tmp)):
                if T_sum[i] == 1:
                    T_tmp[np.where(T_num[:, i])[0][0], i] = T_max
            from scipy.sparse.csgraph import minimum_spanning_tree

            T_tmp = np.abs(minimum_spanning_tree(-T_tmp).A) > 0
            T = T_tmp * T

        transitions_conf = csr_matrix(T)
        self.transitions_confidence = transitions_conf.T

        # set threshold for minimal spanning tree.
        df = pd.DataFrame(T, index=cats, columns=cats)
        self.threshold = np.nanmin(np.nanmax(df.values / (df.values > 0), axis=0))
        self.threshold = max(self.threshold - 1e-6, 0.01)
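
A small numeric sketch of the indirect-path pruning step above (values are made up): a direct entry i -> k is zeroed when some two-step path i -> j -> k is at least as confident at every hop.

import numpy as np

T = np.array([
    [0.0, 0.9, 0.2],   # direct 0 -> 2 (0.2) is weaker than the path 0 -> 1 -> 2
    [0.0, 0.0, 0.8],
    [0.0, 0.0, 0.0],
])
i = 0
idx = T[i] > 0
indirect = np.clip(T[idx], None, T[i][idx][:, None]).max(0)
T[i, T[i] < indirect] = 0
print(T[0])  # [0.  0.9 0. ] -- the 0 -> 2 entry is pruned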