コード例 #1
0
class MinGraphBuilder:
    """Incrementally assembles an undirected graph-tool graph.

    Nodes are added in batches from data frames; per-node ``code``,
    ``label`` and ``source`` arrays are accumulated and materialized as
    vertex properties by :meth:`finish`.
    """

    def __init__(self):
        self.graph = Graph(directed=False)
        self.codes = []
        self.labels = []
        self.sources = []

    def add_nodes(self, df, ns):
        """Add one vertex per row of ``df``; ``ns`` supplies the namespace
        offset and code. Returns a Series mapping node id -> vertex index."""
        count = len(df)
        _log.info('adding %d nodes to graph', count)
        first = self.graph.num_vertices()
        self.graph.add_vertex(count)
        last = self.graph.num_vertices()
        assert last - first == count
        ids = df['id'].values
        self.codes.append(ids + ns.offset)
        self.labels.append(ids)
        self.sources.append(np.full(count, ns.code, dtype='i2'))
        return pd.Series(np.arange(first, last, dtype='i4'), index=df['id'])

    def add_edges(self, f, src, dst):
        """Add an edge per row of ``f``, translating endpoint ids through
        the ``src``/``dst`` vertex-index mappings."""
        _log.info('adding %d edges to graph', len(f))
        pairs = np.zeros((len(f), 2), dtype='i4')
        pairs[:, 0] = src.loc[f.iloc[:, 0]]
        pairs[:, 1] = dst.loc[f.iloc[:, 1]]
        self.graph.add_edge_list(pairs)

    def finish(self):
        """Attach accumulated code/label/source vertex properties and
        return the finished graph."""
        _log.info('setting code attributes')
        prop = self.graph.new_vp('int64_t')
        prop.a[:] = np.concatenate(self.codes)
        self.graph.vp['code'] = prop

        _log.info('setting label attributes')
        prop = self.graph.new_vp('int64_t')
        prop.a[:] = np.concatenate(self.labels)
        self.graph.vp['label'] = prop

        _log.info('setting source attributes')
        prop = self.graph.new_vp('int16_t')
        prop.a[:] = np.concatenate(self.sources)
        self.graph.vp['source'] = prop

        return self.graph
コード例 #2
0
def copy_node_attributes(g_to: gt.Graph, node_to: gt.Vertex, g_from: gt.Graph,
                         node_from: gt.Vertex):
    """Copy every vertex-property value from ``node_from`` in ``g_from``
    onto ``node_to`` in ``g_to``, creating missing property maps.

    Fix: ``g.properties`` is the registry keyed by ``(kind, name)`` tuples
    (kind is 'v', 'e' or 'g'); the original iterated ``g_from.vp.properties``,
    which resolves to a vertex property literally named "properties".
    """
    for p_type, vp_name in g_from.properties:
        if p_type != 'v':
            # only vertex properties are copied
            continue
        old_vp = g_from.vp[vp_name]
        if vp_name not in g_to.vp:
            # create the property map on the target with a matching type
            g_to.vp[vp_name] = g_to.new_vp(old_vp.value_type())
        new_vp = g_to.vp[vp_name]
        # deepcopy so vector-valued properties do not share storage
        new_vp[node_to] = deepcopy(old_vp[node_from])
コード例 #3
0
    def __make_graph(self, X):
        """Build an undirected bipartite document-word graph from the
        document-term matrix ``X`` (docs on rows, words on columns).

        Vertex property ``kind`` is 0 for documents and 1 for words; when
        ``self.weighted_edges`` is set, counts go into the edge property
        ``count``, otherwise ``count`` parallel edges are added.
        """
        g = Graph(directed=False)
        # define node properties
        # kind: docs - 0, words - 1
        kind = g.vp["kind"] = g.new_vp("int")
        if self.weighted_edges:
            ecount = g.ep["count"] = g.new_ep("int")

        # add all documents first, then all words
        doc_vertices = [g.add_vertex() for _ in range(X.shape[0])]
        word_vertices = [g.add_vertex() for _ in range(X.shape[1])]

        # Fix: mark vertex kinds up front. The original set them inside the
        # edge loop, so vertices with no nonzero entry (e.g. words absent
        # from every document) kept the default kind 0 and were
        # indistinguishable from documents.
        for v in doc_vertices:
            kind[v] = 0
        for v in word_vertices:
            kind[v] = 1

        # add all tokens as links
        X = scipy.sparse.coo_matrix(X)

        if not self.weighted_edges and X.dtype != int:
            X_int = X.astype(int)
            if not np.allclose(X.data, X_int.data):
                raise ValueError('Data must be integer if '
                                 'weighted_edges=False')
            X = X_int

        for row, col, count in zip(X.row, X.col, X.data):
            doc_vert = doc_vertices[row]
            word_vert = word_vertices[col]

            if self.weighted_edges:
                e = g.add_edge(doc_vert, word_vert)
                ecount[e] = count
            else:
                # unweighted: one parallel edge per token occurrence
                for n in range(count):
                    g.add_edge(doc_vert, word_vert)
        return g
コード例 #4
0
def vp_map(g: gt.Graph,
           v_property: str,
           p_type: str = 'int') -> gt.PropertyMap:
    """Return the vertex property ``v_property`` of ``g``, creating it
    with value type ``p_type`` when it does not exist yet."""
    registry = g.vp
    if v_property not in registry:
        registry[v_property] = g.new_vp(p_type)
    return registry[v_property]
コード例 #5
0
class GraphAdapter(AdapterBase):
    """Adapter that records evolutionary individuals and their relations
    in a graph-tool :class:`Graph` and can persist it to disk."""

    def __init__(self, seed_str, name, file_extension='gml',
                 vertex_schema=None, edge_schema=None):
        """Create the backing graph and declare its property schemas.

        Fix: the schema defaults were mutable dict literals in the
        signature; default arguments are evaluated once and shared across
        calls, so they are now built inside the body.
        """
        if vertex_schema is None:
            vertex_schema = {
                'gene': 'vector<bool>',
                'gen': 'int',
                'fitness': 'vector<long>',
                'score': 'long'
            }
        if edge_schema is None:
            edge_schema = {
                'label': 'string',
                'gen': 'int'
            }

        self.seed = seed_str
        self.name = name
        self.file_extension = file_extension
        self.graph = Graph()

        # Create graph properties
        self.graph.gp.labels = self.graph.new_gp('vector<string>')
        self.graph.gp.labels = [seed_str]

        self.graph.gp.name = self.graph.new_gp('string')
        self.graph.gp.name = self.name

        # Create vertex properties
        for key in vertex_schema:
            self.graph.vp[key] = self.graph.new_vp(vertex_schema[key])

        # Create edge properties
        for key in edge_schema:
            self.graph.ep[key] = self.graph.new_ep(edge_schema[key])

    def add_node(self, gene, gen=0, attrs=None):
        """Add a vertex for ``gene`` at generation ``gen`` with optional
        extra attributes; return its index."""
        if attrs is None:  # avoid shared mutable default
            attrs = {}
        v = self.graph.add_vertex()
        self.graph.vp.gene[v] = gene
        self.graph.vp.gen[v] = gen
        self.set_props(v, attrs)

        return self.graph.vertex_index[v]

    def add_edge(self, TAG, srcID, destID, attrs=None):
        """Add a ``TAG``-labelled edge from ``srcID`` to ``destID`` with
        optional extra attributes; return its index."""
        if attrs is None:  # avoid shared mutable default
            attrs = {}
        e = self.graph.add_edge(srcID, destID)
        self.graph.ep.label[e] = TAG
        for key in attrs:
            self.graph.ep[key][e] = attrs[key]
        return self.graph.edge_index[e]

    def getNode(self, nodeID):
        """Return the Vertex object for ``nodeID``."""
        return self.graph.vertex(nodeID)

    def getEdge(self, edgeID):
        """Return the Edge object for ``edgeID``."""
        return self.graph.edge(edgeID)

    def fetchIndividual(self, individual):
        """Return the index of the newest vertex whose gene equals
        ``individual``, or None when absent."""
        targets = graph_tool.util.find_vertex(self.graph, self.graph.vp.gene,
                                              individual)
        # find the last node, the one with highest `gen`
        if targets:
            # guaranteed to be in order!!
            return self.graph.vertex_index[targets[-1]]
        else:
            return None

    def walk_edge(self, TAG, startID):
        """Not implemented."""
        pass

    def update_fitness(self, nodeID, fitness):
        """Store ``fitness`` on the vertex ``nodeID``."""
        v = self.graph.vertex(nodeID)
        self.set_props(v, {'fitness': fitness})

    def update_score(self, nodeID, score):
        """Store ``score`` on the vertex ``nodeID``."""
        v = self.graph.vertex(nodeID)
        self.set_props(v, {'score': score})

    def set_props(self, v, attrs):
        """Assign each attrs[key] to the matching vertex property of v."""
        for key in attrs:
            self.graph.vp[key][v] = attrs[key]

    def save(self):
        """Write the graph to graphs/<name>.<ext>; return the filename."""
        filename = os.path.join('graphs',
                                self.name) + '.' + self.file_extension
        self.graph.save(filename)
        return filename

    def numNodes(self):
        """Return the number of vertices."""
        return self.graph.num_vertices()
コード例 #6
0
class FullGraphBuilder:
    """Incrementally builds an undirected graph-tool graph with ``code``,
    ``source`` and ``label`` vertex properties plus arbitrary extra string
    attributes taken from the input data frames."""

    def __init__(self):
        self.graph = Graph(directed=False)
        self.codes = []
        self.sources = []
        self.labels = []
        self.attrs = set()  # names of extra string vertex properties seen

    def add_nodes(self, df, ns):
        """Add one vertex per row of ``df``; ``ns`` supplies the namespace
        offset and code. Extra columns become string vertex properties.
        Returns a Series mapping node id -> vertex index."""
        n = len(df)
        _log.info('adding %d nodes to graph', n)
        start = self.graph.num_vertices()
        vs = self.graph.add_vertex(n)
        end = self.graph.num_vertices()
        assert end - start == n
        nodes = pd.Series(np.arange(start, end, dtype='i4'), index=df['id'])
        self.codes.append(df['id'].values + ns.offset)
        self.sources.append(np.full(n, ns.code, dtype='i2'))
        if 'label' in df.columns:
            self.labels += list(df['label'].values)
        else:
            # fall back to the stringified id as the label
            self.labels += list(df['id'].astype('str').values)

        # Every column other than id/label becomes a string vertex property.
        for c in df.columns:
            if c in ['id', 'label']:
                continue
            if c not in self.attrs:
                vp = self.graph.new_vp('string')
                self.graph.vp[c] = vp
                self.attrs.add(c)
            else:
                vp = self.graph.vp[c]

            for v, val in zip(vs, df[c].values):
                vp[v] = val

        return nodes

    def add_edges(self, f, src, dst):
        """Add an edge per row of ``f``, translating endpoint ids through
        the ``src``/``dst`` vertex-index mappings."""
        _log.info('adding %d edges to graph', len(f))
        edges = np.zeros((len(f), 2), dtype='i4')
        edges[:, 0] = src.loc[f.iloc[:, 0]]
        edges[:, 1] = dst.loc[f.iloc[:, 1]]
        self.graph.add_edge_list(edges)

    def finish(self):
        """Materialize the accumulated vertex attributes and return the
        finished graph."""
        _log.info('setting code attributes')
        code_a = self.graph.new_vp('int64_t')
        code_a.a[:] = np.concatenate(self.codes)
        self.graph.vp['code'] = code_a

        _log.info('setting source attributes')
        source_a = self.graph.new_vp('string')
        for v, s in zip(self.graph.vertices(), np.concatenate(self.sources)):
            source_a[v] = src_label_rev[s]
        self.graph.vp['source'] = source_a

        # Fix: this stage sets labels; the original logged the copy-pasted
        # 'setting source attributes' message a second time.
        _log.info('setting label attributes')
        label_a = self.graph.new_vp('string')
        for v, l in zip(self.graph.vertices(), self.labels):
            label_a[v] = l
        self.graph.vp['label'] = label_a

        return self.graph
コード例 #7
0
class Interactome:
    r'''
    Attributes:
        interactome_path (str):
            the path to the tsv file containing the interactome per se
        namecode (str):
            the name used to recover the (sub)interactome later
        G (:class:`graph_tool.Graph`):
            the internal representation of the interactome as a graph
        genes2vertices (dict):
            mapping Entrez gene :math:`\rightarrow` set of vertices in ``self.G``
        genes (set):
            set of Entrez names of genes present in ``self.G``
        lcc_cache (dict):
            mapping a number of genes to the LCC size of the uniformly sampled subgraphs of this size
        density_cache (dict):
            mapping a number of genes to the density of the uniformly sampled subgraphs of this size
        clustering_cache (dict):
            mapping a number of genes to the clustering coefficient of the uniformly sampled subgraphs of this size
        distances (2D :class:`np.ndarray`):
            matrix of shortest paths from gene :math:`i` to gene :math:`j`
    '''
    def __init__(self, path, namecode=None):
        '''Load the interactome stored at ``path`` (loading is skipped
        when ``path`` is None) under the optional name ``namecode``.'''
        self.interactome_path = path
        self.namecode = namecode
        self.distances = None
        log('Loading interactome')
        if path is not None:
            self.load_network(path)
        log('interactome loaded')
        # Simulation caches are loaded lazily on first access.
        self.lcc_cache = None
        self.density_cache = None
        self.clustering_cache = None

    def get_gene_degree(self, gene):
        '''
        Get the degree of a given gene within the interactome.

        Args:
            gene (int): Entrez ID of the gene

        Return:
            int:
                `None` if the gene is not in :math:`\mathscr I` else the number of associated genes within the interactome
        '''
        if gene not in self.genes:
            return None
        vert_id = self.vert_id(gene)
        return self.G.vertex(vert_id).out_degree()

    def set_namecode(self, namecode):
        '''Set the namecode used to recover the interactome later; must be a str.'''
        assert isinstance(namecode, str)
        self.namecode = namecode

    def get_lcc_cache(self):
        '''
        Return the cache of LCC sizes. WARNING: no copy is made.
        Modifying the returned cache can result in undefined behaviour.
        '''
        self.load_lcc_cache()
        return self.lcc_cache

    def load_lcc_cache(self):
        '''Load the cache of LCC sizes simulations if exists, else creates an empty one.'''
        if self.lcc_cache is None:
            self.lcc_cache = IO.load_lcc_cache(self)

    def get_density_cache(self):
        '''
        Return the cache of density. WARNING: no copy is made.
        Modifying the returned cache can result in undefined behaviour.
        '''
        self.load_density_cache()
        return self.density_cache

    def load_density_cache(self):
        '''Load the cache of density simulations if exists, else creates an empty one.'''
        if self.density_cache is None:
            self.density_cache = IO.load_density_cache(self)

    def get_clustering_cache(self):
        '''
        Return the cache of clustering coefficients. WARNING: no copy is made.
        Modifying the returned cache can result in undefined behaviour.
        '''
        self.load_clustering_cache()
        return self.clustering_cache

    def load_clustering_cache(self):
        '''Load the cache of clustering coefficient simulations if exists, else creates an empty one.'''
        if self.clustering_cache is None:
            self.clustering_cache = IO.load_clustering_cache(self)

    def load_network(self, path):
        '''
        Build ``self.G`` from a tsv file of gene pairs (one interaction
        per line), then derive the gene/vertex mappings and precompute
        all shortest paths.

        Args:
            path: the path of the interactome file
        '''
        self.G = Graph(directed=False)
        self.genes2vertices = {}
        with open(path) as handle:
            for row in csv.reader(handle, delimiter='\t'):
                gene1, gene2 = map(int, row)
                # register both endpoints before wiring the edge
                self.add_vertex(gene1)
                self.add_vertex(gene2)
                self.G.add_edge(self.vert_id(gene1), self.vert_id(gene2))
        self.genes = set(self.genes2vertices)
        self.vertices2genes = {v: g for g, v in self.genes2vertices.items()}
        self.compute_spls()

    def add_vertex(self, gene):
        '''
        Create a new vertex for ``gene`` in the graph unless one exists.

        Args:
            gene: the name of the gene to add to the interactome
        '''
        if gene in self.genes2vertices:
            return
        # next free vertex index == current mapping size
        self.genes2vertices[gene] = len(self.genes2vertices)
        self.G.add_vertex()

    def vert_id(self, gene):
        '''
        Return the vertex id of the desired gene.

        Args:
            gene: the gene to retrieve

        Returns:
            the vertex id of the desired gene

        Raises:
            KeyError: if no such gene is in the interactome
        '''
        return self.genes2vertices[gene]

    def verts_id(self, genes, gene_to_ignore=None):
        '''
        Return a list of Vertex instances of the desired genes

        Args:
            genes: an iterable of desired genes
            gene_to_ignore: gene in `genes` that is not desired

        Returns:
            a list of Vertex instances of the desired genes

        Raises:
            KeyError: if any of the genes is not in the interactome
        '''
        return np.array(
            [self.vert_id(gene) for gene in genes if gene != gene_to_ignore])

    def compute_spls(self):
        '''Compute the shortest path between each pair of genes.'''
        if self.distances is not None:
            return
        dists = shortest_distance(self.G)
        self.distances = np.empty(
            (self.G.num_vertices(), self.G.num_vertices()), dtype=np.int)
        for idx, array in enumerate(dists):
            self.distances[idx, :] = array.a[:]

    def get_all_dists(self, A, B):
        '''
        Get a list containing all the distances from a gene in A to the gene set B

        Args:
            A: a source gene set
            B: a destination gene set

        Returns:
            a list of distances [d(a, B) s.t. a in A]
        '''
        # A is B (object identity) signals d(a, A \ {a}): each gene is
        # removed from its own target set.
        insert_self = A is B
        all_dists = list()
        for gene1 in A:
            if insert_self:
                for idx, el in enumerate(B):
                    if el == gene1:
                        indices = np.delete(B, idx)  # B minus the current gene
                        break
            else:
                indices = B
            if not indices.any():  # no targets left
                continue
            indices = np.asarray(indices)
            self.compute_spls()  # no-op once distances are cached
            # NOTE(review): gene1/indices index self.distances directly,
            # so A and B appear to hold vertex ids, not Entrez ids —
            # confirm against callers.
            dists = self.distances[gene1, indices]
            min_dist = np.min(dists)
            if min_dist > self.G.num_vertices():  # if gene is isolated
                continue  # go to next gene
            all_dists.append(min_dist)
        return all_dists

    def get_d_A(self, A):
        '''
        Return the inner distance of the disease module A as defined in [1].

        Args:
            A: a gene set

        Returns:
            :math:`d_A`

        References
        ----------

        [1] J. Menche et al., Science 347 , 1257601 (2015). DOI: 10.1126/science.1257601 http://science.sciencemag.org/content/347/6224/1257601
        '''
        inner_dists = self.get_all_dists(A, A)
        return np.mean(inner_dists)

    def get_d_AB(self, A, B):
        '''
        Return the graph-based distance between A and B as defined in [1].

        Args:
            A: a gene set
            B: a gene set

        Returns:
            :math:`d_{AB}`

        References
        ----------

        [1] J. Menche et al., Science 347 , 1257601 (2015). DOI: 10.1126/science.1257601 http://science.sciencemag.org/content/347/6224/1257601
        '''
        # distances in both directions contribute to the mean
        values = self.get_all_dists(A, B) + self.get_all_dists(B, A)
        return np.mean(values, dtype=np.float32)

    def get_random_subgraph(self, size):
        '''
        Uniformly sample a subgraph of given size.

        Args:
            size: number of genes to sample

        Returns:
            A subgraph of self of given size
        '''
        sampled = np.random.choice(len(self.genes), size=size, replace=False)
        return self.get_subgraph(sampled)

    def get_subgraph(self, vertices, genes=False):
        r'''
        Return the subgraph of self induced by the given vertices.

        Args:
            vertices: a set of vertex IDs (or a set of genes)
            genes: a boolean with value `True` if `vertices` is a set of genes
                and `False` if it is a set of vertex IDs.

        Returns:
            :math:`\Delta_{\text{vertices}}(G)`
        '''
        if genes:
            # translate gene names to vertex ids first
            vertices = self.verts_id(vertices)
        mask = self.G.new_vertex_property('bool')
        mask.a[vertices] = True
        return GraphView(self.G, vfilt=mask)

    def get_genes_lcc_size(self, genes):
        r'''
        Return the LCC size of the graph induced by given genes.

        Args:
            genes: an iterable containing genes

        Returns:
            :math:`|LCC(\Delta_{\text{genes}}(G))|`
        '''
        induced = self.get_subgraph(np.asarray(genes))
        return _get_lcc_size(induced)

    def get_random_genes_lcc(self, size):
        r'''
        Return the LCC size of a random subgraph of given size.

        Args:
            size (int): number of genes to sample

        Returns:
            :math:`|LCC(\mathcal G(\text{size}, G))|`
        '''
        sampled = self.get_random_subgraph(size)
        return _get_lcc_size(sampled)

    def get_random_genes_density(self, size):
        r'''
        Return the density of a random subgraph of given size.

        Args:
            size (int): number of genes to sample

        Returns:
            :math:`d(\mathcal G(\text{size}, G))`
        '''
        sampled = self.get_random_subgraph(size)
        return _get_density(sampled)

    def get_genes_density(self, genes):
        r'''
        Return the density of the subgraph induced by given genes.

        Args:
            genes: an iterable of genes

        Returns:
            :math:`d(\Delta_{\text{genes}}(G))`
        '''
        induced = self.get_subgraph(np.asarray(genes))
        return _get_density(induced)

    def get_random_genes_clustering(self, size):
        r'''
        Return the clustering coefficient of a random subgraph of given size.

        Args:
            size (int): number of genes to sample

        Returns:
            :math:`C(\mathcal G(\text{size}, G))`
        '''
        return _get_clustering_coefficient(self.get_random_subgraph(size))

    def get_genes_clustering(self, genes, entrez=False):
        r'''
        Return the clustering coefficient of the subgraph induced by given genes.

        Args:
            genes: an iterable of genes
            entrez: True when `genes` holds Entrez ids needing translation

        Returns:
            :math:`C(\Delta_{\text{genes}}(G))`
        '''
        if entrez:
            genes = self.verts_id(genes)
        induced = self.get_subgraph(np.asarray(genes))
        return _get_clustering_coefficient(induced)

    def get_lcc_score(self,
                      genes,
                      nb_sims,
                      shapiro=False,
                      shapiro_threshold=.05):
        r'''
        Get the z-score and the empirical p-value of the LCC size of given genes.

        Args:
            genes (set): gene set
            nb_sims (int): minimum number of simulations for probability distribution estimation
            shapiro (bool): True if normality test is needed, False otherwise (default False)
            shapiro_threshold (float): statistical threshold for normality test

        Returns:
            tuple:
                :math:`(z, p_e, N)` if shapiro is True and :math:`(z, p_e)` otherwise;
                where z is the z-score of the LCC size, :math:`p_e` is the associated
                empirical p-value and N is True if Shapiro-Wilk normality test
                p-value >= shapiro_threshold and False otherwise

        Raises:
            ValueError: if not enough simulations have been performed
        '''
        genes = genes & self.genes
        genes = self.verts_id(genes)
        nb_seeds = len(genes)
        if nb_seeds == 0:
            print('\n\t[Warning: get_lcc_score found no matching gene]')
            return None
        genes_lcc = self.get_genes_lcc_size(genes)
        # Fix: a missing cache entry used to escape as KeyError although the
        # documented contract is ValueError; treat "absent" as "0 sims".
        lccs = self.get_lcc_cache().get(nb_seeds)
        if lccs is None or len(lccs) < nb_sims:
            found = 0 if lccs is None else len(lccs)
            raise ValueError(('Only {} simulations found. Expected >= {}. ' + \
                              'fill_lcc_cache has not been called properly') \
                             .format(found, nb_sims))
        std = lccs.std()
        mean = lccs.mean()
        # z-score is undefined for a degenerate (zero-variance) distribution
        z = None if std == 0 else float((genes_lcc - mean) / std)
        empirical_p = (lccs >= genes_lcc).sum() / len(lccs)
        if shapiro:
            is_normal = stats.shapiro(np.random.choice(
                lccs, size=5000))[1] >= shapiro_threshold
            return z, empirical_p, is_normal
        return z, empirical_p

    def where_density_cache_nb_sims_lower_than(self, sizes, nb_sims):
        r'''
        Get the sizes whose density hasn't been simulated enough.

        Args:
            sizes (iterable): iterable of int values corresponding to sizes to test
            nb_sims (int): minimal number of simulations

        Returns:
            set:
                set of int values corresponding to sizes that haven't been simulated enough:

                .. math::
                    \{s \in \text{sizes} : |\text{density_cache}[s]| < \text{nb_sims}\}
        '''
        self.load_density_cache()
        return {size for size in sizes \
                     if size not in self.density_cache.keys() \
                     or len(self.density_cache[size]) < nb_sims}

    def where_lcc_cache_nb_sims_lower_than(self, sizes, nb_sims):
        r'''
        Get the sizes whose LCC hasn't been simulated enough.

        Args:
            sizes (iterable): iterable of int values corresponding to sizes to test
            nb_sims (int): minimal number of simulations

        Returns:
            set:
                set of int values corresponding to sizes that haven't been simulated enough:

                .. math::
                    \{s \in \text{sizes} : |\text{lcc_cache}[s]| < \text{nb_sims}\}
        '''
        self.load_lcc_cache()
        return {size for size in sizes \
                     if size not in self.lcc_cache.keys() \
                     or len(self.lcc_cache[size]) < nb_sims}

    def where_clustering_cache_nb_sims_lower_than(self, sizes, nb_sims):
        r'''
        Get the sizes whose clustering coefficient hasn't been simulated enough.

        Args:
            sizes (iterable): iterable of int values corresponding to sizes to test
            nb_sims (int): minimal number of simulations

        Returns:
            set:
                set of int values corresponding to sizes that haven't been simulated enough:

                .. math::
                    \{s \in \text{sizes} : |\text{clustering_cache}[s]| < \text{nb_sims}\}
        '''
        self.load_clustering_cache()
        return {size for size in sizes \
                     if size not in self.clustering_cache.keys() \
                     or len(self.clustering_cache[size]) < nb_sims}

    def fill_lcc_cache(self, nb_sims, sizes):
        r'''
        Fill the lcc_cache such that:

        .. math::
            \forall s \in \text{sizes} : |\text{lcc_cache[s]}| \geq \text{nb_sims}

        Args:
            nb_sims (int): minimal number of simulations to be performed
            sizes (set): set of number of genes for which LCC size shall be tested
        '''
        self.load_lcc_cache()
        started = time()
        total = len(sizes)
        for idx, size in enumerate(sizes):
            self._compute_lcc_distribution(nb_sims, size)
            # progress + remaining-time estimate from the elapsed fraction
            prop = (idx + 1) / total
            eta = sec2date((time() - started) / prop * (1 - prop))
            log('{} out of {}  ({:3.2f}%)    eta: {}'
                .format(idx + 1, total, 100 * prop, eta),
                end='\r')
        print('')
        self._write_lcc_cache()

    def fill_density_cache(self, nb_sims, sizes):
        r'''
        Fill the density cache such that:

        .. math::
            \forall s \in \text{sizes} : |\text{density_cache[s]}| \geq \text{nb_sims}

        Args:
            nb_sims (int): minimal number of simulations to be performed
            sizes (set): set of number of genes for which density shall be tested
        '''
        self.load_density_cache()
        started = time()
        total = len(sizes)
        for idx, size in enumerate(sizes):
            self._compute_disease_module_density(nb_sims, size)
            # progress + remaining-time estimate from the elapsed fraction
            prop = (idx + 1) / total
            eta = sec2date((time() - started) / prop * (1 - prop))
            log('{} out of {}  ({:3.2f}%)    eta: {}'
                .format(idx + 1, total, 100 * prop, eta),
                end='\r')
        print('')
        self._write_density_cache()

    def fill_clustering_cache(self, nb_sims, sizes):
        r'''
        Fill the clustering cache such that:

        .. math::
            \forall s \in \text{sizes} : |\text{clustering_cache[s]}| \geq \text{nb_sims}

        Args:
            nb_sims (int): minimal number of simulations to be performed
            sizes (set): set of number of genes for which clustering coefficient shall be tested
        '''
        self.load_clustering_cache()
        started = time()
        total = len(sizes)
        for idx, size in enumerate(sizes):
            self._compute_disease_modules_clustering(nb_sims, size)
            # progress + remaining-time estimate from the elapsed fraction
            prop = (idx + 1) / total
            eta = sec2date((time() - started) / prop * (1 - prop))
            log('{} out of {}  ({:3.2f}%)    eta: {}'
                .format(idx + 1, total, 100 * prop, eta),
                end='\r')
        print('')
        self._write_clustering_cache()

    def get_subinteractome(self,
                           genes,
                           neighbourhood='none',
                           namecode=None,
                           neighb_count=1):
        r'''
        Extract a subinteractome and return it as an :class:`Interactome`
        object which is then usable for analyses.

        For :math:`H` a subgraph of :math:`G`, the first neighbourhood of :math:`H`
        within :math:`G` is defined by the graph:

        .. math::
            \mathcal N_G(H) = \Delta_{\mathcal N_G(V(H))}(G),

        where for every :math:`W \subset V(G)`:

        .. math::
            \mathcal N_G(W) = W \cup \left\{v \in V(G) : \exists w \in V(H) \text{ s.t. } \{v, w\} \in E(G)\right\} \subset V(G).

        Args:
            genes (set): the gene set inducing the subinteractome
            neighbourhood (str):
                one of the following: `'none'`, `'first'`, `'first-joined'` where:

                * `'none'` for no neighbouring gene
                * `'first'` for the first neighbourhood :math:`\mathcal N_G(H)` with :math:`G` being `self` and :math:`H` being `genes`
                * `'first-joined'` for the first neighbourhood with restriction that every neighbourhood gene must be associated to at least `neighb_count` genes.
            namecode (str): the namecode to be given to the subinteractome
            neighb_count (int): (only if `neighbourhood == 'first-joined'`) determines the minimum number of adjacent genes to be extracted:

                .. math::
                    \mathcal N_G^{(k)}(H) := \Delta_{\mathcal N_G^{(k)}}(H),

                with:

                .. math::
                    \mathcal N_G^{(k)}(W) := W \cup \left\{v \in V(G) : \exists \{v_1, \ldots, v_k\} \in \binom {V(H)}k \text{ s.t. } \{v, v_i\} \in E(G)
                        \quad (i=1, \ldots, k)\right\} \subset V(G).

        Return:
            :class:`Interactome`:
                the subinteractome
        '''
        #TODO: implement neighbourhood extraction
        genes &= self.genes
        # Cache key: md5 of the sorted gene ids, appended to the source path.
        genes_hash = md5(''.join(sorted(map(
            str, genes))).encode('utf-8')).hexdigest()
        path = self.interactome_path + genes_hash
        # Reuse a previously-saved subinteractome when one exists on disk.
        ret = IO.load_interactome(path, False, namecode)
        if ret is not None:
            return ret
        ret = deepcopy(self)
        ret.namecode = namecode
        ret.interactome_path = path

        ret.genes, ret.G = self._get_subinteractome_graph(
            genes, neighbourhood, neighb_count)
        print('So {} vertices, {} edges (density == {})' \
              .format(
                ret.G.num_vertices(),
                ret.G.num_edges(),
                2*ret.G.num_edges()/(ret.G.num_vertices()*(ret.G.num_vertices() - 1))
              )
        )
        genes_l = np.array(list(ret.genes))  # NOTE(review): unused local
        # Rebuild gene -> vertex mapping from the temporary 'genes' vertex
        # property attached by _get_subinteractome_graph.
        vp = ret.G.vp['genes']
        ret.genes2vertices = {
            vp[vertex]: int(vertex)
            for vertex in ret.G.vertices()
        }
        print('...  {}'.format(len(ret.genes2vertices)))
        # Drop the temporary property from both graphs.
        del ret.G.vertex_properties['genes']
        del self.G.vertex_properties['genes']
        ret.genes = set(ret.genes2vertices.keys())
        # Simulation caches and distances are invalid for the subgraph.
        # NOTE(review): clustering_cache is not reset here — confirm
        # whether that omission is intentional.
        ret.lcc_cache = ret.density_cache = None
        ret.distances = None
        ret.compute_spls()
        IO.save_interactome(ret)
        return ret

    def _get_subinteractome_graph(self, genes, neighbourhood, neighb_count):
        '''
        Build the pruned subgraph induced by `genes` (isolated vertices
        removed) and return it together with the surviving gene set.
        '''
        print('Initially: {} genes'.format(len(genes)))
        if neighbourhood is not None and neighbourhood != 'none':
            genes = self._get_genes_neighbourhood(genes, neighbourhood,
                                                  neighb_count)
        # Tag every vertex with its gene id so the caller can rebuild the
        # gene -> vertex mapping after pruning.
        vp = self.G.new_vp('int')
        for gene, vertex in self.genes2vertices.items():
            vp[self.G.vertex(vertex)] = gene
        self.G.vertex_properties['genes'] = vp
        genes_l = np.array(list(genes))  # NOTE(review): unused local
        # Extract subgraph with ``genes``
        G = self.get_subgraph(genes, True)
        # Ignore genes of degree 0
        genes_idx = np.where(
            G.get_out_degrees(np.arange(G.num_vertices())) > 0)[0]
        # NOTE(review): indices from the filtered view are mapped through
        # the parent graph's vertices2genes — confirm the two index spaces
        # actually agree for GraphView.
        genes = {self.vertices2genes[idx] for idx in genes_idx}
        print('After removing isolated vertices: {} genes'.format(len(genes)))
        return genes, Graph(self.get_subgraph(genes, True), prune=True)

    def _get_genes_neighbourhood(self, genes, neighbourhood, neighb_count):
        '''
        Return `genes` extended with their first neighbourhood.

        Not implemented yet: the unconditional raise below makes the draft
        code after it unreachable.
        '''
        raise NotImplementedError()
        # First neighbourhood
        vert2genes = dict()
        for k, v in self.genes2vertices.items():
            vert2genes[v] = k
        closure_genes = set()
        for gene in genes:
            gene_idx = self.genes2vertices[gene]
            for neighbour in self.G.get_out_neighbours(gene_idx):
                closure_genes.add(vert2genes[neighbour])
        return closure_genes | genes

    def copy(self):
        '''
        Return a copy of the interactome
        '''
        ret = deepcopy(self)
        ret.G = self.G.copy()  # watch out: deepcopy(self.G) returns None...
        return ret

    ##### Private methods

    def _compute_lcc_distribution(self, nb_sims, size):
        N = nb_sims
        if size in self.lcc_cache:
            nb_sims -= len(self.lcc_cache[size])
        if nb_sims < 0:
            print('[Warning]: {} sims required but {} already performed' \
                  .format(N, len(self.lcc_cache[size])))
            return
        lccs = np.empty(nb_sims, dtype=np.float)
        for i in range(nb_sims):
            lccs[i] = self.get_random_genes_lcc(size)
        if size in self.lcc_cache:
            self.lcc_cache[size] = np.concatenate((self.lcc_cache[size], lccs))
        else:
            self.lcc_cache[size] = lccs

    def _compute_disease_module_density(self, nb_sims, size):
        N = nb_sims
        if size in self.density_cache:
            nb_sims -= len(self.density_cache[size])
        if size <= 0 or nb_sims <= 0:
            return
        densities = np.empty(nb_sims, dtype=np.float)
        for i in range(nb_sims):
            densities[i] = self.get_random_genes_density(size)
        try:
            densities = np.concatenate((self.density_cache[size], densities))
        except (KeyError, ValueError):
            pass
        self.density_cache[size] = densities

    def _compute_disease_modules_clustering(self, nb_sims, size):
        N = nb_sims
        if size in self.clustering_cache:
            nb_sims -= len(self.clustering_cache[size])
        if size < 3 or nb_sims <= 0:
            return
        clustering_coeffs = np.empty(nb_sims, dtype=np.float)
        for i in range(nb_sims):
            clustering_coeffs[i] = self.get_random_genes_clustering(size)
        try:
            clustering_coeffs = np.concatenate(
                (self.clustering_cache[size], clustering_coeffs))
        except (KeyError, ValueError):
            pass
        self.clustering_cache[size] = clustering_coeffs

    def _write_lcc_cache(self):
        '''Persist the LCC-size cache via the IO module.'''
        IO.save_lcc_cache(self, self.lcc_cache)

    def _write_density_cache(self):
        '''Persist the density cache via the IO module.'''
        IO.save_density_cache(self, self.density_cache)

    def _write_clustering_cache(self):
        '''Persist the clustering-coefficient cache via the IO module.'''
        IO.save_clustering_cache(self, self.clustering_cache)

    def save(self):
        '''Persist the whole interactome via the IO module.'''
        IO.save_interactome(self)