def test_get_pathway():
    """Check the structure of the graph returned for pathway ``hsa04630``.

    Every node must have ``type == 'gene'``, every edge ``type`` attribute
    present on the graph must be a list, and no node may have degree zero.
    """
    db = KEGGPathways()
    pathway = db.get_pathway('hsa04630')
    # ``Graph.node`` was removed in NetworkX 2.4; ``Graph.nodes[n]`` is the
    # supported way to reach a node's attribute dict.
    assert all(
        pathway.nodes[node]['type'] == 'gene' for node in pathway.nodes)
    assert all(
        isinstance(attr, list)
        for attr in nx.get_edge_attributes(pathway, 'type').values())
    assert all(pathway.degree(node) > 0 for node in pathway.nodes)
Example #2
0
    def run(self, experiment: Experiment) -> SPIAResult:
        """
        Run SPIA for the differentially expressed genes of ``experiment``.

        Genes whose ``ttest`` value is at or below ``self.threshold`` are
        treated as differentially expressed (DEGs). Every pathway that
        ``KEGGPathways.search_by_gene`` reports for a DEG is fetched and
        converted to a dict mapping interaction type to a list of
        ``[source_index, target_index]`` pairs, where the indices point into
        that pathway's ``'row_names'`` gene list. The collected dicts are then
        passed to ``SPIA.calculate_spia``.

        Args:
            experiment: the experiment to analyse.

        Returns: a list of pathways: pathway id, pathway name, pNDE, pPERT, pG, FDR correction,
            Bonferroni correction, status for each pathway. The result is
            empty when there are no DEGs or no pathway is found for them.

        """
        significant = ttest(experiment) <= self.threshold
        fold_change = experiment.calculate_fold_change()
        all_genes = [gene.name for gene in significant.index.tolist()]
        # gene name -> fold change, for the differentially expressed genes only
        de = {
            gene.name: fold_change["FC"][gene]
            for gene in significant[significant].index.tolist()
        }
        if not de:
            # if there are no DEGs anywhere, the problem of finding the impact on various pathways is meaningless
            print('No differentialy expressed genes.')
            return SPIAResult([])

        db = KEGGPathways(self.organism)
        pathways = {}
        for gene_name in de:
            for pathway_id, description in db.search_by_gene(gene_name).items():
                # keep the description from the first gene that reported it
                pathways.setdefault(pathway_id, description)
        if not pathways:
            print('No pathways found in database.')
            return SPIAResult([])

        spia_input = {}
        for pathway_id in pathways:
            pathway = db.get_pathway(pathway_id)
            path_genes = list(set(pathway.nodes))
            # Precompute each gene's position once instead of scanning
            # ``path_genes`` with ``list.index`` for every edge endpoint.
            position = {gene: idx for idx, gene in enumerate(path_genes)}
            interactions = {kind: [] for kind in rel}
            edge_types = get_edge_attributes(pathway, 'type')
            for edge, kinds in edge_types.items():
                # An edge may carry several types; they are joined with '_'
                # to form the interaction key.
                interactions.setdefault('_'.join(kinds), []).append(
                    [position[edge[0]], position[edge[1]]])
            interactions['row_names'] = path_genes
            spia_input[pathway_id] = interactions
        spia_input['id2name'] = pathways

        result = SPIAResult(SPIA.calculate_spia(de, all_genes, spia_input))
        if self.markdown:
            result.generate_markdown(self.markdown, 'Results of SPIA:')
        return result
Example #3
0
    def name_geneid(data, geneids):
        """Re-index ``data`` by the gene codes looked up for ``geneids``.

        The current index of ``data`` is preserved in a new ``'gene_name'``
        column, and the index is replaced by the first whitespace-separated
        token that ``KEGGPathways.get_gene_code`` returns for each gene.
        ``data`` is modified in place.

        Args:
            data: table to re-index (presumably a pandas DataFrame with one
                row per entry of ``geneids`` -- TODO confirm with callers).
            geneids: iterable of objects exposing a ``name`` attribute.

        Returns:
            A ``(data, geneid)`` tuple, where ``geneid`` is the list of gene
            codes now used as the index.

        Raises:
            IndexError: if no gene code is returned for one of the genes.
        """
        # One database object is enough for all lookups.
        db = KEGGPathways()
        geneid = []

        for gene in geneids:
            fields = db.get_gene_code(gen=gene.name).split()
            if not fields:
                # Same exception type as the bare ``[0]`` lookup on an empty
                # result, but with a message naming the offending gene.
                raise IndexError(
                    'no KEGG gene code found for gene %r' % (gene.name,))
            geneid.append(fields[0])

        data['gene_name'] = data.index
        data.index = geneid

        return data, geneid
def test_keggpathways_init():
    """Verify how the ``KEGGPathways`` constructor resolves the organism.

    Without an argument the organism code is ``"hsa"``; the scientific and
    common names of the chicken, in any letter case, all give ``"gga"``.
    """
    default_db = KEGGPathways()
    assert default_db.organism == "hsa"

    chicken_aliases = (
        "Gallus gallus",
        "gallus gallus",
        "chicken",
        "Chicken",
        "ChIcKeN",
    )
    chicken_dbs = [KEGGPathways(alias) for alias in chicken_aliases]
    assert all(chicken_db.organism == "gga" for chicken_db in chicken_dbs)
    def run(self, experiment: Experiment) -> ImpactAnalysisResult:
        """
        Run impact analysis on ``experiment``.

        Genes whose fold change is NaN are excluded from the experiment.
        Differentially expressed genes (DEGs) come from ``self.degs`` when it
        is set; otherwise they are the genes whose ``ttest`` value is at or
        below ``self.threshold``. An impact factor and p-value are then
        computed for every pathway that ``KEGGPathways.search_by_gene``
        reports for a DEG.

        Side effects: sets ``self.experiment_genes`` and ``self.FC``, replaces
        ``self.degs`` and calls ``experiment.exclude_genes``.

        Args:
            experiment: the experiment to analyse.

        Returns:
            list of pathways sorted by their impact factor. Each pathway in the list has values of FDR and
            Bonferroni corrections assigned. The result is empty when there
            are no DEGs or no pathway is found for them.
        """
        self.experiment_genes = {
            gene.name for gene in experiment.get_all().genes}

        # calculate fold change
        self.FC = experiment.calculate_fold_change()

        # remove genes for which fold change cannot be calculated correctly
        fold_changes = self.FC['FC']
        experiment.exclude_genes(list(fold_changes[isnan(fold_changes)].index))

        if self.degs:
            # NOTE(review): ``experiment_genes`` holds gene *names*, while this
            # filter tests ``Gene`` objects and keeps those NOT in the set --
            # confirm both the compared type and the direction are intended.
            # NOTE(review): ``self.degs`` becomes a Series here, so a second
            # call to ``run`` would evaluate ``if self.degs`` on a Series --
            # confirm ``run`` is only meant to be called once per instance.
            self.degs = pd.Series({
                Gene(x): True
                for x in self.degs if Gene(x) not in self.experiment_genes
            })
        else:
            # select differentially expressed genes
            pvalue = ttest(experiment) <= self.threshold
            self.degs = pvalue[pvalue]

        if self.degs.size == 0:
            # if there are no DEGs anywhere, the problem of finding the impact on various pathways is meaningless
            print('No differentialy expressed genes.')
            return ImpactAnalysisResult([])

        db = KEGGPathways(self.org)
        pathways = {}

        for gene in self.degs.index:
            for code, descr in db.search_by_gene(gene.name).items():
                # keep the description from the first gene that reported it
                pathways.setdefault(code, descr)

        if not pathways:
            print('No pathways found in database.')
            return ImpactAnalysisResult([])

        res = pd.DataFrame(columns=['name', 'IF', 'pvalue'])
        for code, descr in pathways.items():
            pathway = db.get_pathway(code)
            impact_factor, pval = self.calculate_impact_factor(
                experiment, pathway)
            # pathways without both values are left out of the result table
            if impact_factor is not None and pval is not None:
                res.loc[len(res.index)] = [descr, impact_factor, pval]

        res['FDR'], res['Bonferroni'] = self.calculate_corrections(
            res['pvalue'])
        ifp_pathways = [IAPathway(res.loc[i]) for i in range(len(res.index))]
        # NaN impact factors are ranked as 0 so the sort key stays comparable
        ifp_pathways.sort(key=lambda x: x.IF if not isnan(x.IF) else 0,
                          reverse=True)

        result = ImpactAnalysisResult(ifp_pathways)
        if self.markdown:
            result.generate_markdown(self.markdown,
                                     'Results of Impact Analysis:')
        return result
def test_get_gene_code():
    """An unrecognised gene name yields an empty gene code."""
    database = KEGGPathways()
    assert database.get_gene_code('BIOINF2018') == ''
def test_get_organism_code():
    """Looking up an unknown organism name raises ``KeyError``."""
    unknown_name = 'H**o bioinformaticus'
    with pytest.raises(KeyError):
        KEGGPathways().get_organism_code(unknown_name)
def test_search_by_gene():
    """A known gene gives a dict of pathways; an unknown gene gives ``{}``."""
    database = KEGGPathways()

    known_hits = database.search_by_gene('BRCA2')
    assert isinstance(known_hits, dict)

    unknown_hits = database.search_by_gene('TheMostImportantGene')
    assert unknown_hits == {}