def test_get_pathway():
    """Check the structure of the graph returned for pathway ``hsa04630``.

    Every node must carry ``type == 'gene'``, every edge ``type`` attribute
    must be a list, and every node must have at least one incident edge.
    """
    db = KEGGPathways()
    pathway = db.get_pathway('hsa04630')
    # ``Graph.node`` was removed in NetworkX 2.4; ``Graph.nodes`` exposes the
    # same per-node attribute dictionaries and is already used for iteration.
    assert all(
        pathway.nodes[node]['type'] == 'gene' for node in pathway.nodes)
    edge_types = nx.get_edge_attributes(pathway, 'type')
    assert all(isinstance(attr, list) for attr in edge_types.values())
    assert all(pathway.degree(node) > 0 for node in pathway.nodes)
def run(self, experiment: Experiment) -> SPIAResult:
    """
    Run SPIA on the differentially expressed genes of ``experiment``.

    Genes whose ``ttest`` value is at or below ``self.threshold`` are treated
    as differentially expressed. Every pathway that ``KEGGPathways`` reports
    for one of those genes is converted into per-interaction lists of
    ``[source_index, target_index]`` pairs (indices into that pathway's
    ``row_names``) and handed to ``SPIA.calculate_spia``.

    Args:
        experiment: experiment providing the data for ``ttest`` and the
            fold changes.

    Returns:
        ``SPIAResult`` wrapping the output of ``SPIA.calculate_spia``; per
        the original documentation this is, for each pathway: pathway id,
        pathway name, pNDE, pPERT, pG, FDR correction, Bonferroni
        correction, status. An empty ``SPIAResult`` is returned when there
        are no differentially expressed genes or no pathways are found.
    """
    significant = ttest(experiment) <= self.threshold
    fold_change = experiment.calculate_fold_change()["FC"]
    all_genes = [gene.name for gene in significant.index]
    # Map each differentially expressed gene name to its fold change.
    de = {
        gene.name: fold_change[gene]
        for gene in significant[significant].index
    }
    if not de:
        # if there are no DEGs anywhere, the problem of finding the impact
        # on various pathways is meaningless
        print('No differentialy expressed genes.')
        return SPIAResult([])

    db = KEGGPathways(self.organism)
    pathways = {}
    for gene in de:
        for code, name in db.search_by_gene(gene).items():
            # Keep the first description seen for each pathway id.
            pathways.setdefault(code, name)
    if not pathways:
        print('No pathways found in database.')
        return SPIAResult([])

    spia_input = {}
    for code in pathways:
        pathway = db.get_pathway(code)
        path_genes = list(set(pathway.nodes))
        # One-off position lookup instead of a linear ``list.index`` scan
        # for every edge endpoint.
        position = {gene: idx for idx, gene in enumerate(path_genes)}
        interactions = {relation: [] for relation in rel}
        edge_types = get_edge_attributes(pathway, 'type')
        for edge, kinds in edge_types.items():
            interactions.setdefault('_'.join(kinds), []).append(
                [position[edge[0]], position[edge[1]]])
        interactions['row_names'] = path_genes
        spia_input[code] = interactions
    spia_input['id2name'] = pathways

    result = SPIAResult(SPIA.calculate_spia(de, all_genes, spia_input))
    if self.markdown:
        result.generate_markdown(self.markdown, 'Results of SPIA:')
    return result
def name_geneid(data, geneids):
    """Re-index ``data`` by gene codes, keeping the old index as a column.

    For every item of ``geneids`` the first whitespace-separated token of
    ``KEGGPathways().get_gene_code(gen=item.name)`` is used as its code.

    Args:
        data: object with an assignable ``index`` and item assignment
            (presumably a pandas DataFrame - confirm against callers). It is
            modified in place: the previous index is stored under
            ``'gene_name'`` and the index is replaced by the gene codes.
        geneids: iterable of objects exposing a ``name`` attribute.

    Returns:
        Tuple ``(data, geneid)`` where ``geneid`` is the list of gene codes.

    Raises:
        IndexError: if ``get_gene_code`` returns an empty or whitespace-only
            string for some gene, since there is then no first token.
    """
    # A single database handle serves every lookup; it does not depend on
    # the gene being resolved.
    db = KEGGPathways()
    geneid = [db.get_gene_code(gen=gene.name).split()[0] for gene in geneids]
    data['gene_name'] = data.index
    data.index = geneid
    return data, geneid
def test_keggpathways_init():
    """Check organism resolution performed by the ``KEGGPathways`` constructor.

    The default organism code is ``"hsa"``; scientific and common names of
    the chicken resolve to ``"gga"`` regardless of letter case.
    """
    default_db = KEGGPathways()
    assert default_db.organism == "hsa"

    chicken_aliases = (
        "Gallus gallus",
        "gallus gallus",
        "chicken",
        "Chicken",
        "ChIcKeN",
    )
    # Build every instance first, then verify them all together.
    chicken_dbs = [KEGGPathways(alias) for alias in chicken_aliases]
    assert all([db.organism == "gga" for db in chicken_dbs])
def run(self, experiment: Experiment) -> ImpactAnalysisResult:
    """
    Run the impact analysis for ``experiment``.

    Stores the experiment gene names, the fold changes and the set of
    differentially expressed genes on ``self`` (``experiment_genes``, ``FC``
    and ``degs``), excludes from ``experiment`` the genes whose fold change
    is NaN, and scores every pathway that ``KEGGPathways`` reports for a
    differentially expressed gene.

    Args:
        experiment: experiment to analyse; genes with a NaN fold change are
            excluded from it via ``exclude_genes``.

    Returns:
        list of pathways sorted by their impact factor (descending, NaN
        treated as 0), wrapped in ``ImpactAnalysisResult``. Each pathway has
        values of FDR and Bonferroni corrections assigned. The result is
        empty when there are no differentially expressed genes or no
        pathways are found.
    """
    self.experiment_genes = {
        gene.name for gene in experiment.get_all().genes}

    # calculate fold change
    self.FC = experiment.calculate_fold_change()
    # remove genes for which fold change cannot be calculated correctly
    fold_change = self.FC['FC']
    experiment.exclude_genes(list(fold_change[isnan(fold_change)].index))

    # NOTE(review): ``self.degs`` is overwritten with a Series below, so a
    # second ``run`` on the same instance would evaluate ``if self.degs:``
    # on a Series - confirm that instances are single-use.
    if self.degs:
        # NOTE(review): ``experiment_genes`` holds gene *names* while this
        # membership test uses ``Gene`` objects, and it keeps the genes that
        # are NOT found - confirm that this filter is intended.
        self.degs = pd.Series({
            Gene(x): True
            for x in self.degs if Gene(x) not in self.experiment_genes
        })
    else:
        # select differentially expressed genes
        significant = ttest(experiment) <= self.threshold
        self.degs = significant[significant]

    if self.degs.size == 0:
        # if there are no DEGs anywhere, the problem of finding the impact
        # on various pathways is meaningless
        print('No differentialy expressed genes.')
        return ImpactAnalysisResult([])

    db = KEGGPathways(self.org)
    pathways = {}
    for gene in self.degs.index:
        for code, name in db.search_by_gene(gene.name).items():
            # Keep the first description seen for each pathway code.
            pathways.setdefault(code, name)
    if not pathways:
        print('No pathways found in database.')
        return ImpactAnalysisResult([])

    res = pd.DataFrame(columns=['name', 'IF', 'pvalue'])
    for code, descr in pathways.items():
        pathway = db.get_pathway(code)
        impact_factor, pval = self.calculate_impact_factor(
            experiment, pathway)
        # Pathways without a computable score are left out of the table.
        if impact_factor is not None and pval is not None:
            res.loc[len(res.index)] = [descr, impact_factor, pval]
    res['FDR'], res['Bonferroni'] = self.calculate_corrections(
        res['pvalue'])

    # Rows were appended under labels 0..n-1, so the index labels are the
    # row positions.
    ifp_pathways = [IAPathway(res.loc[label]) for label in res.index]
    ifp_pathways.sort(
        key=lambda x: x.IF if not isnan(x.IF) else 0, reverse=True)

    result = ImpactAnalysisResult(ifp_pathways)
    if self.markdown:
        result.generate_markdown(self.markdown,
                                 'Results of Impact Analysis:')
    return result
def test_get_gene_code():
    """An unknown gene name must resolve to an empty gene code."""
    db = KEGGPathways()
    unknown_gene = 'BIOINF2018'
    assert db.get_gene_code(unknown_gene) == ''
def test_get_organism_code():
    """Looking up an unknown organism name must raise ``KeyError``."""
    unknown_organism = 'H**o bioinformaticus'
    # The database object is created inside the context manager so that the
    # whole lookup, construction included, is covered by the expectation.
    with pytest.raises(KeyError):
        db = KEGGPathways()
        db.get_organism_code(unknown_organism)
def test_search_by_gene():
    """``search_by_gene`` returns a dict, empty when the gene is unknown."""
    db = KEGGPathways()

    known_hits = db.search_by_gene('BRCA2')
    assert isinstance(known_hits, dict)

    unknown_hits = db.search_by_gene('TheMostImportantGene')
    assert unknown_hits == {}