def run(self, experiment: Experiment) -> SPIAResult:
    """Run SPIA for the differentially expressed genes of ``experiment``.

    Genes whose t-test value is at or below ``self.threshold`` are treated
    as differentially expressed (DE). Every pathway that
    ``KEGGPathways.search_by_gene`` reports for a DE gene is converted into
    a per-relation edge list and the whole collection is passed to
    ``SPIA.calculate_spia``.

    Args:
        experiment: experiment to analyse; it is passed to ``ttest`` and
            its ``calculate_fold_change()`` result supplies the "FC" column.

    Returns:
        a list of pathways: pathway id, pathway name, pNDE, pPERT, pG,
        FDR correction, Bonferroni correction, status for each pathway.
        The result wraps an empty list when there are no DE genes or no
        pathway is found for them.

    Side effects:
        Prints a message on either early exit and, when ``self.markdown``
        is truthy, calls ``generate_markdown`` on the result.
    """
    significant = ttest(experiment) <= self.threshold
    fold_change = experiment.calculate_fold_change()["FC"]

    # Names of every tested gene (the reference set handed to SPIA).
    all_gene_names = [gene.name for gene in significant.index.tolist()]

    # DE gene name -> fold change.
    de = {
        gene.name: fold_change[gene]
        for gene in significant[significant == True].index.tolist()  # noqa: E712
    }

    if not de:
        # if there are no DEGs anywhere, the problem of finding the impact
        # on various pathways is meaningless
        print('No differentialy expressed genes.')
        return SPIAResult([])

    db = KEGGPathways(self.organism)

    # Pathway id -> description; the first description seen for an id wins.
    pathways = {}
    for gene_name in de:
        for code, descr in db.search_by_gene(gene_name).items():
            pathways.setdefault(code, descr)

    if not pathways:
        print('No pathways found in database.')
        return SPIAResult([])

    spia_input = {}
    for code in pathways:
        pathway = db.get_pathway(code)
        path_genes = list(set(pathway.nodes))
        # Position of each node in ``path_genes``; replaces a linear
        # ``list.index`` scan for every edge endpoint.
        position = {gene: idx for idx, gene in enumerate(path_genes)}

        # Start with an (empty) entry for every known relation type, then
        # add any relation that only shows up in this pathway.
        interactions = {relation: [] for relation in rel}
        for edge, kinds in get_edge_attributes(pathway, 'type').items():
            # Index the edge key instead of unpacking it so that keys with
            # extra components beyond the two endpoints keep working.
            endpoints = [position[edge[0]], position[edge[1]]]
            interactions.setdefault('_'.join(kinds), []).append(endpoints)

        interactions['row_names'] = path_genes
        spia_input[code] = interactions

    spia_input['id2name'] = pathways

    result = SPIAResult(SPIA.calculate_spia(de, all_gene_names, spia_input))
    if self.markdown:
        result.generate_markdown(self.markdown, 'Results of SPIA:')
    return result
def run(self, experiment: Experiment) -> ImpactAnalysisResult:
    """Rank pathways of the differentially expressed genes by impact factor.

    The method stores ``self.experiment_genes``, ``self.FC`` and
    ``self.degs`` on the instance, and removes from ``experiment`` the genes
    whose fold change is NaN. It prints a message on either early exit and,
    when ``self.markdown`` is truthy, calls ``generate_markdown`` on the
    result.

    Args:
        experiment: experiment to analyse.

    Returns:
        list of pathways sorted by their impact factor (descending, NaN
        treated as 0). Each pathway in the list has values of FDR and
        Bonferroni corrections assigned. The result wraps an empty list
        when there are no differentially expressed genes or no pathway is
        found for them.
    """
    self.experiment_genes = {
        gene.name for gene in experiment.get_all().genes
    }

    # Fold change is kept on the instance; genes for which it cannot be
    # calculated correctly (NaN) are dropped from the experiment.
    self.FC = experiment.calculate_fold_change()
    fold_changes = self.FC['FC']
    experiment.exclude_genes(list(fold_changes[isnan(fold_changes)].index))

    if self.degs:
        # Caller-supplied DEGs.
        # NOTE(review): ``self.experiment_genes`` holds ``gene.name`` values
        # while this test uses ``Gene(...)`` objects and keeps the entries
        # that are NOT found - confirm this is the intended filter.
        supplied = {}
        for name in self.degs:
            if Gene(name) not in self.experiment_genes:
                supplied[Gene(name)] = True
        self.degs = pd.Series(supplied)
    else:
        # Select differentially expressed genes with a t-test.
        significant = ttest(experiment) <= self.threshold
        self.degs = significant[significant == True]  # noqa: E712
    # NOTE(review): ``self.degs`` is now a pandas Series, so ``if self.degs``
    # on a later call would evaluate a Series' truth value - confirm run()
    # is called only once per instance.

    if self.degs.size == 0:
        # if there are no DEGs anywhere, the problem of finding the impact
        # on various pathways is meaningless
        print('No differentialy expressed genes.')
        return ImpactAnalysisResult([])

    db = KEGGPathways(self.org)

    # Pathway code -> description; the first description seen wins.
    pathways = {}
    deg_names = [gene.name for gene in self.degs.index]
    for gene_name in deg_names:
        for code, descr in db.search_by_gene(gene_name).items():
            pathways.setdefault(code, descr)

    if not pathways:
        print('No pathways found in database.')
        return ImpactAnalysisResult([])

    table = pd.DataFrame(columns=['name', 'IF', 'pvalue'])
    for code, descr in pathways.items():
        impact_factor, pval = self.calculate_impact_factor(
            experiment, db.get_pathway(code))
        # Pathways without both an impact factor and a p-value are skipped.
        if impact_factor is None or pval is None:
            continue
        table.loc[len(table.index)] = [descr, impact_factor, pval]

    fdr, bonferroni = self.calculate_corrections(table['pvalue'])
    table['FDR'] = fdr
    table['Bonferroni'] = bonferroni

    def impact_or_zero(entry):
        # NaN impact factors sort as 0.
        return 0 if isnan(entry.IF) else entry.IF

    ranked = sorted(
        (IAPathway(table.loc[row]) for row in range(len(table.index))),
        key=impact_or_zero,
        reverse=True,
    )

    result = ImpactAnalysisResult(ranked)
    if self.markdown:
        result.generate_markdown(self.markdown,
                                 'Results of Impact Analysis:')
    return result