def computeOverlapPval(s1, s2, n):
    """Return the Fisher significance of the overlap between two sets.

    A 2x2 contingency table is built from the sets and handed to the
    ``fisher`` module:

    * members of both ``s1`` and ``s2``
    * members of ``s1`` only
    * members of ``s2`` only
    * the remainder of a universe of size ``n``

    Args:
        s1: first set.
        s2: second set.
        n: size of the universe both sets are drawn from.

    Returns:
        Whatever ``fisher.significance`` returns for the table.
    """
    in_both = len(s1 & s2)
    only_first = len(s1 - s2)
    only_second = len(s2 - s1)
    in_neither = n - (in_both + only_first + only_second)

    table = (in_both, only_first, only_second, in_neither)
    exact_probability = fisher.compute(*table)
    return fisher.significance(exact_probability, *table)
def computeTraitStatistics(genes, pfilter):
    """Compute overlap statistics between ``genes`` and every GWAS trait.

    Every trait in ``gwasDB.__studyByTrait`` that has at least one gene
    (according to ``gwasDB.getGenesForTrait(trait)`` called without a
    filter) is compared against ``genes`` through a 2x2 contingency table
    over ``geneDB.__approved_symbols``.

    Args:
        genes: set of genes to test; combined with the trait gene sets
            using ``&``, ``-`` and ``|``.
        pfilter: forwarded as the second argument of
            ``gwasDB.getGenesForTrait`` (presumably a p-value cutoff --
            confirm against gwasDB).

    Returns:
        dict mapping each trait to the tuple
        ``(a, oddsratio, kappa, len(traitGenes), fisher_exact, fisher_p,
        traitGenes)`` where ``a`` is the number of genes shared by the
        trait and ``genes``.
    """
    # NOTE(review): traits are pre-selected with the unfiltered gene set, so
    # the pfilter-ed set used below may still be empty for some of them.
    traitSet = set([key for key in gwasDB.__studyByTrait
                    if len(gwasDB.getGenesForTrait(key)) > 0])
    traitChi = {}

    pbar = ProgressBar()
    pbar.setMaximum(len(traitSet))
    pbar.updateProgress(0)

    for i, trait in enumerate(traitSet):
        # Report the real position every fifth trait (the previous code
        # always reported 0, so the bar never advanced).
        if i % 5 == 0:
            pbar.updateProgress(i)

        traitGenes = gwasDB.getGenesForTrait(trait, pfilter)

        # Contingency table: both / genes only / trait only / neither.
        a = len(traitGenes & genes)
        b = len(genes - traitGenes)
        c = len(traitGenes - genes)
        d = len(geneDB.__approved_symbols - (traitGenes | genes))

        oddsratio = geneUtils.oddsRatio(a, b, c, d)
        kappa = geneUtils.kappaStatistic(a, b, c, d)
        fisher_exact = fisher.compute(a, b, c, d)
        fisher_p = fisher.significance(fisher_exact, a, b, c, d)

        traitChi[trait] = (a, oddsratio, kappa, len(traitGenes),
                           fisher_exact, fisher_p, traitGenes)

    pbar.finalize()
    return traitChi
def _collectTargetingDrugs(gene_group, drug_counts_by_gene):
    """Gather the drugs listed in ``drugDB.__drugDict`` for ``gene_group``.

    Records the number of drugs per gene in ``drug_counts_by_gene`` (0 for
    genes without an entry) and returns ``(drug_list, drug_set)``: every
    drug once per targeting gene, and the distinct drugs.
    """
    drug_list = []
    drug_set = set()
    for gene in gene_group:
        drug_counts_by_gene[gene] = 0
        if gene in drugDB.__drugDict:
            gene_drugs = drugDB.__drugDict[gene]
            drug_list.extend(gene_drugs)
            drug_counts_by_gene[gene] += len(gene_drugs)
            drug_set |= gene_drugs
    return drug_list, drug_set


def computeTraitDrugLists(RE_genes, drug_genes, pfilter_cutoff):
    """Store per-trait drug lists and drug-overlap statistics.

    For every trait in ``gwasDB.__studyByTrait`` with a non-empty gene set
    (after applying ``pfilter_cutoff``), the following keys are written into
    ``__traitMetaAnalysis[trait]``:

    * ``'RE_drugs'``    -- drugs targeting genes in both the trait and
      ``RE_genes`` (one entry per gene/drug pair).
    * ``'other_drugs'`` -- drugs targeting trait genes that are in
      ``drug_genes`` but not in ``RE_genes``.
    * ``'drug_counts'`` -- dict gene -> number of drugs; contains every gene
      of ``RE_genes`` plus the non-RE trait drug targets.
    * ``'drugchi'``     -- ``(a, b, c, d, odds, kappa, fisher_exact,
      fisher_p)`` for the drug-level contingency table.

    ``__traitMetaAnalysis[trait]`` must already exist;
    ``computeTraitGeneLists`` creates it for traits with a non-empty gene
    set.

    Args:
        RE_genes: set of genes of interest.
        drug_genes: set of genes that are drug targets.
        pfilter_cutoff: forwarded as the second argument of
            ``gwasDB.getGenesForTrait``.

    Returns:
        None. Results are stored in ``__traitMetaAnalysis``.
    """
    traitSet = set(gwasDB.__studyByTrait.keys())

    for trait in traitSet:
        traitGenes = gwasDB.getGenesForTrait(trait, pfilter_cutoff)
        if len(traitGenes) == 0:
            continue

        drug_counts_by_gene = {}

        # Drugs hitting trait genes that are also RE genes.
        RE_drugs, drugs_targeting_RE = _collectTargetingDrugs(
            traitGenes & RE_genes, drug_counts_by_gene)

        # Drugs hitting the remaining (non-RE) trait drug targets.
        other_drugs, drugs_targeting_disease = _collectTargetingDrugs(
            (traitGenes & drug_genes) - RE_genes, drug_counts_by_gene)

        # Drugs hitting RE genes in general, minus those already counted for
        # this trait.
        drugs_targeting_other_RE = set()
        for gene in RE_genes:
            # setdefault keeps an entry for every RE gene without wiping the
            # counts recorded above for RE genes associated with this trait
            # (a plain "= 0" here reset those counts to zero).
            drug_counts_by_gene.setdefault(gene, 0)
            if gene in drugDB.__drugDict:
                drugs_targeting_other_RE |= drugDB.__drugDict[gene]
        drugs_targeting_other_RE -= drugs_targeting_RE

        a = len(drugs_targeting_RE)
        b = len(drugs_targeting_disease)
        c = len(drugs_targeting_other_RE)
        d = len(set(drugDB.__drugs.keys())
                - (drugs_targeting_RE
                   | drugs_targeting_disease
                   | drugs_targeting_other_RE))

        if (a + b) == 0 or (a + c) == 0:
            # An empty row or column: skip the statistics helpers and store
            # neutral values instead.
            odds, kappa = 0, 0
            fisher_exact = 1.0
            fisher_p = 1.0
        else:
            odds = geneUtils.oddsRatio(a, b, c, d)
            kappa = geneUtils.kappaStatistic(a, b, c, d)
            fisher_exact = fisher.compute(a, b, c, d)
            fisher_p = fisher.significance(fisher_exact, a, b, c, d)

        __traitMetaAnalysis[trait]['RE_drugs'] = RE_drugs
        __traitMetaAnalysis[trait]['other_drugs'] = other_drugs
        __traitMetaAnalysis[trait]['drug_counts'] = drug_counts_by_gene
        __traitMetaAnalysis[trait]['drugchi'] = (a, b, c, d, odds, kappa,
                                                 fisher_exact, fisher_p)
def computeTraitGeneLists(RE_genes, drug_genes, pfilter_cutoff):
    """Populate ``__traitMetaAnalysis`` with per-trait gene lists and stats.

    Traits whose gene set (after ``pfilter_cutoff``) is empty are skipped.
    For every other trait a fresh dict is stored under
    ``__traitMetaAnalysis[trait]`` with these keys:

    * ``'RE'``, ``'drugbank'``, ``'other'`` -- lists of
      ``(gene, number of traits for that gene)`` for trait genes that are in
      ``RE_genes``, in ``drug_genes``, or in neither.
    * ``'RE_chi'``, ``'drugbank_chi'`` -- ``(a, b, c, d, oddsratio, kappa,
      fisher_exact, fisher_p)`` comparing the trait genes with ``RE_genes``
      and ``drug_genes`` over ``geneDB.__approved_symbols``.
    * ``'geneset_size'`` -- number of trait genes.

    Args:
        RE_genes: set of genes of interest.
        drug_genes: set of genes that are drug targets.
        pfilter_cutoff: forwarded as the second argument of
            ``gwasDB.getGenesForTrait``.

    Returns:
        None.
    """

    def withTraitCounts(geneGroup):
        # Pair each gene with the number of traits gwasDB reports for it.
        pairs = []
        for symbol in geneGroup:
            pairs.append((symbol, len(gwasDB.getTraitsForGene(symbol))))
        return pairs

    def overlapStats(traitGenes, reference):
        # 2x2 table: both / reference only / trait only / neither.
        both = len(traitGenes & reference)
        referenceOnly = len(reference - traitGenes)
        traitOnly = len(traitGenes - reference)
        neither = len(geneDB.__approved_symbols - (traitGenes | reference))
        cells = (both, referenceOnly, traitOnly, neither)

        odds = geneUtils.oddsRatio(*cells)
        agreement = geneUtils.kappaStatistic(*cells)
        exact = fisher.compute(*cells)
        pvalue = fisher.significance(exact, *cells)
        return cells + (odds, agreement, exact, pvalue)

    allTraits = set(gwasDB.__studyByTrait.keys())

    progress = ProgressBar()
    progress.setMaximum(len(allTraits))
    progress.updateProgress(0)

    for position, trait in enumerate(allTraits):
        # Refresh the progress bar on every fifth trait.
        if position % 5 == 0:
            progress.updateProgress(position)

        traitGenes = gwasDB.getGenesForTrait(trait, pfilter_cutoff)
        if len(traitGenes) == 0:
            continue

        record = {}
        __traitMetaAnalysis[trait] = record

        record['RE'] = withTraitCounts(traitGenes & RE_genes)
        record['drugbank'] = withTraitCounts(traitGenes & drug_genes)
        record['other'] = withTraitCounts(traitGenes - RE_genes - drug_genes)

        record['RE_chi'] = overlapStats(traitGenes, RE_genes)
        record['drugbank_chi'] = overlapStats(traitGenes, drug_genes)

        record['geneset_size'] = len(traitGenes)

    progress.finalize()
# Split the study genes against the genes known to the GWAS database:
# genes in both, study genes missing from GWAS, and GWAS genes missing from
# the study set.
commonGenes = studyGenes & gwasDB.__geneSet
cgWithoutGWAS = studyGenes - gwasDB.__geneSet
gwasWithoutCG = gwasDB.__geneSet - studyGenes

print "initializing fisher algorithm..."
# Must run before the fisher.compute() calls below.
# NOTE(review): presumably sizes fisher's internal tables for the number of
# approved gene symbols -- confirm against the fisher module.
fisher.init(len(geneDB.__approved_symbols))

# Contingency table for GWAS genes vs. study genes:
#   a1 = in both, b1 = GWAS only, c1 = study only,
#   d1 = approved symbols that are in neither set.
a1 = len(commonGenes)
b1 = len(gwasWithoutCG)
c1 = len(cgWithoutGWAS)
d1 = len( geneDB.__approved_symbols - (studyGenes | gwasDB.__geneSet) )

oddsratio1 = geneUtils.oddsRatio(a1,b1,c1,d1)
kappa1 = geneUtils.kappaStatistic(a1,b1,c1,d1)
fisher_exact1 = fisher.compute(a1,b1,c1,d1)
fisherp1 = fisher.significance(fisher_exact1, a1,b1,c1,d1)

# Same analysis for drug-target genes vs. study genes:
#   a2 = in both, b2 = drug targets only, c2 = study only,
#   d2 = approved symbols that are in neither set.
commonDrugTargets = drugDB.__geneSet & studyGenes
a2 = len( commonDrugTargets )
b2 = len( drugDB.__geneSet - studyGenes )
c2 = len( studyGenes - drugDB.__geneSet )
d2 = len( geneDB.__approved_symbols - ( studyGenes | drugDB.__geneSet ) )

oddsratio2 = geneUtils.oddsRatio(a2,b2,c2,d2)
kappa2 = geneUtils.kappaStatistic(a2,b2,c2,d2)
fisher_exact2 = fisher.compute(a2,b2,c2,d2)
fisherp2 = fisher.significance(fisher_exact2, a2,b2,c2,d2)