def computeOverlapPval(s1, s2, n):
    """Return the fisher significance value for the overlap of two sets.

    A 2x2 contingency table is built from the sets:
      - members of both s1 and s2,
      - members of s1 only,
      - members of s2 only,
      - the remainder of a universe of n items.

    The four counts are passed to fisher.compute, and its result (together
    with the same counts) to fisher.significance, whose return value is
    returned unchanged.

    s1, s2 -- sets (they must support & and -)
    n      -- total number of items in the universe
    """
    in_both = len(s1 & s2)
    only_s1 = len(s1 - s2)
    only_s2 = len(s2 - s1)
    in_neither = n - (in_both + only_s1 + only_s2)

    table = (in_both, only_s1, only_s2, in_neither)
    return fisher.significance(fisher.compute(*table), *table)
Ejemplo n.º 2
0
def computeTraitStatistics(genes, pfilter):
    """Compute overlap statistics between `genes` and every GWAS trait.

    For each trait key in gwasDB.__studyByTrait that has at least one gene,
    a 2x2 contingency table is built against geneDB.__approved_symbols:
      a -- genes in both the trait and `genes`
      b -- genes in `genes` only
      c -- genes in the trait only
      d -- approved symbols in neither

    genes   -- set of gene symbols to test against each trait
    pfilter -- passed through to gwasDB.getGenesForTrait when fetching the
               genes of each trait

    Returns a dict mapping trait -> tuple
    (a, oddsratio, kappa, len(traitGenes), fisher_exact, fisher_p, traitGenes).
    """
    # NOTE(review): traits are pre-selected by calling getGenesForTrait
    # WITHOUT pfilter, while the per-trait gene set below is fetched WITH
    # pfilter, so a selected trait may still yield an empty gene set here.
    # Left unchanged; confirm whether the selection should use pfilter too.
    traitSet = set(key for key in gwasDB.__studyByTrait
                   if len(gwasDB.getGenesForTrait(key)) > 0)

    traitChi = {}
    pbar = ProgressBar()
    pbar.setMaximum(len(traitSet))
    pbar.updateProgress(0)

    for i, trait in enumerate(traitSet):
        # Refresh the bar every fifth trait with the number processed so far
        # (previously this always reported 0, so the bar never advanced).
        if i % 5 == 0:
            pbar.updateProgress(i)

        traitGenes = gwasDB.getGenesForTrait(trait, pfilter)

        a = len(traitGenes & genes)
        b = len(genes - traitGenes)
        c = len(traitGenes - genes)
        d = len(geneDB.__approved_symbols - (traitGenes | genes))

        oddsratio = geneUtils.oddsRatio(a, b, c, d)
        kappa = geneUtils.kappaStatistic(a, b, c, d)

        fisher_exact = fisher.compute(a, b, c, d)
        fisher_p = fisher.significance(fisher_exact, a, b, c, d)

        traitChi[trait] = (a, oddsratio, kappa, len(traitGenes),
                           fisher_exact, fisher_p, traitGenes)

    pbar.finalize()
    return traitChi
Ejemplo n.º 3
0
def _collectDrugsForGenes(geneSubset, drug_counts_by_gene):
    """Collect the drugs listed in drugDB.__drugDict for each gene given.

    Every gene in geneSubset gets an entry in drug_counts_by_gene: the size
    of its drug set, or 0 when the gene has no entry in drugDB.__drugDict.

    Returns (drug_list, drug_set): the drugs appended gene by gene (a drug
    shared by several genes appears once per gene) and their union.
    """
    drug_list = []
    drug_set = set()
    for gene in geneSubset:
        drug_counts_by_gene[gene] = 0
        if gene in drugDB.__drugDict:
            gene_drugs = drugDB.__drugDict[gene]
            drug_list.extend(gene_drugs)
            drug_counts_by_gene[gene] = len(gene_drugs)
            drug_set |= gene_drugs
    return drug_list, drug_set


def computeTraitDrugLists(RE_genes, drug_genes, pfilter_cutoff):
    """Store per-trait drug lists and drug-overlap statistics.

    For every trait in gwasDB.__studyByTrait with a non-empty gene set
    (per gwasDB.getGenesForTrait(trait, pfilter_cutoff)) the following keys
    are written into __traitMetaAnalysis[trait]:
      'RE_drugs'    -- drugs for trait genes that are in RE_genes
      'other_drugs' -- drugs for trait genes in drug_genes but not RE_genes
      'drug_counts' -- dict gene -> number of drugs, covering the genes of
                       both groups above plus every gene in RE_genes
      'drugchi'     -- (a, b, c, d, odds, kappa, fisher_exact, fisher_p)
                       where a/b/c count drugs targeting trait RE genes,
                       other trait drug genes, and only non-trait RE genes,
                       and d counts the remaining keys of drugDB.__drugs

    __traitMetaAnalysis[trait] must already exist for each such trait
    (computeTraitGeneLists in this file creates it); otherwise the
    subscript assignment raises.

    RE_genes, drug_genes -- sets of gene symbols
    pfilter_cutoff       -- passed through to gwasDB.getGenesForTrait
    """
    traitSet = set(gwasDB.__studyByTrait.keys())

    # Loop-invariant: neither the union of drugs over all RE genes nor the
    # full drug set depends on the trait, so build them once.
    all_RE_drugs = set()
    for gene in RE_genes:
        if gene in drugDB.__drugDict:
            all_RE_drugs |= drugDB.__drugDict[gene]
    all_drugs = set(drugDB.__drugs.keys())

    for trait in traitSet:
        traitGenes = gwasDB.getGenesForTrait(trait, pfilter_cutoff)

        if len(traitGenes) == 0:
            continue

        drug_counts_by_gene = {}

        RE_drugs, drugs_targeting_RE = _collectDrugsForGenes(
            traitGenes & RE_genes, drug_counts_by_gene)
        other_drugs, drugs_targeting_disease = _collectDrugsForGenes(
            (traitGenes & drug_genes) - RE_genes, drug_counts_by_gene)

        # Every RE gene gets an entry, but the counts just recorded for the
        # trait's RE genes must survive (they used to be reset to 0 here).
        for gene in RE_genes:
            drug_counts_by_gene.setdefault(gene, 0)

        drugs_targeting_other_RE = all_RE_drugs - drugs_targeting_RE

        a = len(drugs_targeting_RE)
        b = len(drugs_targeting_disease)
        c = len(drugs_targeting_other_RE)
        d = len(all_drugs - (drugs_targeting_RE |
                             drugs_targeting_disease |
                             drugs_targeting_other_RE))

        if (a + b) == 0 or (a + c) == 0:
            # An empty row or column: skip the statistics helpers and store
            # neutral values instead.
            odds, kappa = 0, 0
            fisher_exact = 1.0
            fisher_p = 1.0
        else:
            odds = geneUtils.oddsRatio(a, b, c, d)
            kappa = geneUtils.kappaStatistic(a, b, c, d)
            fisher_exact = fisher.compute(a, b, c, d)
            fisher_p = fisher.significance(fisher_exact, a, b, c, d)

        __traitMetaAnalysis[trait]['RE_drugs'] = RE_drugs
        __traitMetaAnalysis[trait]['other_drugs'] = other_drugs
        __traitMetaAnalysis[trait]['drug_counts'] = drug_counts_by_gene
        __traitMetaAnalysis[trait]['drugchi'] = (a, b, c, d, odds, kappa,
                                                 fisher_exact, fisher_p)
Ejemplo n.º 4
0
def computeTraitGeneLists(RE_genes, drug_genes, pfilter_cutoff):
    """Populate __traitMetaAnalysis with per-trait gene lists and statistics.

    For every trait in gwasDB.__studyByTrait whose gene set (from
    gwasDB.getGenesForTrait(trait, pfilter_cutoff)) is non-empty, a fresh
    dict is stored in __traitMetaAnalysis[trait] with the keys:
      'RE'           -- [(gene, trait count)] for trait genes in RE_genes
      'drugbank'     -- [(gene, trait count)] for trait genes in drug_genes
      'other'        -- [(gene, trait count)] for trait genes in neither
      'RE_chi'       -- (a, b, c, d, oddsratio, kappa, fisher_exact,
                        fisher_p) for trait genes vs. RE_genes
      'drugbank_chi' -- the same tuple for trait genes vs. drug_genes
      'geneset_size' -- number of trait genes

    "trait count" is len(gwasDB.getTraitsForGene(gene)).  The contingency
    tables are taken over geneDB.__approved_symbols.

    RE_genes, drug_genes -- sets of gene symbols
    pfilter_cutoff       -- passed through to gwasDB.getGenesForTrait
    """
    def pairWithTraitCount(geneSubset):
        # (gene, number of traits reported for that gene) for each gene.
        return [(gene, len(gwasDB.getTraitsForGene(gene)))
                for gene in geneSubset]

    def overlapStatistics(traitGenes, geneSet):
        # 2x2 table of traitGenes against geneSet, plus derived statistics.
        both = len(traitGenes & geneSet)
        set_only = len(geneSet - traitGenes)
        trait_only = len(traitGenes - geneSet)
        neither = len(geneDB.__approved_symbols - (traitGenes | geneSet))

        table = (both, set_only, trait_only, neither)
        odds = geneUtils.oddsRatio(*table)
        kappa = geneUtils.kappaStatistic(*table)
        exact = fisher.compute(*table)
        pvalue = fisher.significance(exact, *table)
        return table + (odds, kappa, exact, pvalue)

    traitSet = set(gwasDB.__studyByTrait.keys())

    pbar = ProgressBar()
    pbar.setMaximum(len(traitSet))
    pbar.updateProgress(0)

    for index, trait in enumerate(traitSet):
        # Refresh the bar every fifth trait.
        if index % 5 == 0:
            pbar.updateProgress(index)

        traitGenes = gwasDB.getGenesForTrait(trait, pfilter_cutoff)
        if len(traitGenes) == 0:
            continue

        __traitMetaAnalysis[trait] = {}

        __traitMetaAnalysis[trait]['RE'] = pairWithTraitCount(
            traitGenes & RE_genes)
        __traitMetaAnalysis[trait]['drugbank'] = pairWithTraitCount(
            traitGenes & drug_genes)
        __traitMetaAnalysis[trait]['other'] = pairWithTraitCount(
            traitGenes - RE_genes - drug_genes)

        __traitMetaAnalysis[trait]['RE_chi'] = overlapStatistics(
            traitGenes, RE_genes)
        __traitMetaAnalysis[trait]['drugbank_chi'] = overlapStatistics(
            traitGenes, drug_genes)

        __traitMetaAnalysis[trait]['geneset_size'] = len(traitGenes)

    pbar.finalize()
Ejemplo n.º 5
0
 cgWithoutGWAS = studyGenes - gwasDB.__geneSet
 gwasWithoutCG = gwasDB.__geneSet - studyGenes
 
 print "initializing fisher algorithm..."
 fisher.init(len(geneDB.__approved_symbols))
 
 # chi-square contingency table for gwas and RE
 
 a1 = len(commonGenes)
 b1 = len(gwasWithoutCG)
 c1 = len(cgWithoutGWAS)
 d1 = len( geneDB.__approved_symbols - (studyGenes | gwasDB.__geneSet) )
 oddsratio1 = geneUtils.oddsRatio(a1,b1,c1,d1)
 kappa1 = geneUtils.kappaStatistic(a1,b1,c1,d1)
 fisher_exact1 = fisher.compute(a1,b1,c1,d1)
 fisherp1 = fisher.significance(fisher_exact1, a1,b1,c1,d1)
 
 
 commonDrugTargets = drugDB.__geneSet & studyGenes
 
 a2 = len( commonDrugTargets )
 b2 = len( drugDB.__geneSet - studyGenes )
 c2 = len( studyGenes - drugDB.__geneSet )
 d2 = len( geneDB.__approved_symbols - ( studyGenes | drugDB.__geneSet ) )
 oddsratio2 = geneUtils.oddsRatio(a2,b2,c2,d2)
 kappa2 = geneUtils.kappaStatistic(a2,b2,c2,d2)
 fisher_exact2 = fisher.compute(a2,b2,c2,d2)
 fisherp2 = fisher.significance(fisher_exact2, a2,b2,c2,d2)
 
 
 gwas_drugbank_overlap = gwasDB.__geneSet & drugDB.__geneSet