Beispiel #1
0
def goClassEnrichment(geneCluster,geneClusterName,goClassDict,popSize=None,FDRthresh=0.05):
    """
    goClassEnrichment(geneCluster,geneClusterName,goClassDict,popSize=None,FDRthresh=0.05):
    Score one gene cluster against every GO term of a single GO class.

    geneCluster     = set(genesGroupedBySomeQuality)
    geneClusterName = unique ID, copied unchanged into every output row
    goClassDict     = dict(keys=each GO term in bp, cc, or mf class,
                           vals=set(genesAttachedToGOterm))
    popSize         = int(numberOfGenesConsideredAsPopulation); handed
                      unchanged to goEnrichment
    FDRthresh       = value copied into every output row; rows are only
                      labelled with it here, never filtered by it

    goEnrichment is called once per GO term and the resulting
    [GOterm, pVal] rows are passed to benjHochFDR.  The rows that come back
    are read as GOterm, pVal, BHpVal from columns 0, 1 and 2.

    Returns a list with one row per GeneSet/GOterm combo:
    [GOterm,GOTermSize,geneSetName,geneClstSize,pVal,BHpVal,FDRthresh,numMatchingGenes,matchingGenes].
    Sizes, p-values, the threshold and the sorted matching-gene list are
    rendered as strings.
    """
    termPvals = [[term, goEnrichment(geneCluster, goClassDict[term], popSize=popSize)]
                 for term in goClassDict]
    results = benjHochFDR(termPvals)

    # Overwrite each adjusted row in place with its fully formatted report row.
    for idx, row in enumerate(results):
        term, rawP, adjP = row[0], row[1], row[2]
        termGenes = goClassDict[term]
        shared = termGenes.intersection(geneCluster)
        results[idx] = [
            term,
            str(len(termGenes)),
            geneClusterName,
            str(len(geneCluster)),
            '%.5g' % rawP,
            '%.5g' % adjP,
            str(FDRthresh),
            str(len(shared)),
            str(sorted(shared)),
        ]

    return results
Beispiel #2
0
import sys

# rateSNPs.py: read tab-separated rows of integers from inFile, compute an
# upper-tail binomial p-value from columns 1 and 2 of each qualifying row,
# append a benjHochFDR-adjusted value, and write the rows to
# '<inFile>.pr<prob>.qVals'.
#
# NOTE(review): neither `binom` nor `benjHochFDR` is imported in the visible
# part of this script -- presumably scipy.stats.binom and a project-local
# helper; confirm both are in scope before running.
usage = '\n\n\nUSAGE: python rateSNPs.py inFile prob'
if len(sys.argv) != 3:
    # Same stdout message as before, but written in a form that parses under
    # both Python 2 and Python 3 (the bare print statement is Python 2 only).
    sys.stdout.write(usage + '\n')
    # sys.exit is always available; the bare exit() builtin is injected by the
    # site module and is absent when the interpreter starts without it.
    sys.exit(1)

inFile = sys.argv[1]
prob   = float(sys.argv[2])

varPos = []

# Calculate and record >= x pVals
# NOTE(review): mode 'rU' is kept for the Python 2 interpreter this script
# targets; it must become plain 'r' on Python 3.11+, where 'U' was removed.
with open(inFile,'rU') as fIn:
    for line in fIn:
        # Skip blank lines (e.g. a trailing newline at end of file) instead of
        # crashing on int('').
        if not line.strip():
            continue
        fields = [int(x) for x in line.strip('\n').split('\t')]
        if (fields[2] > 0) and (fields[1] >= 5):
            # We want the cumulative p-val for x or GREATER mismatches, i.e.
            # 1 - cdf(x-1, n, prob) <-- original author still needs to confirm.
            #   *** "x or LESS" was suggested and tried, but the results did
            #       NOT look right: 546403	29	29	0	1.0	1.0
            # NOTE(review): if binom is scipy.stats.binom, binom.sf(x-1, n, prob)
            # gives the same tail with better precision for tiny p-values.
            cumP = 1-binom.cdf(fields[2]-1,fields[1],prob)
            varPos.append(fields+[cumP])

# Calculate BH adjusted q-vals
# cumP sits at index 4 only when every input row has exactly four columns --
# verify against the input format.
varPos = benjHochFDR(varPos,pValColumn=4)

# The with-block guarantees the output is flushed and closed even on error.
with open('%s.pr%.5f.qVals' % (inFile,prob), 'w') as fOut:
    for item in varPos:
        fOut.write('%s\n' % ('\t'.join([str(x) for x in item])))
        
        
Beispiel #3
0
      ['H30',0.17969873276261300000],
      ['H31',0.06671269389523740000],
      ['H32',0.52378879019888900000],
      ['H33',0.09512201528646370000],
      ['H34',0.78718046267139600000],
      ['H35',0.56462313820509200000],
      ['H36',0.00288596697049005000],
      ['H37',0.02627986555212090000],
      ['H38',0.00206440762381543000],
      ['H39',0.09935440767001180000],
      ['H40',0.19816649844661500000],
      ['H41',0.22098313443918600000],
      ['H42',0.10902751849802400000],
      ['H43',0.96855523882802700000],
      ['H44',0.00074450579752643400],
      ['H45',0.75891146728055300000],
      ['H46',0.03689279063906480000],
      ['H47',0.00000657187928204422],
      ['H48',0.84144926992132700000],
      ['H49',0.00566326392627456000],
      ['H50',0.90896231228331500000],
      ['H51',0.00007291688192959810],
      ['H52',0.13261734077759300000],
      ['H53',0.71829596327866800000]]



# Cross-check: run benjHoch and statsDefs.benjHochFDR on the same `ps` table
# (presumably the [label, pValue] rows listed above, with the p-value in
# column 1) and record whether the two results compare equal.
rVals  = benjHoch(ps,)
nrVals = statsDefs.benjHochFDR(ps,pValColumn=1)
# NOTE(review): `test` is a single bool only if both results are plain Python
# sequences -- confirm the return types of the two functions.
test = rVals == nrVals
None