Esempio n. 1
0
def testPeaks(degFN, dForm, allGeneInfo, gForm, switchStrand = False):

    #load/configure gene Info
    gNX = Nexus(allGeneInfo, gForm)
    gNX.load(['geneName', 'numReads', 'numSpots'])
    
    gName_numReads = {}
    gName_numSpots = {}
    while gNX.nextID():
        gName_numReads[gNX.geneName] = gNX.numReads
        gName_numSpots[gNX.geneName] = gNX.numSpots
   
   
    #load degFN info
    dNX = Nexus(degFN, dForm)
    dNX.load(['tcc', 'eLevel', 'geneNames', 'pValBin'])

    while dNX.nextID():
       
        gNames, readsForPeak = dNX.geneNames, dNX.eLevel
        chrom, strand, start, end = bioLibCG.tccSplit(dNX.tcc)
        if switchStrand:
            strand = -int(strand)
      
        pVals = []
        for gName in gNames:
            
            #may have to change gene name cuz of multiple spans
            try:
                totGeneReads = gName_numReads[gName]
                numSpotsForGene = gName_numSpots[gName]
            except KeyError:

                try:
                    gName = gName + '_RE_%s_%s' % (chrom, strand)
                    totGeneReads = gName_numReads[gName]
                    numSpotsForGene = gName_numSpots[gName]
                except KeyError:
                    print "FIX THIS GENE NAME", gName
                    continue

            #add psuedocount
            totGeneReads += 1
            numSpotsForGene += 1 # not sure whether to do this yet...

            #check for hidden intron gene overlap
            try:
                q = 1.0/numSpotsForGene
            except ZeroDivisionError:
                continue #intron gene

            #add p val
            pVals.append(binom.sf(readsForPeak, totGeneReads, q))

        dNX.pValBin = max(pVals) if pVals else -1.0

    dNX.save()