def markCenterExpression(aFN, wigDir, rn = None, tn = None):
        '''Store, per alignment, three expression fractions of a scan window.

        Loads the alignment Nexus file aFN (rn and tn are handed to
        Nexus.load unchanged) and the wig expression data found under wigDir.
        For every alignment a window of sLength positions is derived from the
        target tcc: on strand '1' it begins at start - 25 + tStart, on any
        other strand it ends at end + 25 - tStart.  The expression profile of
        that window is ordered by coordinate (descending when the strand is
        '-1') and centerExpression[aID] receives the share of the window's
        total expression found at ordered indices [8:12], [7:13] and [6:14].

        All three values stay 0.0 when the window has no expression or when
        tELevel is not one of the profile values.  The Nexus is saved at the
        end; nothing is returned.
        '''
        extend = 25

        timer = bioLibCG.cgTimer()
        timer.start()

        aNX = cgNexusFlat.Nexus(aFN, cgAlignmentFlat.cgAlignment)
        aNX.load(['centerExpression', 'tTcc', 'tStart', 'sLength', 'tELevel'], [rn, tn])

        # expression of the degradome
        wigDict = cgWig.loadWigDict(wigDir)

        # (result slot, slice of the ordered coordinates summed for that slot)
        windows = ((0, slice(8, 12)), (1, slice(7, 13)), (2, slice(6, 14)))

        for aID in aNX.centerExpression:
                aNX.centerExpression[aID] = [0.0, 0.0, 0.0]
                chrom, strand, start, end = bioLibCG.tccSplit(aNX.tTcc[aID])
                offset = aNX.tStart[aID]
                sLen = aNX.sLength[aID]

                # anchor the window on the strand-specific side of the tcc
                if strand == '1':
                        start = start - extend + offset
                        end = start + sLen
                else:
                        end = end + extend - offset
                        start = end - sLen

                scanRange = bioLibCG.makeTcc(chrom, strand, start, end)
                profile = cgWig.getExpressionProfile(scanRange, wigDict)

                # the recorded peak level has to show up inside this window
                peakInRange = aNX.tELevel[aID] in profile.values()

                total = sum(profile.values())
                orderedCoords = sorted(profile.keys(), reverse=(strand == '-1'))

                if total == 0 or not peakInRange:
                        continue

                for slot, window in windows:
                        windowSum = 0.0
                        for coord in orderedCoords[window]:
                                windowSum += profile[coord]
                        aNX.centerExpression[aID][slot] = windowSum/total

        aNX.save()
# Example #2
# 0
def markCenterExpression(aFN, wigDir, rn=None, tn=None):
    '''Store, per alignment, three expression fractions of a scan window.

    Loads the alignment Nexus file aFN (rn and tn are handed to Nexus.load
    unchanged) and the wig expression data found under wigDir.  For every
    alignment a window of sLength positions is derived from the target tcc:
    on strand '1' it begins at start - 25 + tStart, on any other strand it
    ends at end + 25 - tStart.  The expression profile of that window is
    ordered by coordinate (descending when the strand is '-1') and
    centerExpression[aID] receives the share of the window's total
    expression found at ordered indices [8:12], [7:13] and [6:14].

    All three values stay 0.0 when the window has no expression or when
    tELevel is not one of the profile values.  The Nexus is saved at the
    end; nothing is returned.
    '''
    extend = 25

    timer = bioLibCG.cgTimer()
    timer.start()

    aNX = cgNexusFlat.Nexus(aFN, cgAlignmentFlat.cgAlignment)
    fields = ['centerExpression', 'tTcc', 'tStart', 'sLength', 'tELevel']
    aNX.load(fields, [rn, tn])

    # expression of the degradome
    wigDict = cgWig.loadWigDict(wigDir)

    # (result slot, slice of the ordered coordinates summed for that slot)
    windows = ((0, slice(8, 12)), (1, slice(7, 13)), (2, slice(6, 14)))

    for aID in aNX.centerExpression:
        aNX.centerExpression[aID] = [0.0, 0.0, 0.0]
        chrom, strand, start, end = bioLibCG.tccSplit(aNX.tTcc[aID])
        offset = aNX.tStart[aID]
        sLen = aNX.sLength[aID]

        # anchor the window on the strand-specific side of the tcc
        if strand == '1':
            start = start - extend + offset
            end = start + sLen
        else:
            end = end + extend - offset
            start = end - sLen

        scanRange = bioLibCG.makeTcc(chrom, strand, start, end)
        profile = cgWig.getExpressionProfile(scanRange, wigDict)

        # the recorded peak level has to show up inside this window
        peakInRange = aNX.tELevel[aID] in profile.values()

        total = sum(profile.values())
        orderedCoords = sorted(profile.keys(), reverse=(strand == '-1'))

        if total == 0 or not peakInRange:
            continue

        for slot, window in windows:
            windowSum = 0.0
            for coord in orderedCoords[window]:
                windowSum += profile[coord]
            aNX.centerExpression[aID][slot] = windowSum / total

    aNX.save()
# Example #3
# 0
def updateELevel(oFN, wigDir, rn=None, tn=None):
    '''Set every record's eLevel to the highest wig value around its tcc.

    Loads 'tcc' and 'eLevel' from the degPeak Nexus file oFN (rn and tn are
    handed to Nexus.load unchanged) and the wig data under wigDir.  Each tcc
    is passed through bioLibCG.expandTcc(tcc, 3); the maximum value of the
    expression profile over the resulting range is stored as eLevel.  The
    Nexus is saved at the end; nothing is returned.
    '''
    peaks = cgNexusFlat.Nexus(oFN, degPeak.degPeak)
    peaks.load(['tcc', 'eLevel'], [rn, tn])

    expression = cgWig.loadWigDict(wigDir)

    for peakID in peaks.eLevel:
        widened = bioLibCG.expandTcc(peaks.tcc[peakID], 3)
        profile = cgWig.getExpressionProfile(widened, expression)
        highest = max(profile.values())
        peaks.eLevel[peakID] = highest

    peaks.save()
def updateELevel(oFN, wigDir, rn = None, tn = None):
        '''Set every origin RNA's eLevel to the highest wig value over its tcc.

        Loads 'tcc' and 'eLevel' from the OriginRNA Nexus file oFN (rn and tn
        are handed to Nexus.load unchanged) and the wig data under wigDir,
        stores the maximum of each tcc's expression profile as eLevel and
        saves the Nexus.  Nothing is returned.

        Notes kept from the original author: this does not need to be done
        per chromosome because the data is small enough, and the strand does
        not need to be flipped because the wig is opposite as well.
        '''
        origins = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
        origins.load(['tcc', 'eLevel'], [rn, tn])

        expression = cgWig.loadWigDict(wigDir)

        for originID in origins.eLevel:
                profile = cgWig.getExpressionProfile(origins.tcc[originID], expression)
                highest = max(profile.values())
                origins.eLevel[originID] = highest

        origins.save()
def updateELevel(oFN, wigDir, rn = None, tn = None):
        '''Set every origin RNA's eLevel to the highest wig value over its tcc.

        Loads 'tcc' and 'eLevel' from the OriginRNA Nexus file oFN (rn and tn
        are handed to Nexus.load unchanged) and the wig data under wigDir,
        stores the maximum of each tcc's expression profile as eLevel and
        saves the Nexus.  Nothing is returned.

        Notes kept from the original author: this does not need to be done
        per chromosome because the data is small enough, and the strand does
        not need to be flipped because the wig is opposite as well.
        '''
        origins = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
        origins.load(['tcc', 'eLevel'], [rn, tn])

        expression = cgWig.loadWigDict(wigDir)

        for originID in origins.eLevel:
                profile = cgWig.getExpressionProfile(origins.tcc[originID], expression)
                highest = max(profile.values())
                origins.eLevel[originID] = highest

        origins.save()
def updateELevel(oFN, wigDir, rn = None, tn = None):
        '''Set every record's eLevel to the highest wig value around its tcc.

        Loads 'tcc' and 'eLevel' from the degPeak Nexus file oFN (rn and tn
        are handed to Nexus.load unchanged) and the wig data under wigDir.
        Each tcc is passed through bioLibCG.expandTcc(tcc, 3); the maximum
        value of the expression profile over the resulting range is stored
        as eLevel.  The Nexus is saved at the end; nothing is returned.
        '''
        peaks = cgNexusFlat.Nexus(oFN, degPeak.degPeak)
        peaks.load(['tcc', 'eLevel'], [rn, tn])

        expression = cgWig.loadWigDict(wigDir)

        for peakID in peaks.eLevel:
                widened = bioLibCG.expandTcc(peaks.tcc[peakID], 3)
                profile = cgWig.getExpressionProfile(widened, expression)
                highest = max(profile.values())
                peaks.eLevel[peakID] = highest

        peaks.save()
def getPairInfo2(oFN, aFN, oWigDir, aWigDir, outFN, assembly):
        '''Write one tab-separated line per (origin RNA, filtered target) pair.

        oFN / aFN are the OriginRNA and alignment Nexus files, oWigDir /
        aWigDir the wig directories used for their expression profiles,
        assembly the genome passed to gf.GenomeFetch, and outFN the output
        path (opened with mode 'w').

        For every pair the origin tcc is extended by tStart on its 5' side
        and by (tLength - 1) - tEnd on its 3' side, and the target tcc is
        extended by 25 on both ends.  Expression values are listed in
        ascending coordinate order, reversed when the respective strand is
        '-1'.

        Columns written: oID, aID, origin values, target values, tStart,
        tEnd, origin tcc, target tcc, origin sequence, target sequence,
        mismatch positions (list columns are comma-joined).

        Raises ValueError when an origin tcc has a strand other than '1' or
        '-1'.
        '''
        oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
        oNX.load(['tcc', 'filteredTargets'])

        aNX = cgNexusFlat.Nexus(aFN, cgAlignmentFlat.cgAlignment)
        aNX.load(['tTcc', 'tStart', 'tEnd', 'tLength', 'mismatchPositions'])

        # expression dicts for small RNAs (origins) and alignments (targets)
        oWigDict = cgWig.loadWigDict(oWigDir)
        aWigDict = cgWig.loadWigDict(aWigDir)

        myG = gf.GenomeFetch(assembly)

        # the context manager closes the file even if a pair raises midway
        with open(outFN, 'w') as fOut:
                for oID in oNX.tcc:
                        for aID in oNX.filteredTargets[oID]:

                                # expand the origin tcc to fit the target
                                oChrom, oStrand, oStart, oEnd = bioLibCG.tccSplit(oNX.tcc[oID])
                                fivePrime = aNX.tStart[aID]
                                threePrime = (aNX.tLength[aID] - 1) - aNX.tEnd[aID]
                                if oStrand == '1':
                                        oStart -= fivePrime
                                        oEnd += threePrime
                                elif oStrand == '-1':
                                        oStart -= threePrime
                                        oEnd += fivePrime
                                else:
                                        # previously this fell through and reused the
                                        # oTcc of the preceding pair
                                        raise ValueError('unexpected strand %r in tcc %s'
                                                         % (oStrand, oNX.tcc[oID]))
                                oTcc = bioLibCG.makeTcc(oChrom, oStrand, oStart, oEnd)

                                # expand the peak tcc to be the target tcc (peak is only 1-4nt long)
                                aChrom, aStrand, aStart, aEnd = bioLibCG.tccSplit(aNX.tTcc[aID])
                                aTcc = bioLibCG.makeTcc(aChrom, aStrand, aStart - 25, aEnd + 25)

                                # expression, ordered by coordinate
                                oCoord_value = cgWig.getExpressionProfile(oTcc, oWigDict)
                                aCoord_value = cgWig.getExpressionProfile(aTcc, aWigDict)

                                oValues = [oCoord_value[x] for x in sorted(oCoord_value)]
                                aValues = [aCoord_value[x] for x in sorted(aCoord_value)]

                                if oStrand == '-1':
                                        oValues.reverse()
                                if aStrand == '-1':
                                        aValues.reverse()

                                # sequences (target fetched first, as before)
                                aSeq = myG.getSequence(aTcc)
                                oSeq = myG.getSequence(oTcc)

                                pString = [str(oID),
                                           str(aID),
                                           ','.join(str(x) for x in oValues),
                                           ','.join(str(x) for x in aValues),
                                           str(aNX.tStart[aID]),
                                           str(aNX.tEnd[aID]),
                                           oNX.tcc[oID],
                                           aNX.tTcc[aID],
                                           oSeq,
                                           aSeq,
                                           ','.join(str(x) for x in aNX.mismatchPositions[aID])]

                                fOut.write('\t'.join(pString) + '\n')
def getPairInfo(oFN, aFN, oWigDir, aWigDir, outFN, assembly):
        '''Write one tab-separated line of scaled expression per origin/target pair.

        oFN / aFN are the OriginRNA and alignment Nexus files, oWigDir /
        aWigDir the wig directories used for their expression profiles,
        assembly the genome passed to gf.GenomeFetch, and outFN the output
        path (opened with mode 'w').

        For every pair the origin tcc is extended by tStart on its 5' side
        and by (tLength - 1) - tEnd on its 3' side, and the target tcc is
        extended by 25 on both ends.  Values are taken in ascending
        coordinate order.  Origin values are divided by maxO (the maximum of
        oValues[tStart:tEnd]) and multiplied by 20; target values are divided
        by maxA (the maximum target value) and multiplied by 50.

        Columns written: oID, aID, scaled origin values, scaled target
        values, maxO, maxA, tStart, tEnd, origin tcc, target tcc, origin
        sequence, target sequence (list columns are comma-joined).

        Debug output (the tccs, the target profile, and each target base with
        its scaled value) is printed to stdout.

        Raises ValueError when an origin tcc has a strand other than '1' or
        '-1'.  NOTE(review): a zero maxO/maxA raises ZeroDivisionError and an
        empty oValues[tStart:tEnd] slice makes max() raise ValueError; this
        is unchanged from the original -- confirm whether such pairs occur.
        '''
        oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
        oNX.load(['tcc', 'filteredTargets', 'sequence'])

        aNX = cgNexusFlat.Nexus(aFN, cgAlignmentFlat.cgAlignment)
        aNX.load(['tTcc', 'tStart', 'tEnd', 'tLength', 'targetSequence'])

        # expression dicts for small RNAs (origins) and alignments (targets)
        oWigDict = cgWig.loadWigDict(oWigDir)
        aWigDict = cgWig.loadWigDict(aWigDir)

        myG = gf.GenomeFetch(assembly)

        # the context manager closes the file even if a pair raises midway
        with open(outFN, 'w') as fOut:
                for oID in oNX.tcc:
                        for aID in oNX.filteredTargets[oID]:

                                # expand the origin tcc to fit the target
                                oChrom, oStrand, oStart, oEnd = bioLibCG.tccSplit(oNX.tcc[oID])
                                fivePrime = aNX.tStart[aID]
                                threePrime = (aNX.tLength[aID] - 1) - aNX.tEnd[aID]
                                if oStrand == '-1':
                                        oStart -= threePrime
                                        oEnd += fivePrime
                                elif oStrand == '1':
                                        oStart -= fivePrime
                                        oEnd += threePrime
                                else:
                                        # previously this fell through and reused the
                                        # oTcc of the preceding pair
                                        raise ValueError('unexpected strand %r in tcc %s'
                                                         % (oStrand, oNX.tcc[oID]))
                                oTcc = bioLibCG.makeTcc(oChrom, oStrand, oStart, oEnd)

                                # expand the peak tcc to be the target tcc (peak is only 1-4nt long)
                                aChrom, aStrand, aStart, aEnd = bioLibCG.tccSplit(aNX.tTcc[aID])
                                aTcc = bioLibCG.makeTcc(aChrom, aStrand, aStart - 25, aEnd + 25)

                                # single-argument print(...) emits the same text under
                                # Python 2 and also parses under Python 3
                                print('%s %s %s %s' % (oNX.tcc[oID], oTcc, aNX.tTcc[aID], aTcc))

                                # expression, ordered by coordinate
                                oCoord_value = cgWig.getExpressionProfile(oTcc, oWigDict)
                                aCoord_value = cgWig.getExpressionProfile(aTcc, aWigDict)
                                for aCoord in sorted(aCoord_value):
                                        print('%s %s' % (aCoord, aCoord_value[aCoord]))

                                oValues = [oCoord_value[x] for x in sorted(oCoord_value)]
                                aValues = [aCoord_value[x] for x in sorted(aCoord_value)]

                                # might want to change to max only within the small range
                                maxO = max(oValues[aNX.tStart[aID]:aNX.tEnd[aID]])
                                maxA = max(aValues)

                                oValues = [(float(x) / maxO) * 20 for x in oValues]
                                aValues = [(float(x) / maxA) * 50 for x in aValues]

                                # sequences (target fetched first, as before)
                                aSeq = myG.getSequence(aTcc)
                                oSeq = myG.getSequence(oTcc)

                                for letter, value in zip(aSeq, aValues):
                                        print('%s %s' % (letter, value))

                                pString = [str(oID),
                                           str(aID),
                                           ','.join(str(x) for x in oValues),
                                           ','.join(str(x) for x in aValues),
                                           str(maxO),
                                           str(maxA),
                                           str(aNX.tStart[aID]),
                                           str(aNX.tEnd[aID]),
                                           oNX.tcc[oID],
                                           aNX.tTcc[aID],
                                           oSeq,
                                           aSeq]
                                fOut.write('\t'.join(pString) + '\n')
# Example #9
# 0
def getPairInfo2(oFN, aFN, oWigDir, aWigDir, outFN, assembly):
    '''Write one tab-separated line per (origin RNA, filtered target) pair.

    oFN / aFN are the OriginRNA and alignment Nexus files, oWigDir /
    aWigDir the wig directories used for their expression profiles,
    assembly the genome passed to gf.GenomeFetch, and outFN the output path
    (opened with mode 'w').

    For every pair the origin tcc is extended by tStart on its 5' side and
    by (tLength - 1) - tEnd on its 3' side, and the target tcc is extended
    by 25 on both ends.  Expression values are listed in ascending
    coordinate order, reversed when the respective strand is '-1'.

    Columns written: oID, aID, origin values, target values, tStart, tEnd,
    origin tcc, target tcc, origin sequence, target sequence, mismatch
    positions, origin snrSS, origin context, target context (list columns
    are comma-joined).

    Raises ValueError when an origin tcc has a strand other than '1' or
    '-1'.
    '''
    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    oNX.load(['snrSS', 'tcc', 'filteredTargets', 'context'])

    aNX = cgNexusFlat.Nexus(aFN, cgAlignmentFlat.cgAlignment)
    aNX.load(
        ['tTcc', 'tStart', 'tEnd', 'tLength', 'mismatchPositions', 'context'])

    # expression dicts for small RNAs (origins) and alignments (targets)
    oWigDict = cgWig.loadWigDict(oWigDir)
    aWigDict = cgWig.loadWigDict(aWigDir)

    myG = gf.GenomeFetch(assembly)

    # the context manager closes the file even if a pair raises midway
    with open(outFN, 'w') as fOut:
        for oID in oNX.tcc:
            for aID in oNX.filteredTargets[oID]:

                # expand the origin tcc to fit the target
                oChrom, oStrand, oStart, oEnd = bioLibCG.tccSplit(oNX.tcc[oID])
                fivePrime = aNX.tStart[aID]
                threePrime = (aNX.tLength[aID] - 1) - aNX.tEnd[aID]
                if oStrand == '1':
                    oStart -= fivePrime
                    oEnd += threePrime
                elif oStrand == '-1':
                    oStart -= threePrime
                    oEnd += fivePrime
                else:
                    # previously this fell through and reused the oTcc of
                    # the preceding pair
                    raise ValueError('unexpected strand %r in tcc %s'
                                     % (oStrand, oNX.tcc[oID]))
                oTcc = bioLibCG.makeTcc(oChrom, oStrand, oStart, oEnd)

                # expand the peak tcc to be the target tcc (peak is only 1-4nt long)
                aChrom, aStrand, aStart, aEnd = bioLibCG.tccSplit(aNX.tTcc[aID])
                aTcc = bioLibCG.makeTcc(aChrom, aStrand, aStart - 25, aEnd + 25)

                # expression, ordered by coordinate
                oCoord_value = cgWig.getExpressionProfile(oTcc, oWigDict)
                aCoord_value = cgWig.getExpressionProfile(aTcc, aWigDict)

                oValues = [oCoord_value[x] for x in sorted(oCoord_value)]
                aValues = [aCoord_value[x] for x in sorted(aCoord_value)]

                if oStrand == '-1':
                    oValues.reverse()
                if aStrand == '-1':
                    aValues.reverse()

                # sequences (target fetched first, as before)
                aSeq = myG.getSequence(aTcc)
                oSeq = myG.getSequence(oTcc)

                oSNR = oNX.snrSS[oID]
                oContext = oNX.context[oID]
                aContext = aNX.context[aID]

                pString = [
                    str(oID),
                    str(aID),
                    ','.join(str(x) for x in oValues),
                    ','.join(str(x) for x in aValues),
                    str(aNX.tStart[aID]),
                    str(aNX.tEnd[aID]),
                    oNX.tcc[oID],
                    aNX.tTcc[aID],
                    oSeq,
                    aSeq,
                    ','.join(str(x) for x in aNX.mismatchPositions[aID]),
                    str(oSNR),
                    oContext,
                    aContext,
                ]

                fOut.write('\t'.join(pString) + '\n')