Beispiel #1
0
def _polyTailLetter(tail, maxLength=19):
    """Return the base of a homopolymer tail, or None if `tail` is not one.

    A tail qualifies when it is 1..maxLength characters long and consists
    of a single repeated letter from A, G, T, C.
    """
    if not tail or len(tail) > maxLength:
        return None
    let = tail[0]
    if let in "AGTC" and tail == let * len(tail):
        return let
    return None


def updatePolySeqs(mFN, readsFN, alignFN):
    """Print a record for every aligned read that is a microRNA plus a
    homopolymer tail.

    mFN      -- file loaded through cgNexusFlat.Nexus with the `miR` format;
                the 'sequence' and 'polySeqs' attributes are loaded.
    readsFN  -- file of reads; the 'read' attribute is taken from column 1.
    alignFN  -- alignment file (`cgAlignment` format); 'sID' is used as the
                read id and 'tID' as the microRNA id.

    For each alignment the microRNA sequence (first untrimmed, then with
    1..7 bases removed from its 3' end) is searched for in the read.  The
    first variant found decides the outcome: if whatever follows it in the
    read is a run of 1..19 identical bases, a line built by
    tabIt(microSeq, read, trimAmount, tailLength, letter) is printed,
    followed by a "TRIMMED" line when trimAmount > 0.

    Nothing is returned and, despite the name, nothing is written back to
    the input files; results go to stdout only.
    """
    # Timer is started as in the original code; its splits are not reported.
    tim = bioLibCG.cgTimer()
    tim.start()

    maxTrim = 7  # largest number of 3' bases removed from the microRNA

    NX = cgNexusFlat.Nexus(mFN, miR)
    NX.load(['sequence', 'polySeqs'])

    reads = cgNexusFlat.quickTable(('read', 'string', '.', 1))
    rNX = cgNexusFlat.Nexus(readsFN, reads)
    rNX.load(['read'])

    aNX = cgNexusFlat.Nexus(alignFN, cgAlignment)
    aNX.load(['sID', 'tID'])

    for alignID in aNX.ids:
        theRead = rNX.read[aNX.sID[alignID]]
        microSeq = NX.sequence[aNX.tID[alignID]]

        # Reads contained in the microRNA (including exact matches) carry no
        # tail and are skipped.  NOTE: this guard also made the former
        # "exact match -> letter N" branch unreachable, so it was removed.
        if theRead in microSeq:
            continue

        for trim in range(0, maxTrim + 1):
            # [:-0] would yield an empty string, so handle trim == 0 apart.
            if trim:
                core = microSeq[:-trim]
            else:
                core = microSeq
            if not core:
                # microRNA shorter than the trim amount; an empty separator
                # cannot be searched for and longer trims are empty too.
                break
            if core not in theRead:
                continue

            # Everything after the FIRST occurrence is the tail.  (split()[1]
            # would stop at a second occurrence and report a bogus tail.)
            tail = theRead.partition(core)[2]
            let = _polyTailLetter(tail)
            if let is not None:
                print(tabIt(microSeq, theRead, trim, len(tail), let))
                if trim:
                    print("TRIMMED")
            break  # dont trim further once one variant was found in the read
Beispiel #2
0
def plotBar(analysisFN):
    """Show an 8x7 grid of bar charts of tail-letter frequencies.

    analysisFN -- table file read through cgNexusFlat with the columns
                  microSeq, readSeq, trimAmount, tailLength, letter
                  (presumably the output printed by updatePolySeqs --
                  TODO confirm).

    Rows of the grid are trim amounts 0..7, columns are tail lengths 0..6.
    Each panel has one bar per letter (A, T, C, G) scaled by the largest
    count in that panel; the panel title is "trim,tailLength maxCount".
    Blocks until the matplotlib window is closed (plt.show()).
    """
    polyTable = cgNexusFlat.quickTable(
        ('microSeq', 'string', '.', 0), ('readSeq', 'string', '.', 1),
        ('trimAmount', 'int', 0, 2), ('tailLength', 'int', 0, 3),
        ('letter', 'string', 'N', 4))
    polyNX = cgNexusFlat.Nexus(analysisFN, polyTable, ids=False)
    polyNX.load(['trimAmount', 'tailLength', 'letter'])

    # (letter, trim, tailLength) -> number of rows with that combination
    counts = {}
    for rowID in polyNX.ids:
        key = (polyNX.letter[rowID],
               polyNX.trimAmount[rowID],
               polyNX.tailLength[rowID])
        counts[key] = counts.get(key, 0) + 1

    letters = ["A", "T", "C", "G"]

    ind = list(range(1, 5))
    width = 0.35  # the width of the bars

    numRows = 8  # trim amounts 0..7
    numCols = 7  # tail lengths 0..6

    for trim in range(numRows):
        for tLen in range(numCols):
            # subplot indices are 1-based, hence the +1
            plt.subplot(numRows, numCols, trim * numCols + tLen + 1)

            # .get() keeps letters that never occur from raising KeyError
            letAmount = [counts.get((let, trim, tLen), 0) for let in letters]

            #normalize
            theMax = max(letAmount)
            if theMax == 0:
                letAmount = [0, 0, 0, 0]
            else:
                # the .01 offset gives zero counts a non-zero bar height
                letAmount = [float(x + .01) / theMax for x in letAmount]

            plt.bar(ind, letAmount, width)
            plt.title('%s,%s %s' % (trim, tLen, theMax))
            # hide the ticks; an empty location list needs no labels
            plt.xticks([])
            plt.yticks([])

    plt.show()
def plotBar(analysisFN):
    """Show an 8x7 grid of bar charts of tail-letter frequencies.

    analysisFN -- table file read through cgNexusFlat with the columns
                  microSeq, readSeq, trimAmount, tailLength, letter
                  (presumably the output printed by updatePolySeqs --
                  TODO confirm).

    Rows of the grid are trim amounts 0..7, columns are tail lengths 0..6.
    Each panel has one bar per letter (A, T, C, G) scaled by the largest
    count in that panel; the panel title is "trim,tailLength maxCount".
    Blocks until the matplotlib window is closed (plt.show()).
    """
    polyTable = cgNexusFlat.quickTable(('microSeq', 'string', '.', 0),
                                    ('readSeq', 'string', '.', 1),
                                    ('trimAmount', 'int', 0, 2),
                                    ('tailLength', 'int', 0, 3),
                                    ('letter', 'string', 'N', 4))
    polyNX = cgNexusFlat.Nexus(analysisFN, polyTable, ids = False)
    polyNX.load(['trimAmount', 'tailLength', 'letter'])

    # (letter, trim, tailLength) -> number of rows with that combination
    counts = {}
    for rowID in polyNX.ids:
        key = (polyNX.letter[rowID],
               polyNX.trimAmount[rowID],
               polyNX.tailLength[rowID])
        counts[key] = counts.get(key, 0) + 1

    letters = ["A", "T", "C", "G"]

    ind = list(range(1, 5))
    width = 0.35       # the width of the bars

    numRows = 8        # trim amounts 0..7
    numCols = 7        # tail lengths 0..6

    for trim in range(numRows):
        for tLen in range(numCols):
            # subplot indices are 1-based, hence the +1
            plotNum = (trim * numCols) + tLen + 1
            plt.subplot(numRows, numCols, plotNum)

            # .get() keeps letters that never occur from raising KeyError
            letAmount = [counts.get((let, trim, tLen), 0) for let in letters]

            #normalize
            theMax = max(letAmount)
            if theMax == 0:
                letAmount = [0, 0, 0, 0]
            else:
                # the .01 offset gives zero counts a non-zero bar height
                letAmount = [float(x + .01) / theMax for x in letAmount]

            plt.bar(ind, letAmount, width)
            plt.title('%s,%s %s' % (trim, tLen, theMax))
            # hide the ticks; an empty location list needs no labels
            plt.xticks([])
            plt.yticks([])

    plt.show()