Esempio n. 1
0
def check_ORNA_in_ago(oFN, oFF, agoFN, clippingAmount = 1):
   
    NX = Nexus(oFN, oFF)
    NX.load(['sequence', 'geneNames'])
    
    #make truncated sequences
    id_sequence = NX.createMap('id', 'sequence')
    if clippingAmount > 0:
        id_sequence = dict( (i, j[clippingAmount:-clippingAmount]) for i,j in id_sequence.items())
   
    #get fastq sequences
    agoF = open(agoFN, 'r')
    agoSeqs = []
    while True:
        fPacket = nextFilePacket(agoF, 4)
        if not fPacket: break
        agoSeqs.append(fPacket[1])
    agoF.close()

    #count for each oRNA
    id_count = {}
    for id, seq in id_sequence.items():
        for agoSeq in agoSeqs:
            if seq in agoSeq:
                id_count[id] = id_count.get(id, 0) + 1

    #out
    totalCount = 0
    for id, count in id_count.items():
        NX.id = id
        print '%s\t%s\t%s' % (id, count, NX.geneNames)
        totalCount += count

    print totalCount
Esempio n. 2
0
def check_ORNA_in_ago(oFN, oFF, agoFN, clippingAmount=1):

    NX = Nexus(oFN, oFF)
    NX.load(['sequence', 'geneNames'])

    #make truncated sequences
    id_sequence = NX.createMap('id', 'sequence')
    if clippingAmount > 0:
        id_sequence = dict((i, j[clippingAmount:-clippingAmount])
                           for i, j in id_sequence.items())

    #get fastq sequences
    agoF = open(agoFN, 'r')
    agoSeqs = []
    while True:
        fPacket = nextFilePacket(agoF, 4)
        if not fPacket: break
        agoSeqs.append(fPacket[1])
    agoF.close()

    #count for each oRNA
    id_count = {}
    for id, seq in id_sequence.items():
        for agoSeq in agoSeqs:
            if seq in agoSeq:
                id_count[id] = id_count.get(id, 0) + 1

    #out
    totalCount = 0
    for id, count in id_count.items():
        NX.id = id
        print '%s\t%s\t%s' % (id, count, NX.geneNames)
        totalCount += count

    print totalCount
Esempio n. 3
0
def testMap(fN, fF):

    NX = Nexus(fN, fF)
    NX.load(['geneName', 'numReads', 'otherIDs'])

    geneName_numReads = NX.createMap('otherIDs', 'geneName', False) #not 1to1

    for k,v in geneName_numReads.iteritems():
        print k, v[:5]
        return
Esempio n. 4
0
def cleanForSNR(dataFN, oFF):
    """Set snrClean on every oRNA record, then save the Nexus.

    An oRNA is clean iff it is the top-numUFBS member of its sibling set
    AND has numUniqueSims >= 10.  Records in a singleton sibling set are
    trivially the top member.
    """
    dataNX = Nexus(dataFN, oFF)
    dataNX.load(['numUniqueSims', 'numUFBS', 'snrClean', 'siblingSet'])
    id_numUFBS = dataNX.createMap('id', 'numUFBS')
    id_siblingSet = dataNX.createMap('id', 'siblingSet')

    #a set gives O(1) membership tests in the tagging loop below;
    #the original list made that loop quadratic over the whole file
    unusedSiblings = set()
    for oID, siblingSet in id_siblingSet.iteritems():
        if len(siblingSet) == 1: continue #NOTE: oRNA IDs are in their own sibling set
        numUFBS__id = [(id_numUFBS[x], x) for x in siblingSet]
        numUFBS__id.sort()
        numUFBS__id.pop() #take last one (one we're keeping) out of list
        unusedSiblings.update(x[1] for x in numUFBS__id)

    #tag unclean oRNA
    while dataNX.nextID():
        if (dataNX.id in unusedSiblings) or (dataNX.numUniqueSims < 10):
            dataNX.snrClean = False
        else:
            dataNX.snrClean = True
    dataNX.save()
Esempio n. 5
0
def updateSimilarSiblings(oFN, oFF, frameLength):
    """Recompute sibling sets from sequence similarity and write them back."""
    nexus = Nexus(oFN, oFF)
    nexus.load(['sequence', 'siblingSet'])

    idToSequence = nexus.createMap('id', 'sequence')
    similarSets = getSimilarORNASets(idToSequence, frameLength)

    #every member of a similar set records the whole set as its siblings
    for similarSet in similarSets:
        for memberID in similarSet:
            nexus.id = memberID
            nexus.siblingSet = list(similarSet)

    nexus.save()
Esempio n. 6
0
def updateSimilarSiblings(oFN, oFF, frameLength):
    """Rebuild each oRNA's siblingSet from sequence-similarity clustering."""
    store = Nexus(oFN, oFF)
    store.load(['sequence', 'siblingSet'])

    clusters = getSimilarORNASets(store.createMap('id', 'sequence'),
                                  frameLength)

    #stamp every record in a cluster with that cluster's full membership
    for cluster in clusters:
        for recordID in cluster:
            store.id = recordID
            store.siblingSet = list(cluster)

    store.save()
Esempio n. 7
0
def cleanForSNR(dataFN, oFF):
    """Mark each oRNA record's snrClean flag and save the Nexus.

    A record is clean only when it has the highest numUFBS in its sibling
    set (ties broken by id via tuple sort) and numUniqueSims >= 10.
    """
    dataNX = Nexus(dataFN, oFF)
    dataNX.load(['numUniqueSims', 'numUFBS', 'snrClean', 'siblingSet'])
    id_numUFBS = dataNX.createMap('id', 'numUFBS')
    id_siblingSet = dataNX.createMap('id', 'siblingSet')

    #collect losers into a set: the tagging loop below tests membership
    #once per record, and list membership would make that pass quadratic
    unusedSiblings = set()
    for oID, siblingSet in id_siblingSet.iteritems():
        if len(siblingSet) == 1:
            continue  #NOTE: oRNA IDs are in their own sibling set
        numUFBS__id = [(id_numUFBS[x], x) for x in siblingSet]
        numUFBS__id.sort()
        numUFBS__id.pop()  #take last one (one we're keeping) out of list
        unusedSiblings.update(x[1] for x in numUFBS__id)

    #tag unclean oRNA
    while dataNX.nextID():
        if (dataNX.id in unusedSiblings) or (dataNX.numUniqueSims < 10):
            dataNX.snrClean = False
        else:
            dataNX.snrClean = True
    dataNX.save()
Esempio n. 8
0
def testConsolidation(oFN, oFF, frameLength):

    dataNX = Nexus(oFN, oFF)
    dataNX.load(['sequence'])
    oID_sequence = dataNX.createMap('id', 'sequence')
    consolidatedSets = getSimilarORNASets(oID_sequence, frameLength)
    
    #check if all oIDs are in set
    allConsolidatedIDs = set()
    [allConsolidatedIDs.add(x) for theSet in consolidatedSets for x in theSet]    
    oIDsSet = set(oID_sequence.keys())
    print "DIFFERENCE"
    print oIDsSet.symmetric_difference(allConsolidatedIDs)

    #check Duplicates
     
    #print out sets to verify that they work 
    for oIDSet in consolidatedSets:
        print 
        print oIDSet
        for oID in oIDSet:
            print oID, oID_sequence[oID]
Esempio n. 9
0
def linkTargetIDs(oFN, oFF, aFN, aFF):
    """Attach alignment record IDs to their source oRNA records
    as filteredTargets, then save the oRNA Nexus."""
    oNX = Nexus(oFN, oFF)
    oNX.load(['filteredTargets'])

    #empty alignment file: blank every record and bail out early
    if os.path.getsize(aFN) == 0:
        while oNX.nextID():
            oNX.filteredTargets = []
        oNX.save()
        return

    aNX = Nexus(aFN, aFF)
    aNX.load(['sID'])

    #not 1to1: one source ID -> list of alignment IDs
    sourceToAlignments = aNX.createMap('sID', 'id', False)
    for sourceID, alignmentIDs in sourceToAlignments.iteritems():
        oNX.id = sourceID
        oNX.filteredTargets = alignmentIDs

    oNX.save()
Esempio n. 10
0
def testConsolidation(oFN, oFF, frameLength):

    dataNX = Nexus(oFN, oFF)
    dataNX.load(['sequence'])
    oID_sequence = dataNX.createMap('id', 'sequence')
    consolidatedSets = getSimilarORNASets(oID_sequence, frameLength)

    #check if all oIDs are in set
    allConsolidatedIDs = set()
    [allConsolidatedIDs.add(x) for theSet in consolidatedSets for x in theSet]
    oIDsSet = set(oID_sequence.keys())
    print "DIFFERENCE"
    print oIDsSet.symmetric_difference(allConsolidatedIDs)

    #check Duplicates

    #print out sets to verify that they work
    for oIDSet in consolidatedSets:
        print
        print oIDSet
        for oID in oIDSet:
            print oID, oID_sequence[oID]
Esempio n. 11
0
def linkTargetIDs(oFN, oFF, aFN, aFF):
    """Link each source oRNA record to the IDs of its alignment records
    (stored in filteredTargets) and save."""
    sourceNX = Nexus(oFN, oFF)
    sourceNX.load(['filteredTargets'])

    #just give it some blanks when there are no alignments at all
    if os.path.getsize(aFN) == 0:
        while sourceNX.nextID():
            sourceNX.filteredTargets = []
        sourceNX.save()
        return

    alignNX = Nexus(aFN, aFF)
    alignNX.load(['sID'])

    #sID -> [alignment id, ...]  (non 1-to-1 map)
    sID_aIDs = alignNX.createMap('sID', 'id', False)
    for sID, aIDs in sID_aIDs.iteritems():
        sourceNX.id = sID
        sourceNX.filteredTargets = aIDs

    sourceNX.save()