Esempio n. 1
0
 def getSubjectAsMapOfQuery(self):
     """
     Build a Map describing the subject, positioned on the query sequence.

     The returned Map is named after the subject sequence and located on
     the query sequence. It takes the query coordinates as they are when
     the subject range is on the direct strand, and swapped (start <-> end)
     otherwise.

     @return: a new Map instance
     """
     mapOnQuery = Map()
     subject = self.range_subject
     query = self.range_query
     mapOnQuery.name = subject.seqname
     mapOnQuery.seqname = query.seqname
     if not subject.isOnDirectStrand():
         # subject on the other strand: swap the query coordinates
         mapOnQuery.start, mapOnQuery.end = query.end, query.start
     else:
         mapOnQuery.start, mapOnQuery.end = query.start, query.end
     return mapOnQuery
def removeRedundantMembersDueToChunkOverlaps( dGroupId2MemberHeaders, dGr2Mb, chunkOverlap ):
    """
    Remove, from each group, the members lying in the overlap between two consecutive chunks.

    For each group, the member headers are parsed into Map objects gathered
    per chunk. Chunks having neither a previous nor a next chunk within the
    group are ignored. The remaining chunks are sorted on their numeric
    identifier and every other one (starting from the first) is discarded.
    In the chunks that are left, the members whose Map.getMax() is at most
    1.1 * chunkOverlap are considered redundant and their identifiers are
    removed from dGr2Mb[ groupID ].

    The module-level 'verbose' variable is read to decide whether to print
    progress messages.

    @param dGroupId2MemberHeaders: group identifier -> list of member headers.
        A header is split on single spaces: token 0 is the member name,
        token 1 the chunk name (format 'chunk<integer>') and token 3 the
        coordinates, as 'start..end' or a comma-separated list of them.
    @param dGr2Mb: group identifier -> list of member identifiers; updated
        in place. The identifiers are compared with strings extracted from
        the member names (presumably they are strings too -- to be confirmed
        against the caller).
    @param chunkOverlap: length of the overlap between two consecutive chunks
    @return: dGr2Mb (the same object as given, after update)
    """
    def getChunkId( chunkName ):
        # 'chunk12' -> 12
        return int( chunkName.split("chunk")[1] )

    # 10% tolerance on the overlap length
    maxCoordInOverlap = 1.1 * chunkOverlap

    # for each group
    for groupID in dGroupId2MemberHeaders:
        if verbose > 1:
            print( "group %s:" % ( groupID, ) )

        # get members into Map objects, per chunk name
        dChunkName2Map = {}
        for memberH in dGroupId2MemberHeaders[ groupID ]:
            if verbose > 1:
                print( memberH )
            tokens = memberH.split(" ")
            # accumulate: several members of a group can be on the same
            # chunk, and none of them should hide the others
            lMaps = dChunkName2Map.setdefault( tokens[1], [] )
            for coords in tokens[3].split(","):
                m = Map()
                m.name = tokens[0]
                m.seqname = tokens[1]
                m.start = int( coords.split("..")[0] )
                m.end = int( coords.split("..")[1] )
                lMaps.append( m )

        # remove chunks without previous or next chunks
        # (iterate over a snapshot of the keys since entries are deleted)
        for chunkName in list( dChunkName2Map ):
            chunkId = getChunkId( chunkName )
            hasNext = "chunk%i" % ( chunkId + 1 ) in dChunkName2Map
            hasPrevious = "chunk%i" % ( chunkId - 1 ) in dChunkName2Map
            if not ( hasNext or hasPrevious ):
                del dChunkName2Map[ chunkName ]

        # for each pair of chunk overlap, remove one chunk
        # (sort on the numeric identifier: as plain strings, 'chunk10'
        # would come before 'chunk9' and the wrong chunk would be dropped)
        # NOTE(review): with three or more consecutive chunks in a group,
        # dropping every other chunk does not cover all overlaps -- confirm
        # whether such runs can occur.
        lChunkNames = sorted( dChunkName2Map, key=getChunkId )
        for chunkName in lChunkNames[::2]:
            del dChunkName2Map[ chunkName ]

        # remove members outside chunk overlap (~< 10000 bp)
        for chunkName in list( dChunkName2Map ):
            lInOverlap = [ m for m in dChunkName2Map[ chunkName ]
                           if m.getMax() <= maxCoordInOverlap ]
            if lInOverlap:
                dChunkName2Map[ chunkName ] = lInOverlap
            else:
                del dChunkName2Map[ chunkName ]

        if verbose > 1:
            print( "all members: %s" % ( dGr2Mb[ groupID ], ) )
            print( "chunks to clean: %s" % ( list( dChunkName2Map ), ) )

        # extract the identifier of each member to remove: take the part of
        # its name before 'Gr', then what follows 'Q' (or else 'S') in it
        lMembersToRemove = []
        for lMaps in dChunkName2Map.values():
            for m in lMaps:
                mbId = m.name.split("Gr")[0]
                if "Q" in mbId:
                    mbId = mbId.split("Q")[1]
                elif "S" in mbId:
                    mbId = mbId.split("S")[1]
                lMembersToRemove.append( mbId )

        dGr2Mb[ groupID ] = [ mb for mb in dGr2Mb[ groupID ]
                              if mb not in lMembersToRemove ]
        if verbose > 1:
            print( "members to keep: %s" % ( dGr2Mb[ groupID ], ) )
            sys.stdout.flush()

    return dGr2Mb