def getSubjectAsMapOfQuery(self):
    """
    Return a new Map named after the subject and positioned on the query.

    The Map's name is the sequence name of self.range_subject and its
    sequence name is the one of self.range_query. The query coordinates
    are copied unchanged when self.range_subject.isOnDirectStrand() is
    true; otherwise start and end are swapped.
    """
    subjectMap = Map()
    subjectMap.name = self.range_subject.seqname
    query = self.range_query
    subjectMap.seqname = query.seqname
    if not self.range_subject.isOnDirectStrand():
        # Subject not on the direct strand: store the query coordinates
        # in swapped order.
        subjectMap.start = query.end
        subjectMap.end = query.start
        return subjectMap
    subjectMap.start = query.start
    subjectMap.end = query.end
    return subjectMap
def removeRedundantMembersDueToChunkOverlaps( dGroupId2MemberHeaders, dGr2Mb, chunkOverlap ):
    """
    Remove, from each group, the members that are redundant because they
    lie in the overlap between two consecutive chunks.

    For every group, the member headers are parsed into Map objects and
    gathered per chunk name. A chunk is "cleaned" only when the chunk that
    precedes it (same name with id - 1) also holds a member of the group.
    In such a chunk, every member whose getMax() is at most
    1.1 * chunkOverlap is considered a duplicate and its identifier is
    removed from dGr2Mb[ groupID ].

    Compared with the previous implementation:
      - all the members of a chunk are kept while parsing (each new header
        used to overwrite the members already recorded for the same chunk);
      - the chunk to clean is chosen from the numeric chunk id instead of a
        lexicographic sort followed by "drop every other name", which picked
        the wrong chunk for pairs such as chunk9/chunk10 and for runs of
        three or more consecutive chunks.

    @param dGroupId2MemberHeaders: dict, group id -> list of member headers.
        A header is split on single spaces: token 0 is the member name,
        token 1 the chunk name ("chunk<N>") and token 3 the coordinates,
        written "start..end" or as a comma-separated list of such ranges.
    @param dGr2Mb: dict, group id -> list of member identifiers; updated in
        place (each processed group gets a new, filtered list).
    @param chunkOverlap: length of the overlap between consecutive chunks.
    @return: dGr2Mb, the same dict object that was given.

    Relies on the module-level names 'verbose', 'Map' and 'sys'.
    NOTE(review): assumes Map.getMax() returns the larger of start/end --
    confirm against the Map class.
    """
    for groupID in dGroupId2MemberHeaders.keys():
        # Single-argument parenthesised prints behave identically whether
        # 'print' is a statement (Python 2) or a function (Python 3).
        if verbose > 1:
            print( "group %s:" % groupID )

        # Gather the members of the group as Map objects, per chunk name.
        dChunkName2Maps = {}
        for memberH in dGroupId2MemberHeaders[ groupID ]:
            if verbose > 1:
                print( memberH )
            tokens = memberH.split(" ")
            # A header without comma yields a single range here.
            for coords in tokens[3].split(","):
                lBounds = coords.split("..")
                iMap = Map()
                iMap.name = tokens[0]
                iMap.seqname = tokens[1]
                iMap.start = int( lBounds[0] )
                iMap.end = int( lBounds[1] )
                # Append rather than assign, so that several members on
                # the same chunk are all retained.
                dChunkName2Maps.setdefault( tokens[1], [] ).append( iMap )

        # Select the chunks to clean: those whose preceding chunk is also
        # present, restricted to the members located in the overlap.
        dChunkName2Redundant = {}
        for chunkName, lMaps in dChunkName2Maps.items():
            chunkId = int( chunkName.split("chunk")[1] )
            if "chunk%i" % ( chunkId - 1 ) not in dChunkName2Maps:
                continue
            lInOverlap = [ iMap for iMap in lMaps
                           if iMap.getMax() <= 1.1 * chunkOverlap ]
            if lInOverlap:
                dChunkName2Redundant[ chunkName ] = lInOverlap

        if verbose > 1:
            print( "all members: %s" % ( dGr2Mb[ groupID ], ) )
            print( "chunks to clean: %s" % ( list( dChunkName2Redundant.keys() ), ) )

        # Convert the names of the redundant members into member
        # identifiers: text before "Gr", then what follows "Q" or "S".
        sMembersToRemove = set()
        for lMaps in dChunkName2Redundant.values():
            for iMap in lMaps:
                mbId = iMap.name.split("Gr")[0]
                if "Q" in mbId:
                    mbId = mbId.split("Q")[1]
                elif "S" in mbId:
                    mbId = mbId.split("S")[1]
                sMembersToRemove.add( mbId )

        dGr2Mb[ groupID ] = [ mb for mb in dGr2Mb[ groupID ]
                              if mb not in sMembersToRemove ]

        if verbose > 1:
            print( "members to keep: %s" % ( dGr2Mb[ groupID ], ) )
            sys.stdout.flush()

    return dGr2Mb