def getMapInstance( self ):
    """
    Return a new Map filled from this object's coordinates.

    The Map is populated through Map.setFromTuple() with, in this order,
    the name, seqname, start and end attributes of the instance.
    """
    iMap = Map()
    iMap.setFromTuple( [ self.name, self.seqname, self.start, self.end ] )
    return iMap
def getDictPerSeqNameFromMapFile( mapFile ):
    """
    Group the Map records of a tab-separated map file by sequence name.

    Each line of the file is parsed with Map.setFromString( line, "\t" ).

    @param mapFile: path of the map file to read
    @return: dictionary whose keys are the seqname of the parsed Map
             instances and whose values are lists of the Map instances
             having that seqname, in file order
    """
    dSeqName2Maps = {}
    # the 'with' block closes the file even if parsing a line raises
    with open( mapFile, "r" ) as mapFileHandler:
        for line in mapFileHandler:
            iMap = Map()
            iMap.setFromString( line, "\t" )
            dSeqName2Maps.setdefault( iMap.seqname, [] ).append( iMap )
    return dSeqName2Maps
def createAndLoadTable(self):
    """
    Create the table and load the map data from the input file.

    The table self.tableName is created through self.db.create_table(),
    every Map record readable from self.inputFileName is collected, and
    the records are inserted via a TableMapAdaptator which is kept in
    self._tMapA.
    """
    self.db.create_table(self.db, self.tableName, "", "map")
    lMap = []
    # the 'with' block closes the input file even if Map.read() raises
    with open(self.inputFileName, "r") as inputHandler:
        iMap = Map()
        # a fresh Map is needed per record since each one is stored in lMap
        while iMap.read(inputHandler):
            lMap.append(iMap)
            iMap = Map()
    self._tMapA = TableMapAdaptator(self.db, self.tableName)
    self._tMapA.insMapList(lMap)
def mergeMatchesOnQueries(inFile, outFile):
    """
    Merge the matches of a path file on their query coordinates.

    The path file is converted into a map file ("<inFile>.map") holding
    the query coordinates only, the external program 'mapOp' is run on it
    with the '-m' option, and each record of the resulting
    "<inFile>.map.merge" file is written to outFile via
    Map.writeAsQueryOfPath(). Both intermediate files are removed on
    success.

    @param inFile: path of the input path file
    @param outFile: path of the file to write
    @note: the process exits with status 1 if 'mapOp' returns a non-zero
           status; in that case the intermediate map file is left on disk
    """
    mapFile = "%s.map" % ( inFile )
    PathUtils.convertPathFileIntoMapFileWithQueryCoordsOnly( inFile, mapFile )

    # NOTE(review): with "2>&1 > /dev/null" the shell discards stdout only;
    # stderr still reaches the caller's stdout. Confirm this is intended
    # before swapping to "> /dev/null 2>&1".
    # NOTE(review): the file name is inserted unquoted into a shell command;
    # paths with spaces or shell metacharacters will break it.
    cmd = "mapOp -q %s -m 2>&1 > /dev/null" % ( mapFile )
    exitStatus = os.system( cmd )
    if exitStatus != 0:
        print( "ERROR: mapOp returned %i" % ( exitStatus ) )
        sys.exit(1)
    os.remove( mapFile )

    mergeFile = "%s.merge" % ( mapFile )
    # the 'with' blocks close both files even if a record fails to parse
    with open( mergeFile, "r" ) as mergeFileHandler:
        with open( outFile, "w" ) as outFileHandler:
            m = Map()
            for line in mergeFileHandler:
                # the same Map instance is recycled for every record
                m.reset()
                m.setFromString( line, "\t" )
                m.writeAsQueryOfPath( outFileHandler )
    os.remove( mergeFile )
def getDictPerNameFromMapFile( mapFile ):
    """
    Index the Map records of a tab-separated map file by their name.

    Each line of the file is parsed with Map.setFromString( line, "\t" ).
    A record equal to an already stored record of the same name is
    skipped.

    @param mapFile: path of the map file to read
    @return: dictionary whose keys are Map names and whose values are the
             corresponding Map instances
    @note: if two different Map instances share the same name, an error
           message is written on stderr and the process exits with status 1
    """
    dName2Maps = {}
    # the 'with' block closes the file on every exit path, including
    # sys.exit() and parsing errors
    with open( mapFile, "r" ) as mapFileHandler:
        for line in mapFileHandler:
            iMap = Map()
            iMap.setFromString( line, "\t" )
            if iMap.name in dName2Maps:
                if iMap == dName2Maps[ iMap.name ]:
                    # exact duplicate of a record already stored
                    continue
                msg = "ERROR: in file '%s' two different Map instances have the same name '%s'" % ( mapFile, iMap.name )
                sys.stderr.write( "%s\n" % ( msg ) )
                sys.exit(1)
            dName2Maps[ iMap.name ] = iMap
    return dName2Maps
def diff(self, o):
    """
    Return a Set built from the result of Map.diff() against another object.

    Map.diff() is called with this instance and o.getMap(). When the Map
    it returns is empty, a default-constructed Set is returned; otherwise
    the returned Set carries this instance's id, name and seqname together
    with the start and end of that Map.

    @param o: object providing a getMap() method
    @return: a new Set instance
    """
    remainder = Map.diff(self, o.getMap())
    result = Set()
    if remainder.isEmpty():
        return result
    result.id = self.id
    result.name = self.name
    result.seqname = self.seqname
    result.start = remainder.start
    result.end = remainder.end
    return result
def getSubjectAsMapOfQuery(self):
    """
    Return the subject as a Map positioned on the query.

    The Map is named after the subject's seqname and its seqname is the
    query's seqname. It takes the query's start and end as they are when
    the subject is on the direct strand, and swapped otherwise.
    """
    iMap = Map()
    subject = self.range_subject
    query = self.range_query
    iMap.name = subject.seqname
    iMap.seqname = query.seqname
    if subject.isOnDirectStrand():
        iMap.start, iMap.end = query.start, query.end
    else:
        # the query coordinates are swapped for a subject on the other strand
        iMap.start, iMap.end = query.end, query.start
    return iMap
def convertMapFileIntoSetFile( mapFileName, setFileName = "" ):
    """
    Convert a map file into a set file.

    Each line of the map file is parsed with Map.setFromString() and
    written as a Set whose id is the 1-based rank of the line in the file.

    @param mapFileName: path of the map file to read
    @param setFileName: path of the set file to write; when left empty,
                        "<mapFileName>.set" is used
    """
    if setFileName == "":
        setFileName = "%s.set" % mapFileName
    # the 'with' blocks close both files even if a line fails to parse
    with open( mapFileName, "r" ) as mapFileHandler:
        with open( setFileName, "w" ) as setFileHandler:
            # the same Map instance is recycled for every line
            iMap = Map()
            for count, line in enumerate( mapFileHandler, 1 ):
                iMap.setFromString(line)
                iSet = Set()
                iSet.id = count
                iSet.name = iMap.getName()
                iSet.seqname = iMap.getSeqname()
                iSet.start = iMap.getStart()
                iSet.end = iMap.getEnd()
                iSet.write(setFileHandler)
def merge(self, o):
    """
    Merge another instance into this one when both share the same seqname.

    Nothing happens when the seqnames differ. Otherwise the coordinates
    are merged by Map.merge() and the smaller of the two ids is kept.

    @param o: instance to merge into this one
    """
    if self.seqname != o.seqname:
        return
    Map.merge(self, o)
    self.id = min(self.id, o.id)
def toString(self):
    """
    Return the id followed by the Map.toString() fields, tab-separated.
    """
    fields = ["%i" % (self.id)]
    fields.append(Map.toString(self))
    return "\t".join(fields)
def setFromTuple(self, tuple):
    """
    Set the attributes from a sequence of values.

    @param tuple: sequence whose first item is the id (converted with
                  int()) and whose remaining items are handed over to
                  Map.setFromTuple()
    """
    identifier = tuple[0]
    self.id = int(identifier)
    mapFields = tuple[1:]
    Map.setFromTuple(self, mapFields)
def __eq__(self, o):
    """
    Compare two instances: the ids must match, then Map.__eq__() decides.

    @param o: instance to compare with
    @return: False when the ids differ, otherwise the result of
             Map.__eq__( self, o )
    """
    return False if self.id != o.id else Map.__eq__(self, o)
def __init__(self, id=-1, name="", seqname="", start=-1, end=-1):
    """
    Build an instance from an identifier and the Map fields.

    @param id: identifier stored in self.id (default -1)
    @param name: passed on to Map.__init__() (default "")
    @param seqname: passed on to Map.__init__() (default "")
    @param start: passed on to Map.__init__() (default -1)
    @param end: passed on to Map.__init__() (default -1)
    """
    # the parent initializer runs first, then the identifier is attached
    Map.__init__(self, name, seqname, start, end)
    self.id = id
def removeRedundantMembersDueToChunkOverlaps( dGroupId2MemberHeaders, dGr2Mb, chunkOverlap ):
    """
    Remove, in each group, the members duplicated by overlapping chunks.

    For every group, the member headers are turned into Map objects
    gathered per chunk name. Chunks having neither a previous nor a next
    chunk in the group are ignored. Among the remaining chunks, sorted by
    chunk number, every other chunk starting with the first is dropped.
    In the chunks left, the members whose maximum coordinate is at most
    1.1 * chunkOverlap are taken as redundant and their identifiers are
    removed from the member list of the group.

    A member header is split on single spaces: token 0 is the member name,
    token 1 the chunk name ("chunk" followed by an integer) and token 3
    the coordinates, given as "start..end" ranges separated by commas.
    The member identifier is the part of the name before "Gr", further
    stripped of what precedes "Q" (or else "S") when present.

    The module-level 'verbose' variable controls the progress messages.

    @param dGroupId2MemberHeaders: dictionary, group identifier -> list of
                                   member headers
    @param dGr2Mb: dictionary, group identifier -> list of member
                   identifiers; updated in place
    @param chunkOverlap: length of the overlap between consecutive chunks
    @return: dGr2Mb, the very same dictionary object, once updated
    """
    def chunkNumber( chunkName ):
        # integer following "chunk" in the chunk name
        return int( chunkName.split("chunk")[1] )

    for groupID in dGroupId2MemberHeaders:
        if verbose > 1:
            print( "group %s:" % ( groupID, ) )

        # get members into Map objects, per chunk name
        dChunkName2Map = {}
        for memberH in dGroupId2MemberHeaders[ groupID ]:
            if verbose > 1:
                print( memberH )
            tokens = memberH.split(" ")
            # accumulate, so that several members located on the same chunk
            # are all kept instead of the last one overwriting the others
            lChunkMaps = dChunkName2Map.setdefault( tokens[1], [] )
            for coords in tokens[3].split(","):
                bounds = coords.split("..")
                m = Map()
                m.name = tokens[0]
                m.seqname = tokens[1]
                m.start = int( bounds[0] )
                m.end = int( bounds[1] )
                lChunkMaps.append( m )

        # remove chunks without previous or next chunks
        # (iterate over a snapshot since the dictionary is modified)
        for chunkName in list( dChunkName2Map ):
            chunkId = chunkNumber( chunkName )
            hasNext = ( "chunk%i" % ( chunkId + 1 ) ) in dChunkName2Map
            hasPrevious = ( "chunk%i" % ( chunkId - 1 ) ) in dChunkName2Map
            if not ( hasNext or hasPrevious ):
                del dChunkName2Map[ chunkName ]

        # for each pair of chunk overlap, remove one chunk
        # (sorted on the chunk number, as a plain string sort would put
        # "chunk10" before "chunk9" and pair the chunks wrongly)
        lChunkNames = sorted( dChunkName2Map, key=lambda name: ( chunkNumber( name ), name ) )
        for chunkName in lChunkNames[::2]:
            del dChunkName2Map[ chunkName ]

        # remove members outside chunk overlap
        for chunkName in list( dChunkName2Map ):
            lInOverlap = [ iMap for iMap in dChunkName2Map[ chunkName ]
                           if iMap.getMax() <= 1.1 * chunkOverlap ]
            if lInOverlap:
                dChunkName2Map[ chunkName ] = lInOverlap
            else:
                del dChunkName2Map[ chunkName ]

        if verbose > 1:
            print( "all members: %s" % ( dGr2Mb[ groupID ], ) )
            print( "chunks to clean: %s" % ( list( dChunkName2Map ), ) )

        # collect the identifiers of the redundant members
        lMembersToRemove = []
        for lChunkMaps in dChunkName2Map.values():
            for iMap in lChunkMaps:
                mbId = iMap.name.split("Gr")[0]
                if "Q" in mbId:
                    mbId = mbId.split("Q")[1]
                elif "S" in mbId:
                    mbId = mbId.split("S")[1]
                lMembersToRemove.append( mbId )

        dGr2Mb[ groupID ] = [ mb for mb in dGr2Mb[ groupID ]
                              if mb not in lMembersToRemove ]
        if verbose > 1:
            print( "members to keep: %s" % ( dGr2Mb[ groupID ], ) )
            sys.stdout.flush()

    return dGr2Mb