Beispiel #1
0
 def getMapInstance( self ):
     """
     Return a new Map filled with the name, seqname, start and end of this object
     """
     mapInstance = Map()
     # the field order is the one handed to Map.setFromTuple
     lFields = [ self.name, self.seqname, self.start, self.end ]
     mapInstance.setFromTuple( lFields )
     return mapInstance
Beispiel #2
0
 def getDictPerSeqNameFromMapFile( mapFile ):
     """
     Read a map file and group its Map instances per sequence name

     Each line of the file is parsed with Map.setFromString using a tabulation
     as separator.

     @param mapFile: path to the map file
     @return: dictionary whose keys are sequence names and whose values are
              the lists of Map instances having this sequence name, in file order
     """
     dSeqName2Maps = {}
     # the 'with' block closes the file even if a line cannot be parsed
     with open( mapFile, "r" ) as mapFileHandler:
         for line in mapFileHandler:
             iMap = Map()
             iMap.setFromString( line, "\t" )
             dSeqName2Maps.setdefault( iMap.seqname, [] ).append( iMap )
     return dSeqName2Maps
 def createAndLoadTable(self):
     """
     Create the table and load the map data from the input file

     The table self.tableName is created with the "map" type, every Map read
     from self.inputFileName is collected, and the whole list is inserted
     through a TableMapAdaptator stored in self._tMapA.
     """
     # NOTE(review): self.db is also passed as first argument of its own method - confirm this is intended
     self.db.create_table(self.db, self.tableName, "", "map")
     lMap = []
     # the 'with' block closes the input file even if reading a Map fails
     with open(self.inputFileName, "r") as f:
         iMap = Map()
         # NOTE(review): Map.read() presumably returns a false value once the file is exhausted - confirm
         while iMap.read(f):
             lMap.append(iMap)
             # a fresh instance is needed because the previous one is kept in the list
             iMap = Map()
     self._tMapA = TableMapAdaptator(self.db, self.tableName)
     self._tMapA.insMapList(lMap)
Beispiel #4
0
 def mergeMatchesOnQueries(inFile, outFile):
     """
     Merge the matches of a path file according to their query coordinates

     The path file is converted into a temporary map file ('<inFile>.map'),
     the external program 'mapOp' is run on it, then each line of
     '<inFile>.map.merge' is written into the output file via
     Map.writeAsQueryOfPath. Both temporary files are removed on success.
     The program exits with status 1 if 'mapOp' returns a non-zero status.

     @param inFile: path to the input path file
     @param outFile: path to the output file
     """
     mapFile = "%s.map" % ( inFile )
     PathUtils.convertPathFileIntoMapFileWithQueryCoordsOnly( inFile, mapFile )

     # NOTE(review): with "2>&1 > /dev/null" only stdout is discarded, stderr still
     # reaches the caller's stdout; use "> /dev/null 2>&1" if both should be silenced
     cmd = "mapOp -q %s -m 2>&1 > /dev/null" % ( mapFile )
     exitStatus = os.system( cmd )
     if exitStatus != 0:
         # single-argument call: prints the same text under Python 2 and Python 3
         print( "ERROR: mapOp returned %i" % ( exitStatus ) )
         sys.exit(1)
     os.remove( mapFile )

     # NOTE(review): 'mapOp -m' presumably writes its result into '<mapFile>.merge' - confirm
     mergeFile = "%s.merge" % ( mapFile )
     # the 'with' blocks close both files even if a line cannot be parsed or written
     with open( mergeFile, "r" ) as mergeFileHandler:
         with open( outFile, "w" ) as outFileHandler:
             m = Map()
             for line in mergeFileHandler:
                 # the same instance is re-used for every line, hence the reset
                 m.reset()
                 m.setFromString( line, "\t" )
                 m.writeAsQueryOfPath( outFileHandler )
     os.remove( mergeFile )
Beispiel #5
0
 def getDictPerNameFromMapFile( mapFile ):
     """
     Read a map file and index its Map instances per name

     Each line of the file is parsed with Map.setFromString using a tabulation
     as separator. A Map equal to one already recorded under the same name is
     skipped; if two different Map instances share the same name, an error
     message is written on stderr and the program exits with status 1.

     @param mapFile: path to the map file
     @return: dictionary whose keys are Map names and whose values are Map instances
     """
     dName2Maps = {}
     # the 'with' block closes the file on every path, including sys.exit
     with open( mapFile, "r" ) as mapFileHandler:
         for line in mapFileHandler:
             iMap = Map()
             iMap.setFromString( line, "\t" )
             if iMap.name in dName2Maps:
                 if iMap == dName2Maps[ iMap.name ]:
                     # exact duplicate: nothing to record
                     continue
                 msg = "ERROR: in file '%s' two different Map instances have the same name '%s'" % ( mapFile, iMap.name )
                 sys.stderr.write( "%s\n" % ( msg ) )
                 sys.exit(1)
             dName2Maps[ iMap.name ] = iMap
     return dName2Maps
Beispiel #6
0
 def diff(self, o):
     """
     Return a Set built from the result of Map.diff between this object and the Map of 'o'

     When the Map returned by Map.diff is empty, a default Set is returned.
     Otherwise the Set takes the id, name and seqname of this object and the
     start and end of the Map returned by Map.diff.
     """
     mapDifference = Map.diff(self, o.getMap())
     result = Set()
     if mapDifference.isEmpty():
         return result
     result.id = self.id
     result.name = self.name
     result.seqname = self.seqname
     result.start = mapDifference.start
     result.end = mapDifference.end
     return result
Beispiel #7
0
 def getSubjectAsMapOfQuery(self):
     """
     Return a Map named after the subject sequence and located on the query sequence

     The Map takes the query sequence name and the query coordinates; start and
     end are swapped when the subject range is not on the direct strand.
     """
     mapOnQuery = Map()
     subject = self.range_subject
     query = self.range_query
     mapOnQuery.name = subject.seqname
     mapOnQuery.seqname = query.seqname
     if subject.isOnDirectStrand():
         mapOnQuery.start, mapOnQuery.end = query.start, query.end
     else:
         mapOnQuery.start, mapOnQuery.end = query.end, query.start
     return mapOnQuery
Beispiel #8
0
 def convertMapFileIntoSetFile( mapFileName, setFileName = "" ):
     """
     Convert a map file into a set file

     Each line of the map file is parsed with Map.setFromString and written as
     a Set whose identifier is the rank of the line in the file, starting at 1.

     @param mapFileName: path to the input map file
     @param setFileName: path to the output set file ('<mapFileName>.set' if empty)
     """
     if setFileName == "":
         setFileName = "%s.set" % mapFileName
     # the 'with' blocks close both files even if a line cannot be parsed or written
     with open( mapFileName, "r" ) as mapFileHandler:
         with open( setFileName, "w" ) as setFileHandler:
             # the same Map instance is re-used for every line
             iMap = Map()
             for count, line in enumerate( mapFileHandler, 1 ):
                 iMap.setFromString(line)
                 iSet = Set()
                 iSet.id = count
                 iSet.name = iMap.getName()
                 iSet.seqname = iMap.getSeqname()
                 iSet.start = iMap.getStart()
                 iSet.end = iMap.getEnd()
                 iSet.write(setFileHandler)
Beispiel #9
0
 def merge(self, o):
     """
     Merge 'o' into this object when both have the same sequence name

     The coordinates are merged by Map.merge and the smallest of the two
     identifiers is kept. Nothing is done when the sequence names differ.
     """
     if self.seqname != o.seqname:
         return
     Map.merge(self, o)
     smallestId = min(self.id, o.id)
     self.id = smallestId
Beispiel #10
0
 def toString(self):
     """
     Return the identifier followed by a tabulation and the string given by Map.toString
     """
     return "%i\t%s" % (self.id, Map.toString(self))
Beispiel #11
0
 def setFromTuple(self, tuple):
     """
     Set the attributes from a sequence of values

     The first item is converted to an integer and stored as identifier, the
     remaining items are handed to Map.setFromTuple.
     """
     self.id = int(tuple[0])
     lMapFields = tuple[1:]
     Map.setFromTuple(self, lMapFields)
Beispiel #12
0
 def __eq__(self, o):
     """
     Return False when the identifiers differ, otherwise the result of Map.__eq__
     """
     if self.id != o.id:
         return False
     return Map.__eq__(self, o)
Beispiel #13
0
 def __init__(self, id=-1, name="", seqname="", start=-1, end=-1):
     """
     Initialize the Map part with name, seqname, start and end, then store the identifier

     @param id: identifier (default -1)
     @param name: name handed to Map.__init__ (default "")
     @param seqname: sequence name handed to Map.__init__ (default "")
     @param start: start coordinate handed to Map.__init__ (default -1)
     @param end: end coordinate handed to Map.__init__ (default -1)
     """
     Map.__init__( self, name, seqname, start, end )
     self.id = id
def removeRedundantMembersDueToChunkOverlaps( dGroupId2MemberHeaders, dGr2Mb, chunkOverlap ):
    # for each group
    for groupID in dGroupId2MemberHeaders.keys():
        if verbose > 1:
            print "group %s:" % ( groupID )
        if groupID not in [ "3446" ]:
            #continue
            pass

        # get members into Map object, per chunk name
        dChunkName2Map = {}
        for memberH in dGroupId2MemberHeaders[ groupID ]:
            if verbose > 1: print memberH
            tokens = memberH.split(" ")
            if "," not in tokens[3]:
                m = Map()
                m.name = tokens[0]
                m.seqname = tokens[1]
                m.start = int( tokens[3].split("..")[0] )
                m.end = int( tokens[3].split("..")[1] )
                dChunkName2Map[ m.seqname ] = [ m ]
            else:
                dChunkName2Map[ tokens[1] ] = []
                for i in tokens[3].split(","):
                    m = Map()
                    m.name = tokens[0]
                    m.seqname = tokens[1]
                    m.start = int( i.split("..")[0] )
                    m.end = int( i.split("..")[1] )
                    dChunkName2Map[ m.seqname ].append( m )

        # remove chunks without previous or next chunks
        for chunkName in dChunkName2Map.keys():
            chunkId = int( chunkName.split("chunk")[1] )
            if not ( dChunkName2Map.has_key( "chunk%i" % ( chunkId + 1 ) ) \
                     or dChunkName2Map.has_key( "chunk%i" % ( chunkId - 1 ) ) ):
                del dChunkName2Map[ chunkName ]
                continue

        # for each pair of chunk overlap, remove one chunk
        lChunkNames = dChunkName2Map.keys()
        lChunkNames.sort()
        out = []
        for i in range(0,len(lChunkNames), 2):
            del dChunkName2Map[ lChunkNames[i] ]

        # remove members outside chunk overlap (~< 10000 bp)
        for chunkName in dChunkName2Map.keys():
            out = []
            for index, m in enumerate( dChunkName2Map[ chunkName ][:] ):
                if m.getMax() <= 1.1 * chunkOverlap:
                    out.append( dChunkName2Map[ chunkName ][ index ] )
            dChunkName2Map[ chunkName ] = out
            if len(dChunkName2Map[ chunkName ]) == 0:
                del dChunkName2Map[ chunkName ]

        if verbose > 1:
            print "all members:", dGr2Mb[ groupID ]
            print "chunks to clean:", dChunkName2Map.keys()
        lMembersToRemove = []
        for i in dChunkName2Map.keys():
            for j in dChunkName2Map[ i ]:
                mbId = j.name.split("Gr")[0]
                if "Q" in mbId:
                    mbId = mbId.split("Q")[1]
                elif "S" in mbId:
                    mbId = mbId.split("S")[1]
                lMembersToRemove.append( mbId )
        out = []
        for index, k in enumerate( dGr2Mb[ groupID ][:] ):
            if k not in lMembersToRemove:
                out.append( dGr2Mb[ groupID ][index] )
        dGr2Mb[ groupID ] = out
        if verbose > 1:
            print "members to keep:", dGr2Mb[ groupID ]
            sys.stdout.flush()

    return dGr2Mb