Beispiel #1
0
 def spliceFromCoords( genomeFile, coordFile, obsFile ):
     """
     Copy every sequence of a file to a new file, cutting out listed regions.

     Sequences are read one by one from 'genomeFile' with Bioseq.read() and
     written to 'obsFile' with Bioseq.write(). When the header of a sequence
     is a key of the dictionary built from 'coordFile', the regions listed
     for it are removed and the remaining pieces are concatenated; any other
     sequence is written unchanged.

     Region bounds are used as 1-based, inclusive positions: 'getMin() - 1'
     is the 0-based index where a kept piece stops, and 'getMax()' is the
     0-based index where the next kept piece may start.

     @param genomeFile: path of the input sequence file (opened for reading)
     @param coordFile: path of the map file handed to
         MapUtils.getDictPerSeqNameFromMapFile()
     @param obsFile: path of the output file (created or overwritten)
     """
     # Context managers guarantee both files are closed even if parsing or
     # writing raises part-way through.
     with open( genomeFile, "r" ) as genomeFileHandler:
         with open( obsFile, "w" ) as obsFileHandler:
             dChr2Maps = MapUtils.getDictPerSeqNameFromMapFile( coordFile )

             while True:
                 bs = Bioseq()
                 bs.read( genomeFileHandler )
                 # A sequence left at None after read() marks the end of input.
                 if bs.sequence is None:
                     break
                 # 'in' works on Python 2 and 3; dict.has_key() is Python 2 only.
                 if bs.header in dChr2Maps:
                     lCoords = MapUtils.getMapListSortedByIncreasingMinThenMax( dChr2Maps[ bs.header ] )
                     lKeptPieces = []
                     currentSite = 0
                     for iMap in lCoords:
                         minSplice = iMap.getMin() - 1
                         if minSplice > currentSite:
                             lKeptPieces.append( bs.sequence[ currentSite : minSplice ] )
                         # Never move the cursor backwards: a region nested in
                         # (or ending before) a previous one would otherwise
                         # re-expose bases that were already removed.
                         currentSite = max( currentSite, iMap.getMax() )
                     lKeptPieces.append( bs.sequence[ currentSite : ] )
                     bs.sequence = "".join( lKeptPieces )
                 bs.write( obsFileHandler )
Beispiel #2
0
 def convertFastaHeadersFromChkToChr( inFile, mapFile, outFile ):
     """
     Rewrite the '{Fragment}' headers of a fasta file from chunk to chromosome coordinates.

     Lines are copied from 'inFile' to 'outFile'. Only header lines (starting
     with '>') that contain '{Fragment}' are modified; every other line is
     written unchanged. A modified header is expected to be made of
     space-separated fields:

         >name chunkName {Fragment} start..end[,start..end...]

     The second field is looked up in the dictionary built from 'mapFile' to
     get the chromosome name ('seqname' attribute), and each 'start..end'
     pair of the fourth field is converted with
     ConvCoord.getRangeOnChromosome(). The new header is:

         >name chrName {Fragment} start..end[,start..end...]

     @param inFile: path of the input fasta file
     @param mapFile: path of the map file handed to
         MapUtils.getDictPerNameFromMapFile()
     @param outFile: path of the output fasta file (created or overwritten)
     """
     # Context managers guarantee both files are closed even if a header is
     # malformed or a chunk name is missing from the map.
     with open( inFile, "r" ) as inFileHandler:
         with open( outFile, "w" ) as outFileHandler:
             dChunk2Map = MapUtils.getDictPerNameFromMapFile( mapFile )
             iConvCoord = ConvCoord()
             for line in inFileHandler:
                 # Sequence lines and headers without fragments are copied as is.
                 if line[0] != ">" or "{Fragment}" not in line:
                     outFileHandler.write( line )
                     continue

                 lFields = line.split(" ")
                 chkName = lFields[1]
                 chrName = dChunk2Map[ chkName ].seqname

                 # int() ignores the newline still attached to the last pair.
                 lRangesOnChk = []
                 for coordPair in lFields[3].split(","):
                     lBounds = coordPair.split("..")
                     lRangesOnChk.append( Range( chkName, int(lBounds[0]), int(lBounds[1]) ) )
                 lRangesOnChr = [ iConvCoord.getRangeOnChromosome( iRange, dChunk2Map )
                                  for iRange in lRangesOnChk ]

                 # First field without its leading '>'.
                 seqName = lFields[0][1:]
                 coordsOnChr = ",".join( [ "%i..%i" % ( iRange.start, iRange.end )
                                           for iRange in lRangesOnChr ] )
                 newHeader = "%s %s {Fragment} %s" % ( seqName, chrName, coordsOnChr )
                 outFileHandler.write( ">%s\n" % ( newHeader ) )
 def getSetListOverlappingCoord( self, seqName, start, end ):
     """
     Return, as a list of sets, what getListOverlappingCoord() returns for the same arguments.

     The result of self.getListOverlappingCoord() is passed through
     MapUtils.mapList2SetList() and returned.

     @param seqName: sequence name forwarded to getListOverlappingCoord()
     @param start: start coordinate forwarded to getListOverlappingCoord()
     @param end: end coordinate forwarded to getListOverlappingCoord()
     @return: the value returned by MapUtils.mapList2SetList()
     """
     lOverlappingMaps = self.getListOverlappingCoord( seqName, start, end )
     return MapUtils.mapList2SetList( lOverlappingMaps )
 def getSetListFromSeqName( self, seqName ):
     """
     Return, as a list of sets, what getListFromSeqName() returns for the same sequence name.

     The result of self.getListFromSeqName() is passed through
     MapUtils.mapList2SetList() and returned.

     @param seqName: sequence name forwarded to getListFromSeqName()
     @return: the value returned by MapUtils.mapList2SetList()
     """
     lMapsOfSeq = self.getListFromSeqName( seqName )
     return MapUtils.mapList2SetList( lMapsOfSeq )