def fromMap(self, map_query2target, use_strand=None):
    """Populate this match from an alignment that maps query to target.

    The alignment is serialised with ``alignlib_lite.py_AlignmentFormatBlat``
    and its string form is split on tabs into seven fields: query from/to,
    sbjct from/to, query block starts, sbjct block starts and block sizes.
    The results are stored on ``self``; nothing is returned.

    Arguments
    ---------
    map_query2target :
        Alignment object; must provide ``getNumAligned()`` and be accepted
        by ``alignlib_lite.py_AlignmentFormatBlat``.
    use_strand :
        If true and ``self.strand`` is ``"-"``, the query interval is
        mirrored using ``self.mQueryLength``.

    Raises
    ------
    ValueError
        If the formatted string does not split into exactly seven fields,
        or a coordinate/block entry is not an integer.
    """
    self.mNMatches = map_query2target.getNumAligned()

    f = alignlib_lite.py_AlignmentFormatBlat(map_query2target)
    (self.mQueryFrom, self.mQueryTo,
     self.mSbjctFrom, self.mSbjctTo,
     self.mQueryBlockStarts, self.mSbjctBlockStarts,
     self.mBlockSizes) = str(f).split("\t")

    if self.mBlockSizes:
        # Each block field is comma-separated; the final character is
        # dropped before splitting (the format writes a trailing comma).
        # Real lists are built (rather than bare ``map``) so that ``len()``
        # and repeated iteration also work on Python 3.
        self.mBlockSizes = [
            int(x) for x in self.mBlockSizes[:-1].split(",")]
        self.mQueryBlockStarts = [
            int(x) for x in self.mQueryBlockStarts[:-1].split(",")]
        self.mSbjctBlockStarts = [
            int(x) for x in self.mSbjctBlockStarts[:-1].split(",")]
    else:
        self.mBlockSizes = []
        self.mQueryBlockStarts = []
        self.mSbjctBlockStarts = []

    self.mNBlocks = len(self.mBlockSizes)

    self.mQueryFrom, self.mQueryTo, self.mSbjctFrom, self.mSbjctTo = [
        int(x) for x in (self.mQueryFrom, self.mQueryTo,
                         self.mSbjctFrom, self.mSbjctTo)]

    # queryfrom and queryto are always forward strand coordinates
    if use_strand and self.strand == "-":
        self.mQueryFrom, self.mQueryTo = (
            self.mQueryLength - self.mQueryTo,
            self.mQueryLength - self.mQueryFrom)
def fromMap(self, map_query2target, use_strand=None):
    """Fill this match's coordinates and blocks from a query-to-target map.

    The alignment is rendered through
    ``alignlib_lite.py_AlignmentFormatBlat``; the resulting text is split
    on tabs into seven fields (query from/to, sbjct from/to, query block
    starts, sbjct block starts, block sizes) which are stored on ``self``.
    Nothing is returned.

    If *use_strand* is true and ``self.strand`` is ``"-"``, the query
    interval is mirrored using ``self.mQueryLength``.
    """

    def parse_blocks(text):
        # Drop the last character, then read the comma-separated integers.
        return [int(token) for token in text[:-1].split(",")]

    self.mNMatches = map_query2target.getNumAligned()

    blat_text = str(alignlib_lite.py_AlignmentFormatBlat(map_query2target))
    fields = blat_text.split("\t")
    (self.mQueryFrom, self.mQueryTo,
     self.mSbjctFrom, self.mSbjctTo,
     self.mQueryBlockStarts, self.mSbjctBlockStarts,
     self.mBlockSizes) = fields

    if not self.mBlockSizes:
        # An empty block-size field means there are no blocks at all.
        self.mBlockSizes = []
        self.mQueryBlockStarts = []
        self.mSbjctBlockStarts = []
    else:
        self.mBlockSizes = parse_blocks(self.mBlockSizes)
        self.mQueryBlockStarts = parse_blocks(self.mQueryBlockStarts)
        self.mSbjctBlockStarts = parse_blocks(self.mSbjctBlockStarts)

    self.mNBlocks = len(self.mBlockSizes)

    coordinates = (self.mQueryFrom, self.mQueryTo,
                   self.mSbjctFrom, self.mSbjctTo)
    (self.mQueryFrom, self.mQueryTo,
     self.mSbjctFrom, self.mSbjctTo) = [int(value) for value in coordinates]

    # queryfrom and queryto are always forward strand coordinates
    if use_strand and self.strand == "-":
        mirrored_from = self.mQueryLength - self.mQueryTo
        mirrored_to = self.mQueryLength - self.mQueryFrom
        self.mQueryFrom, self.mQueryTo = mirrored_from, mirrored_to
def getMapTarget2Query(self):
    """Build and return an alignment mapping target (sbjct) to query.

    A Blat-format line is assembled from this match's block lists and
    copied into a fresh ``alignlib_lite.py_makeAlignmentBlocks()`` object,
    which is returned.

    If the strand is "-", the coordinates for query are on the negative
    strand.

    Raises ``ValueError`` if a block-start list is empty (``min``/``max``
    of an empty sequence).
    """

    def as_field(values):
        # Comma-separated values followed by a trailing comma.
        return ",".join([str(v) for v in values]) + ","

    map_target2query = alignlib_lite.py_makeAlignmentBlocks()

    # NOTE(review): the "to" coordinates are the largest block *start*,
    # not start + size - confirm this is what the format parser expects.
    sbjct_from = min(self.mSbjctBlockStarts)
    sbjct_to = max(self.mSbjctBlockStarts)
    query_from = min(self.mQueryBlockStarts)
    query_to = max(self.mQueryBlockStarts)

    sbjct_field = as_field(self.mSbjctBlockStarts)
    query_field = as_field(self.mQueryBlockStarts)
    sizes_field = as_field(self.mBlockSizes)

    blat_line = "%i\t%i\t%i\t%i\t%s\t%s\t%s\n" % (
        sbjct_from, sbjct_to, query_from, query_to,
        sbjct_field, query_field, sizes_field)

    formatter = alignlib_lite.py_AlignmentFormatBlat(blat_line)
    formatter.copy(map_target2query)
    return map_target2query
def getMapTarget2Query(self):
    """Return an alignment object that maps target (sbjct) onto query.

    The sbjct block starts are written as the first coordinate set and
    the query block starts as the second; the resulting Blat-format line
    is loaded into a new ``alignlib_lite.py_makeAlignmentBlocks()``
    object via ``py_AlignmentFormatBlat(...).copy(...)``.

    If the strand is "-", the coordinates for query are on the negative
    strand.

    Raises ``ValueError`` when a block-start list is empty.
    """
    result = alignlib_lite.py_makeAlignmentBlocks()

    # NOTE(review): the upper bounds are max(block starts), i.e. they do
    # not include the size of the last block - verify against the parser.
    bounds = (min(self.mSbjctBlockStarts),
              max(self.mSbjctBlockStarts),
              min(self.mQueryBlockStarts),
              max(self.mQueryBlockStarts))

    # Serialise each block list as "a,b,c," (note the trailing comma).
    block_fields = tuple(
        ",".join(str(entry) for entry in block_list) + ","
        for block_list in (self.mSbjctBlockStarts,
                           self.mQueryBlockStarts,
                           self.mBlockSizes))

    line = "%i\t%i\t%i\t%i\t%s\t%s\t%s\n" % (bounds + block_fields)

    blat_format = alignlib_lite.py_AlignmentFormatBlat(line)
    blat_format.copy(result)
    return result
import alignlib_lite as alignlib

# Small smoke script: build an alignment with one diagonal, print its
# summary, then round-trip it through the Blat alignment format.
x = alignlib.py_makeAlignmentBlocks()
# NOTE(review): arguments are presumably (from, to, offset) - confirm
# against the alignlib documentation.
x.addDiagonal(10, 100, 0)

# A single pre-formatted argument keeps the output identical on Python 2
# and Python 3 (the former ``print a, b, c`` statement is a SyntaxError on 3).
print("%s %s %s" % (x.getNumAligned(), x.getRowFrom(), x.getRowTo()))

f = alignlib.py_AlignmentFormatBlat(x)
print(str(f))

# Copy the formatted alignment back into x and print the format again.
f.copy(x)
print(str(f))
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Alignments are read from stdin through ``Blat.BlatIterator``. For each
    alignment the wiggle values covering the sbjct interval are fetched
    from the indexed wiggle files, restricted to positions that map onto
    the query, and summarised with ``Stats.DistributionalParameters``.

    One tab-separated row is written to ``options.stdout`` per alignment
    with at least one value: query id, ``mNMismatches + mNMatches``, the
    distributional parameters and, with ``--with-values``, the raw values.
    Alignments without any value are counted as skipped.

    NOTE(review): *argv* is defaulted to ``sys.argv`` here but is not
    forwarded to ``E.Start`` in this function - confirm that is intended.

    Raises
    ------
    IOError
        If the ``--wiggle-files`` glob expression matches no file.
    """

    def as_blat_field(values):
        # The Blat alignment format expects block lists as "a,b,c,".
        # Lists/tuples are serialised that way; anything else (for example
        # an already serialised string) is passed through unchanged.
        if isinstance(values, (list, tuple)):
            return ",".join([str(v) for v in values]) + ","
        return values

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: psl2wiggle_stats.py 2781 2009-09-10 11:33:14Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-g", "--genome-file", dest="genome_file",
                      type="string",
                      help="filename with genome.")

    parser.add_option("--wiggle-files", dest="wiggle_files", type="string",
                      help="glob expression for wiggle files [%default].")

    parser.add_option("--prefix", dest="prefix", type="string",
                      help="prefix to add to contig names before lookup [%default].")

    parser.add_option("-z", "--from-zipped", dest="from_zipped",
                      action="store_true",
                      help="input is zipped.")

    parser.add_option("--test", dest="test", type="int",
                      help="test - stop after # rows of parsing [%default].")

    parser.add_option("--with-values", dest="with_values",
                      action="store_true",
                      help="output values in last column [%default].")

    parser.set_defaults(wiggle_files="*.data.bz2",
                        from_zipped=False,
                        prefix="",
                        with_values=False,
                        test=None)

    (options, _) = E.Start(parser, add_pipe_options=True)

    # open indexed access to wiggles
    wiggle_files = glob.glob(options.wiggle_files)
    if not wiggle_files:
        raise IOError("could not find wiggle files with '%s'" %
                      options.wiggle_files)

    index = Wiggle.WiggleMultiIndexedAccess(wiggle_files,
                                            keep_open=True,
                                            use_cache=False)

    iterator = Blat.BlatIterator(sys.stdin)

    ninput, noutput, nskipped = 0, 0, 0

    options.stdout.write(
        "query\tnali\t%s" %
        ("\t".join(Stats.DistributionalParameters().getHeaders())))
    if options.with_values:
        options.stdout.write("\tvalues")
    options.stdout.write("\n")

    while True:

        if options.test and ninput >= options.test:
            break

        # The builtin works with both the Python 2 and Python 3 iterator
        # protocols; the default turns exhaustion into the same ``None``
        # sentinel the loop already stops on.
        match = next(iterator, None)
        if match is None:
            break

        ninput += 1

        if options.loglevel >= 2:
            options.stdlog.write(str(match) + "\n")

        # psl always matches on the forward strand
        map_genome2query = alignlib_lite.py_makeAlignmentBlocks()
        f = alignlib_lite.py_AlignmentFormatBlat(
            "%i\t%i\t%i\t%i\t%s\t%s\t%s\n" % (
                match.mSbjctFrom,
                match.mSbjctTo,
                match.mQueryFrom,
                match.mQueryTo,
                as_blat_field(match.mSbjctBlockStarts),
                as_blat_field(match.mQueryBlockStarts),
                as_blat_field(match.mBlockSizes)))
        f.copy(map_genome2query)

        data = index.get(options.prefix + match.mSbjctId,
                         match.mSbjctFrom,
                         match.mSbjctTo)

        values = []
        for x, vv in data:
            # x is the position of the first value in vv and advances by
            # one per value; only positions that map onto the query count.
            for v in vv:
                if map_genome2query.mapRowToCol(x) >= 0:
                    values.append(v)
                x += 1

        if not values:
            nskipped += 1
            continue

        noutput += 1

        if options.loglevel >= 2:
            options.stdlog.write(
                "# %s\n" % ",".join(["%5.3f" % v for v in values]))

        s = Stats.DistributionalParameters(values)
        options.stdout.write("%s\t%i\t%s" % (match.mQueryId,
                                             match.mNMismatches +
                                             match.mNMatches,
                                             str(s)))

        if options.with_values:
            options.stdout.write(
                "\t%s" % ",".join(["%5.3f" % v for v in values]))

        options.stdout.write("\n")

    if options.loglevel >= 1:
        options.stdlog.write(
            "# ninput=%i, noutput=%i, nskipped=%i\n" %
            (ninput, noutput, nskipped))

    E.Stop()