def build_index(self, filename, indexfile):
    """
    Build an interval index for a MAF file and write it to disk.

    Recipe from Brad Chapman's blog:
    http://bcbio.wordpress.com/2009/07/26/sorting-genomic-alignments-using-python/

    For every component of every alignment block, the component's source
    name and forward-strand interval are added to the index together with
    the file offset at which the block was read.

    :param filename: path of the MAF file to index
    :param indexfile: path the index is written to (overwritten if present)
    """
    indexes = interval_index_file.Indexes()
    # Context managers guarantee both handles are released even when the
    # reader or the index writer raises part-way through.
    with open(filename) as in_handle:
        reader = maf.Reader(in_handle)
        while True:
            # Take the offset *before* reading so it refers to the start of
            # the block that is about to be returned.
            pos = reader.file.tell()
            rec = next(reader)
            if rec is None:
                # The loop treats a None record as end of input.
                break
            for c in rec.components:
                indexes.add(
                    c.src,
                    c.forward_strand_start,
                    c.forward_strand_end,
                    pos,
                    max=c.src_size,
                )
    # NOTE(review): the index is opened in text mode as in the original;
    # confirm whether Indexes.write() expects a binary handle on Python 3.
    with open(indexfile, "w") as index_handle:
        indexes.write(index_handle)
def main():
    """
    Command-line entry point: build an interval index for a MAF file.

    Positional arguments (as returned by ``doc_optparse.parse``):
      1. the MAF file, optionally ``.bz2`` or ``.lzo`` compressed (a matching
         offset table ``<file>t`` must then exist next to it);
      2. optional name of the index file; defaults to the MAF file name with
         any ``.bz2``/``.lzo`` suffix removed, plus ``.index``.

    If ``options.species`` is set it is read as a comma separated list, and
    only components whose source prefix (text before the first ``.``) is in
    that list are indexed.
    """
    # Parse command line
    options, args = doc_optparse.parse(__doc__)
    try:
        maf_file = args[0]
        # If it appears to be a bz2 file, attempt to open with table
        if maf_file.endswith(".bz2"):
            table_file = maf_file + "t"
            if not os.path.exists(table_file):
                doc_optparse.exit("To index bz2 compressed files first "
                                  "create a bz2t file with bzip-table.")
            # Open with SeekableBzip2File so we have tell support
            maf_in = SeekableBzip2File(maf_file, table_file)
            # Strip .bz2 from the filename before adding ".index"
            maf_file = maf_file[:-4]
        elif maf_file.endswith(".lzo"):
            from bx.misc.seeklzop import SeekableLzopFile
            table_file = maf_file + "t"
            if not os.path.exists(table_file):
                doc_optparse.exit("To index lzo compressed files first "
                                  "create a lzot file with lzop_build_offset_table.")
            # Open with SeekableLzopFile so we have tell support
            maf_in = SeekableLzopFile(maf_file, table_file)
            # Strip .lzo from the filename before adding ".index"
            maf_file = maf_file[:-4]
        else:
            maf_in = open(maf_file)
        # Determine the name of the index file
        if len(args) > 1:
            index_file = args[1]
        else:
            index_file = maf_file + ".index"
        species = options.species.split(",") if options.species else None
    except Exception:
        # Deliberately not a bare ``except:``: that would also intercept
        # SystemExit (presumably what doc_optparse.exit() raises -- confirm)
        # and KeyboardInterrupt, and report them as argument parsing errors.
        doc_optparse.exception()

    maf_reader = bx.align.maf.Reader(maf_in)
    indexes = interval_index_file.Indexes()

    # Need to be a bit tricky in our iteration here to get the 'tells' right:
    # the offset has to be sampled before each block is read.
    while True:
        pos = maf_reader.file.tell()
        # Builtin next() works with both the Python 2 and Python 3 iterator
        # protocols, unlike calling the .next() method directly.
        block = next(maf_reader)
        if block is None:
            break
        for c in block.components:
            if species is not None and c.src.split('.')[0] not in species:
                continue
            indexes.add(c.src, c.forward_strand_start, c.forward_strand_end,
                        pos, max=c.src_size)

    # NOTE(review): text mode kept from the original; confirm whether
    # Indexes.write() needs a binary handle on Python 3.
    with open(index_file, 'w') as out:
        indexes.write(out)
def __init__(self, axt_filename, index_filename=None, keep_open=False,
             species1=None, species2=None, species_to_lengths=None,
             support_ids=False):
    """
    Set up indexed access to an axt file.

    :param axt_filename: path of the axt file
    :param index_filename: path of its interval index; defaults to
        ``axt_filename + ".index"``
    :param keep_open: when true, open ``axt_filename`` immediately and keep
        the handle in ``self.f``; otherwise ``self.f`` is ``None``
    :param species1: name for the first species; ``None`` means "species1"
    :param species2: name for the second species; ``None`` means "species2"
    :param species_to_lengths: stored unchanged on the instance
    :param support_ids: stored unchanged on the instance (for extra text at
        end of axt header lines)
    """
    if index_filename is None:
        index_filename = axt_filename + ".index"
    self.indexes = interval_index_file.Indexes(filename=index_filename)
    self.axt_filename = axt_filename
    # nota bene: (species1 or "species1") would be incorrect here, because an
    # explicitly passed empty string must be preserved; only None triggers
    # the default.
    self.species1 = "species1" if species1 is None else species1
    self.species2 = "species2" if species2 is None else species2
    self.species_to_lengths = species_to_lengths
    self.support_ids = support_ids  # for extra text at end of axt header lines
    if keep_open:
        self.f = open(axt_filename)
    else:
        self.f = None
def index(gff_file, index_file=None):
    """
    Build an interval index for a tab separated GFF file.

    Each non-comment, non-blank line contributes one entry made of its first
    column (sequence id), its fourth and fifth columns (start and end, parsed
    as integers) and the file offset at which the line begins.

    :param gff_file: path of the GFF file to index
    :param index_file: where to write the index; defaults to
        ``gff_file + ".index"``
    :returns: the path of the index file that was written
    :raises ValueError: if a data line has fewer than five columns or
        non-integer coordinates
    """
    indexes = interval_index_file.Indexes()
    with open(gff_file) as in_handle:
        # Explicit tell()/readline() instead of ``for line in in_handle``:
        # iterating a file object makes tell() unusable or inaccurate.
        while True:
            pos = in_handle.tell()
            line = in_handle.readline()
            if not line:
                break
            # Skip comments/directives and blank lines; a blank line would
            # otherwise fail the five-way unpack below.
            if line.startswith("#") or not line.strip():
                continue
            # Only the first, fourth and fifth columns are needed.
            seqid, _, _, start, end = line.split("\t")[:5]
            indexes.add(seqid, int(start), int(end), pos)
    if index_file is None:
        index_file = gff_file + ".index"
    with open(index_file, "w") as index_handle:
        indexes.write(index_handle)
    return index_file
def build_index(in_file, index_file):
    """
    Build an interval index for the MAF file ``in_file``.

    Every component of every alignment block is added to the index under its
    source name, with its forward-strand interval and the file offset at
    which the block was read. The result is written to ``index_file``.

    :param in_file: path of the MAF file to index
    :param index_file: path the index is written to
    """
    indexes = interval_index_file.Indexes()
    with open(in_file) as in_handle:
        reader = maf.Reader(in_handle)
        while True:
            # Sample the offset before reading so it marks the block start.
            pos = reader.file.tell()
            # Builtin next() instead of the Python-2-only .next() method.
            rec = next(reader)
            if rec is None:
                # The loop treats a None record as end of input.
                break
            for c in rec.components:
                indexes.add(c.src, c.forward_strand_start,
                            c.forward_strand_end, pos, max=c.src_size)
    with open(index_file, "w") as index_handle:
        indexes.write(index_handle)
def main():
    """
    Command-line entry point: build an interval index for a wiggle file.

    Positional arguments (as returned by ``doc_optparse.parse``):
      1. the wiggle file, optionally ``.bz2`` or ``.lzo`` compressed (a
         matching offset table ``<file>t`` must then exist next to it);
      2. optional name of the index file; defaults to the wiggle file name
         with any ``.bz2``/``.lzo`` suffix removed, plus ``.index``.

    Bed-style data lines are indexed one by one. Each ``variableStep`` /
    ``fixedStep`` section is indexed as a single interval that points at the
    offset of its declaration line.

    :raises ValueError: on a bed-style data line with fewer than three fields
    """
    # Parse command line
    options, args = doc_optparse.parse(__doc__)
    if options.version:
        return

    try:
        wiggle_file = args[0]
        # If it appears to be a bz2 file, attempt to open with table
        if wiggle_file.endswith(".bz2"):
            table_file = wiggle_file + "t"
            if not os.path.exists(table_file):
                doc_optparse.exit("To index bz2 compressed files first "
                                  "create a bz2t file with bzip-table.")
            # Open with SeekableBzip2File so we have tell support
            wiggle_in = SeekableBzip2File(wiggle_file, table_file)
            # Strip .bz2 from the filename before adding ".index"
            wiggle_file = wiggle_file[:-4]
        elif wiggle_file.endswith(".lzo"):
            from bx.misc.seeklzop import SeekableLzopFile
            table_file = wiggle_file + "t"
            if not os.path.exists(table_file):
                doc_optparse.exit("To index lzo compressed files first "
                                  "create a lzot file with lzop_build_offset_table.")
            # Open with SeekableLzopFile so we have tell support
            wiggle_in = SeekableLzopFile(wiggle_file, table_file)
            # Strip .lzo from the filename before adding ".index"
            wiggle_file = wiggle_file[:-4]
        else:
            wiggle_in = open(wiggle_file)
        # Determine the name of the index file
        if len(args) > 1:
            index_file = args[1]
        else:
            index_file = wiggle_file + ".index"
    except Exception:
        # Not a bare ``except:``, which would also intercept SystemExit
        # (presumably raised by doc_optparse.exit() -- confirm) and
        # KeyboardInterrupt.
        doc_optparse.exception()

    indexes = interval_index_file.Indexes()

    # Can't use the iterator, as there is no next() and thus no way to access
    # the positions. The following code is modified from wiggle.py.
    last_chrom = None
    start = None
    end = None
    first_pos = None    # offset of the current section's declaration line
    current_span = 1
    current_step = None
    mode = "bed"        # data lines before any declaration are bed-style

    while True:
        pos = wiggle_in.tell()
        line = wiggle_in.readline()
        if not line:
            break
        if line.isspace() or line.startswith(("track", "#", "browser")):
            continue

        if line.startswith(("variableStep", "fixedStep")):
            # A new declaration closes the previous section, if it has a
            # known extent.
            if first_pos is not None and start is not None:
                indexes.add(last_chrom, start, end, first_pos)
            first_pos = pos
            header = bx.wiggle.parse_header(line)
            last_chrom = header['chrom']
            is_variable = line.startswith("variableStep")
            if is_variable and 'start' not in header:
                # variableStep declarations normally carry no start; it is
                # taken from the first data row of the section instead.
                start = None
                end = None
            else:
                start = int(header['start']) - 1
                end = start
            current_span = int(header['span']) if 'span' in header else 1
            current_step = int(header['step']) if 'step' in header else None
            mode = "variableStep" if is_variable else "fixedStep"
        elif mode == "bed":
            fields = line.split()
            if len(fields) < 3:
                raise ValueError("Unexpected input line: %s" % line.strip())
            indexes.add(fields[0], int(fields[1]), int(fields[2]), pos)
        elif mode == "variableStep":
            fields = line.split()
            row_start = int(fields[0]) - 1
            if start is None:
                start = row_start
            end = row_start + current_span
        else:
            # mode == "fixedStep": every data row advances the section end.
            end += current_step

    # The final section has no following declaration to close it, so flush
    # it here; otherwise it would be missing from the index.
    if first_pos is not None and start is not None:
        indexes.add(last_chrom, start, end, first_pos)

    # NOTE(review): text mode kept from the original; confirm whether
    # Indexes.write() needs a binary handle on Python 3.
    with open(index_file, 'w') as out:
        indexes.write(out)