def main():
    """
    Build a SummaryTree from the reads of a SAM or BAM file and write it out.

    Command line:
        -B / --bam  <input.bam> <input.bam.bai> <output>
        -S / --sam  <input.sam> <output>

    Exactly as before, --bam takes precedence when both flags are given.
    A missing format flag or too few positional arguments is reported
    through the option parser (usage message, exit status 2) instead of
    surfacing later as a NameError / IndexError.
    """
    parser = optparse.OptionParser(
        usage="%prog (-B in.bam in.bai out | -S in.sam out)")
    parser.add_option('-S', '--sam', action="store_true", dest="is_sam")
    parser.add_option('-B', '--bam', action="store_true", dest="is_bam")
    options, args = parser.parse_args()

    # Validate everything up front so that no file is opened for a command
    # line that cannot be satisfied.
    if options.is_bam:
        if len(args) < 3:
            parser.error("--bam requires <input.bam> <index> <output>")
        input_fname, index_fname, out_fname = args[0], args[1], args[2]
        samfile = csamtools.Samfile(filename=input_fname, mode='rb',
                                    index_filename=index_fname)
    elif options.is_sam:
        if len(args) < 2:
            parser.error("--sam requires <input.sam> <output>")
        input_fname, out_fname = args[0], args[1]
        samfile = csamtools.Samfile(filename=input_fname, mode='r')
    else:
        parser.error("one of --sam or --bam must be specified")

    st = SummaryTree(block_size=25, levels=6, draw_cutoff=150,
                     detail_cutoff=30)
    try:
        # Each read contributes the interval [pos, pos + rlen) on its
        # reference sequence.
        for read in samfile.fetch():
            st.insert_range(samfile.getrname(read.rname), read.pos,
                            read.pos + read.rlen)
    finally:
        # Release the underlying file handle even if iteration fails.
        samfile.close()
    st.write(out_fname)
finally: os.unlink(dataset_symlink) stderr = open(stderr_name).read().strip() if stderr: if exit_code != 0: os.unlink(stderr_name) # clean up raise Exception("Error Setting BAM Metadata: %s" % stderr) else: print stderr dataset.metadata.bam_index = index_file # Remove temp file os.unlink(stderr_name) # Now use pysam with BAI index to determine additional metadata try: bam_file = csamtools.Samfile(filename=dataset.file_name, mode='rb', index_filename=index_file.file_name) dataset.metadata.reference_names = list(bam_file.references) dataset.metadata.reference_lengths = list(bam_file.lengths) dataset.metadata.bam_header = bam_file.header dataset.metadata.read_groups = [ read_group['ID'] for read_group in dataset.metadata.bam_header.get('RG', []) if 'ID' in read_group ] dataset.metadata.sort_order = dataset.metadata.bam_header.get( 'HD', {}).get('SO', None) dataset.metadata.bam_version = dataset.metadata.bam_header.get( 'HD', {}).get('VN', None) except: pass
def set_meta(self, dataset, overwrite=True, **kwd):
    """
    Create the BAM index for ``dataset`` and record BAM header metadata.

    Runs ``samtools index`` to build the index file, stores it in
    ``dataset.metadata.bam_index``, then opens the BAM with pysam to fill
    in reference names/lengths, the header, read groups, sort order and
    BAM version.

    ``overwrite`` and ``kwd`` are accepted for interface compatibility and
    are not used: these metadata values are always overwritten.

    Raises:
        Exception: if ``samtools index`` exits with a non-zero status.
    """
    # These metadata values are not accessible by users, always overwrite
    index_file = dataset.metadata.bam_index
    if not index_file:
        index_file = dataset.metadata.spec['bam_index'].param.new_file(
            dataset=dataset)

    # Scratch file capturing samtools' stderr. mkstemp() gives a file that
    # really exists until we remove it; the previous
    # NamedTemporaryFile(...).name idiom dropped the file object at once,
    # so the file was deleted and its name reused afterwards.
    fd, stderr_name = tempfile.mkstemp(prefix="bam_index_stderr")
    os.close(fd)
    try:
        # Create the Bam index
        # $ samtools index
        # Usage: samtools index <in.bam> [<out.index>]
        command = ['samtools', 'index', dataset.file_name,
                   index_file.file_name]
        with open(stderr_name, 'wb') as stderr_fh:
            exit_code = subprocess.call(args=command, stderr=stderr_fh)

        # Did index succeed?
        if exit_code == -6:
            # SIGABRT, most likely samtools 1.0+ which does not accept the
            # index name parameter. Index through a symlink placed next to
            # the target so the generated '<symlink>.bai' can be moved
            # into place.
            dataset_symlink = os.path.join(
                os.path.dirname(index_file.file_name),
                '__dataset_%d_%s' % (
                    dataset.id, os.path.basename(index_file.file_name)))
            os.symlink(dataset.file_name, dataset_symlink)
            try:
                command = ['samtools', 'index', dataset_symlink]
                # 'wb' truncates: only the retry's stderr is kept.
                with open(stderr_name, 'wb') as stderr_fh:
                    exit_code = subprocess.call(args=command,
                                                stderr=stderr_fh)
                shutil.move(dataset_symlink + '.bai', index_file.file_name)
            except Exception as e:
                with open(stderr_name, 'a') as stderr_fh:
                    stderr_fh.write(
                        'Galaxy attempted to build the BAM index with samtools 1.0+ but failed: %s\n' % e)
                # Make sure an exception raised by shutil.move() is
                # re-raised below
                exit_code = 1
            finally:
                os.unlink(dataset_symlink)

        with open(stderr_name) as stderr_fh:
            stderr = stderr_fh.read().strip()
    finally:
        # Remove temp file, including when a step above raised.
        os.unlink(stderr_name)

    if exit_code != 0:
        # A failed run is an error even when samtools printed nothing
        # (e.g. it was killed by a signal); do not record a bogus index.
        raise Exception("Error Setting BAM Metadata: %s" % (
            stderr or "samtools index exited with code %d" % exit_code))
    if stderr:
        # Successful run that still produced warnings: surface them.
        print(stderr)

    dataset.metadata.bam_index = index_file

    # Now use pysam with BAI index to determine additional metadata
    try:
        bam_file = csamtools.Samfile(filename=dataset.file_name, mode='rb',
                                     index_filename=index_file.file_name)
        try:
            dataset.metadata.reference_names = list(bam_file.references)
            dataset.metadata.reference_lengths = list(bam_file.lengths)
            dataset.metadata.bam_header = bam_file.header
            dataset.metadata.read_groups = [
                read_group['ID']
                for read_group in dataset.metadata.bam_header.get('RG', [])
                if 'ID' in read_group
            ]
            dataset.metadata.sort_order = dataset.metadata.bam_header.get(
                'HD', {}).get('SO', None)
            dataset.metadata.bam_version = dataset.metadata.bam_header.get(
                'HD', {}).get('VN', None)
        finally:
            bam_file.close()
    except Exception:
        # The extra header metadata is best-effort: a failure here must not
        # fail indexing. Unlike a bare ``except:``, this still lets
        # KeyboardInterrupt / SystemExit propagate.
        pass