def open_possibly_compressed_file(filename):
    """
    Open a file for reading, transparently handling compression.

    The format is chosen from the file's content rather than its name:

      - sam.is_bam(filename) is true:    returns sam.open_bam(filename)
      - starts with bytes 1f 8b (gzip):  returns gzip.open(filename, 'rb')
      - starts with 'BZh' (bzip2):       returns bz2.BZ2File(filename, 'rb')
      - anything else:                   returns open(filename, 'rb')

    filename must be a path; an already-open file object is not accepted.
    """
    # Function-scope import, as in the original (presumably to avoid a
    # circular import between nesoni modules -- confirm).
    from nesoni import sam

    # BAM is tested before the gzip magic number (presumably because BAM
    # data also begins with the gzip magic -- confirm against sam.is_bam).
    if sam.is_bam(filename):
        return sam.open_bam(filename)

    # Sniff the first few bytes; "with" closes the handle even if read fails.
    with open(filename, 'rb') as f:
        peek = f.read(4)

    # The file was opened in binary mode, so compare against bytes literals
    # (identical to plain str literals on Python 2).
    if peek.startswith(b'\x1f\x8b'):
        return gzip.open(filename, 'rb')
    if peek.startswith(b'BZh'):
        # The class is BZ2File; bz2 has no attribute named BZFile.
        return bz2.BZ2File(filename, 'rb')
    return open(filename, 'rb')
def open_possibly_compressed_file(filename):
    """
    Read a file. It might be compressed.

    Returns an open, readable file-like object for the path filename. The
    kind of object depends on what the file contains:

      - a BAM file (according to sam.is_bam): the result of sam.open_bam
      - gzip data (leading bytes 1f 8b):      a gzip file object
      - bzip2 data (leading bytes 'BZh'):     a bz2.BZ2File
      - otherwise:                            the plain file, binary mode
    """
    # Imported locally, as the original did (presumably to sidestep a
    # circular import -- confirm).
    from nesoni import sam

    # The BAM check runs first, before any magic-number sniffing.
    if sam.is_bam(filename):
        return sam.open_bam(filename)

    # Only the first four bytes are needed to recognise the format; the
    # context manager guarantees the probe handle is released.
    with open(filename, 'rb') as probe:
        peek = probe.read(4)

    # Bytes literals: the data came from a binary-mode read. On Python 2
    # these are ordinary str objects, so behaviour there is unchanged.
    if peek.startswith(b'\x1f\x8b'):
        return gzip.open(filename, 'rb')
    elif peek.startswith(b'BZh'):
        # Was bz2.BZFile, which does not exist and raised AttributeError.
        return bz2.BZ2File(filename, 'rb')
    else:
        return open(filename, 'rb')
def get_compression_type(filename):
    """
    Classify a file's compression by sniffing its first bytes.

    Returns one of the strings:

      'none'   -- filename has a read attribute (treated as already
                  file-like and not inspected), or no known magic number
                  was found
      'gzip'   -- starts with the gzip magic number 1f 8b
      'bam'    -- starts with the gzip magic number and sam.is_bam agrees
      'bzip2'  -- starts with 'BZh'
    """
    if hasattr(filename, 'read'):
        return 'none'  # It's already file-like

    # "with" closes the handle even if the read raises.
    with open(filename, 'rb') as f:
        peek = f.read(4)

    # Compare against bytes literals: the data was read in binary mode
    # (on Python 2 these are the same as plain str literals).
    if peek.startswith(b'\x1f\x8b'):
        # gzip format -- but it might be a BAM. nesoni.sam is only needed
        # on this branch, so it is imported here.
        from nesoni import sam
        if sam.is_bam(filename):
            return 'bam'
        return 'gzip'
    if peek.startswith(b'BZh'):
        # bzip2 format
        return 'bzip2'
    return 'none'
def get_compression_type(filename):
    """
    Report how the given file is compressed: 'none', 'gzip', 'bzip2' or 'bam'.

    Anything exposing a read attribute is assumed to be an open file-like
    object and is reported as 'none' without being touched. Otherwise the
    first four bytes of the file at path filename decide the answer; a file
    carrying the gzip magic number is reported as 'bam' when sam.is_bam
    says so, and as 'gzip' otherwise.
    """
    if hasattr(filename, 'read'):
        return 'none'  # It's already file-like

    # Read just enough to see the magic number; the context manager
    # releases the handle on every path, including errors.
    with open(filename, 'rb') as probe:
        magic = probe.read(4)

    # Magic numbers are bytes literals because the file is read in binary
    # mode (equivalent to str literals on Python 2).
    is_gzip = magic.startswith(b'\x1f\x8b')
    is_bzip2 = magic.startswith(b'BZh')

    if is_gzip:
        # Only this branch needs nesoni.sam, so import it lazily here.
        from nesoni import sam
        return 'bam' if sam.is_bam(filename) else 'gzip'
    if is_bzip2:
        return 'bzip2'
    return 'none'
def run(self):
    """
    Import the alignments in self.input into the working directory
    self.output_dir, sorted by read name.

    Steps:
      1. Set up the working directory: register self.reference and record
         the snp_cost parameter.
      2. If self.input is not a BAM file (per sam.is_bam), pipe its raw
         bytes through "samtools view -S -b -" to make "temp.bam" in the
         output directory.
      3. Name-sort the BAM with sam.sort_bam, using the output prefix
         "<output_dir>/alignments".
      4. Delete the temporary BAM, if one was made.
    """
    workspace = working_directory.Working(self.output_dir)
    workspace.setup_reference(self.reference)
    workspace.update_param(snp_cost=self.snp_cost)

    bam_prefix = io.abspath(self.output_dir, "alignments")

    if sam.is_bam(self.input):
        # Already BAM: sort it directly, nothing to clean up afterwards.
        sort_input_filename = self.input
        temp_filename = None
    else:
        temp_filename = io.abspath(self.output_dir, "temp.bam")
        sort_input_filename = temp_filename

        # Presumably Pipe_writer feeds what we write to the command's stdin
        # and sends its output to temp_filename -- confirm in nesoni.io.
        writer = io.Pipe_writer(temp_filename, ["samtools", "view", "-S", "-b", "-"])

        # Copy in 1 MiB chunks; "with" closes the input even if a write to
        # the pipe fails part-way through.
        with open(self.input, "rb") as f:
            while True:
                data = f.read(1 << 20)
                if not data:
                    break
                writer.write(data)
        writer.close()

    grace.status("Sort")

    sam.sort_bam(sort_input_filename, bam_prefix, by_name=True)

    if temp_filename is not None:
        os.unlink(temp_filename)

    grace.status("")
def run(self):
    """
    Bring self.input into the working directory self.output_dir as a
    name-sorted BAM.

    The working directory is first given its reference (self.reference)
    and the snp_cost parameter. A BAM input (per sam.is_bam) is sorted as
    is; any other input is first converted by streaming it through
    'samtools view -S -b -' into 'temp.bam', which is deleted once sorting
    has finished. Sorting is done by sam.sort_bam with by_name=True and the
    output prefix '<output_dir>/alignments'.
    """
    workspace = working_directory.Working(self.output_dir)
    workspace.setup_reference(self.reference)
    workspace.update_param(snp_cost=self.snp_cost)

    bam_prefix = io.abspath(self.output_dir, 'alignments')

    if sam.is_bam(self.input):
        sort_input_filename = self.input
        temp_filename = None  # nothing temporary to remove later
    else:
        temp_filename = io.abspath(self.output_dir, 'temp.bam')
        sort_input_filename = temp_filename

        # Assumed: data written to the Pipe_writer goes to the command's
        # stdin and the command's output lands in temp_filename -- TODO
        # confirm against nesoni.io.Pipe_writer.
        writer = io.Pipe_writer(temp_filename, ['samtools', 'view', '-S', '-b', '-'])

        # Stream the input in 1 MiB blocks rather than loading it whole.
        # The context manager closes the input file even when a write
        # raises, which the previous explicit close() did not.
        with open(self.input, 'rb') as f:
            while True:
                data = f.read(1 << 20)
                if not data:
                    break
                writer.write(data)
        writer.close()

    grace.status('Sort')

    sam.sort_bam(sort_input_filename, bam_prefix, by_name=True)

    if temp_filename is not None:
        os.unlink(temp_filename)

    grace.status('')
def run(self):
    """
    Import the alignments in self.input into the working directory
    self.output_dir, sorted by read name using the samtools executable.

    Steps:
      1. Set up the working directory: register self.reference and record
         the snp_cost parameter.
      2. If self.input is not a BAM file (per sam.is_bam), pipe its raw
         bytes through 'samtools view -S -b -' to make 'temp.bam' in the
         output directory.
      3. Run 'samtools sort -n <input> <output_dir>/alignments'.
      4. Delete the temporary BAM, if one was made.
    """
    workspace = working_directory.Working(self.output_dir)
    workspace.setup_reference(self.reference)
    workspace.update_param(snp_cost=self.snp_cost)

    bam_prefix = io.abspath(self.output_dir, 'alignments')

    if sam.is_bam(self.input):
        # Already BAM: sort it directly, nothing to clean up afterwards.
        sort_input_filename = self.input
        temp_filename = None
    else:
        temp_filename = io.abspath(self.output_dir, 'temp.bam')
        sort_input_filename = temp_filename

        # Presumably Pipe_writer feeds what we write to the command's stdin
        # and sends its output to temp_filename -- confirm in nesoni.io.
        writer = io.Pipe_writer(temp_filename, ['samtools', 'view', '-S', '-b', '-'])

        # Copy in 1 MiB chunks; "with" closes the input even if a write to
        # the pipe fails part-way through.
        with open(self.input, 'rb') as f:
            while True:
                data = f.read(1 << 20)
                if not data:
                    break
                writer.write(data)
        writer.close()

    grace.status('Sort')

    # -n asks samtools to sort by read name; the last argument is an output
    # prefix, not a complete filename.
    io.execute([
        'samtools', 'sort', '-n', sort_input_filename, bam_prefix
    ])

    if temp_filename is not None:
        os.unlink(temp_filename)

    grace.status('')
def run(self):
    """
    Call variants with FreeBayes over self.samples, writing
    self.prefix + '.vcf' and then indexing it.

    Each entry of self.samples is classified in this order:
      - a BAM file (per sam.is_bam) is used directly;
      - a directory is opened as a working directory, contributing its
        filtered sorted BAM, a '##sampleTags=' header line, and (for the
        first such directory) a fallback reference FASTA;
      - a sequence file (per io.is_sequence_file) is taken as the
        reference; at most one is allowed.
    Entries matching none of these are ignored.

    Raises grace.Error if no reference was given or could be inferred.
    """
    bam_files = []
    header_extras = []
    explicit_reference = None
    fallback_reference = None

    for sample in self.samples:
        if sam.is_bam(sample):
            bam_files.append(sample)
            continue

        if os.path.isdir(sample):
            working = working_directory.Working(sample, True)
            bam_files.append(working.get_filtered_sorted_bam())
            header_extras.append('##sampleTags=' + ','.join(working.get_tags()))
            if fallback_reference is None:
                fallback_reference = working.get_reference().reference_fasta_filename()
            continue

        if io.is_sequence_file(sample):
            assert explicit_reference is None, 'Only one reference FASTA file allowed.'
            explicit_reference = sample

    # An explicitly given FASTA wins; otherwise use the first working
    # directory's reference.
    reference = explicit_reference
    if reference is None:
        reference = fallback_reference
    if reference is None:
        raise grace.Error('No reference FASTA file given.')

    vcf_filename = self.prefix + '.vcf'

    with nesoni.Stage() as stage:
        tempspace = stage.enter(workspace.tempspace())

        if self.depth_limit:
            # Depth-limit every BAM into the temp space, replacing each
            # entry with its limited copy. The inner stage is exited before
            # the limited files are used below.
            with nesoni.Stage() as stage2:
                for i, bam in enumerate(bam_files):
                    limiter = sam.Bam_depth_limit(
                        tempspace / ('%d' % i),
                        bam,
                        depth=self.depth_limit
                    )
                    limiter.process_make(stage2)
                    bam_files[i] = tempspace / ('%d.bam' % i)

        # FreeBayes claims to handle multiple bams, but it doesn't actually work
        if len(bam_files) > 1:
            sam.Bam_merge(tempspace / 'merged', bams=bam_files, index=False).run()
            bam_files = [tempspace / 'merged.bam']

        command = ['freebayes', '-f', reference]
        command += ['--ploidy', str(self.ploidy)]
        command += ['--pvar', str(self.pvar)]
        command = command + self.freebayes_options + bam_files

        self.log.log('Running: ' + ' '.join(command) + '\n')

        f_out = stage.enter(open(vcf_filename, 'wb'))
        f_in = stage.enter(io.pipe_from(command))

        # Copy FreeBayes' output through, slipping the extra header lines
        # in just before the first line that does not start with '##'.
        extras_pending = True
        for line in f_in:
            if extras_pending and not line.startswith('##'):
                for extra_line in header_extras:
                    f_out.write(extra_line + '\n')
                extras_pending = False
            f_out.write(line)

    # Indexed after the stage has exited (assumes leaving the stage closes
    # f_out so the VCF is complete on disk -- confirm).
    index_vcf(vcf_filename)
def run(self):
    """
    Run FreeBayes on the samples in self.samples and produce an indexed
    VCF at self.prefix + '.vcf'.

    Inputs are gathered from self.samples: BAM files are used as they are,
    directories are treated as working directories (supplying a filtered
    sorted BAM, their tags as a '##sampleTags=' header line, and a
    candidate reference), and a sequence file is used as the reference
    FASTA. If self.depth_limit is set, each BAM is depth-limited first;
    if more than one BAM remains they are merged into one before calling
    FreeBayes.

    Raises grace.Error when no reference FASTA can be determined.
    """
    inputs = []
    sample_tag_lines = []
    given_fasta = None
    inferred_fasta = None

    for item in self.samples:
        if sam.is_bam(item):
            inputs.append(item)
        elif os.path.isdir(item):
            wd = working_directory.Working(item, True)
            inputs.append(wd.get_filtered_sorted_bam())
            tags = ','.join(wd.get_tags())
            sample_tag_lines.append('##sampleTags=' + tags)
            # Only the first working directory's reference is remembered.
            if inferred_fasta is None:
                inferred_fasta = wd.get_reference().reference_fasta_filename()
        elif io.is_sequence_file(item):
            assert given_fasta is None, 'Only one reference FASTA file allowed.'
            given_fasta = item

    reference = inferred_fasta if given_fasta is None else given_fasta
    if reference is None:
        raise grace.Error('No reference FASTA file given.')

    with nesoni.Stage() as stage:
        tempspace = stage.enter(workspace.tempspace())

        if self.depth_limit:
            with nesoni.Stage() as stage2:
                for index, original_bam in enumerate(inputs):
                    sam.Bam_depth_limit(
                        tempspace / ('%d' % index),
                        original_bam,
                        depth=self.depth_limit,
                    ).process_make(stage2)
                    # From here on, use the depth-limited copy instead.
                    inputs[index] = tempspace / ('%d.bam' % index)

        # FreeBayes claims to handle multiple bams, but it doesn't actually work
        if len(inputs) > 1:
            merge = sam.Bam_merge(tempspace / 'merged', bams=inputs, index=False)
            merge.run()
            inputs = [tempspace / 'merged.bam']

        fixed_arguments = [
            'freebayes',
            '-f', reference,
            '--ploidy', str(self.ploidy),
            '--pvar', str(self.pvar),
        ]
        command = fixed_arguments + self.freebayes_options + inputs

        self.log.log('Running: ' + ' '.join(command) + '\n')

        f_out = stage.enter(open(self.prefix + '.vcf', 'wb'))
        f_in = stage.enter(io.pipe_from(command))

        # Pass the output through unchanged, adding the sample tag lines
        # once, immediately ahead of the first line not starting with '##'.
        done_extra = False
        for line in f_in:
            if not (done_extra or line.startswith('##')):
                f_out.writelines(tag_line + '\n' for tag_line in sample_tag_lines)
                done_extra = True
            f_out.write(line)

    # Indexing happens once the stage is left (assumed to close the VCF
    # output so it is fully written -- TODO confirm).
    index_vcf(self.prefix + '.vcf')