def run(self):
    """Classify the input files, open the reference directory and queue builds.

    Every entry of ``self.filenames`` is checked with
    ``io.is_sequence_file`` and ``annotation.is_annotation_file``; a file
    accepted by both checks is recorded in both lists.

    When at least one sequence file is given, the reference in
    ``self.output_dir`` is opened with ``must_exist=False`` and handed the
    sequences and annotations. With no sequence files it is opened with
    ``must_exist=True`` and left as it is.

    Raises:
        grace.Error: a filename passes neither check.
        AssertionError: annotations were given without any sequence file.
    """
    sequence_files = []
    annotation_files = []

    for path in self.filenames:
        is_sequence = io.is_sequence_file(path)
        is_annotation = annotation.is_annotation_file(path)
        if not (is_sequence or is_annotation):
            raise grace.Error(
                path + ' is neither a sequence file nor an annotation file that nesoni can read.'
            )
        if is_sequence:
            sequence_files.append(path)
        if is_annotation:
            annotation_files.append(path)

    if sequence_files:
        reference = Reference(self.output_dir, must_exist=False)
        reference.set_sequences(sequence_files)
        reference.set_annotations(annotation_files)
    else:
        assert not annotation_files, 'Annotations given without any reference sequences.'
        reference = Reference(self.output_dir, must_exist=True)

    # Each requested build is handed to the stage via stage.process.
    # NOTE(review): apply_ifavailable_program / apply_ifavailable_jar
    # presumably combine the user's setting with whether the tool is
    # installed -- confirm against the config module.
    with legion.Stage() as stage:
        if self.genome:
            stage.process(reference.build_genome, self.genome_select)

        if config.apply_ifavailable_program(self.bowtie, 'bowtie2-build'):
            stage.process(reference.build_bowtie_index)

        if config.apply_ifavailable_program(self.ls, 'gmapper-ls'):
            stage.process(reference.build_shrimp_mmap, False)

        if config.apply_ifavailable_program(self.cs, 'gmapper-cs'):
            stage.process(reference.build_shrimp_mmap, True)

        if config.apply_ifavailable_jar(self.snpeff, 'snpEff.jar'):
            stage.process(reference.build_snpeff)
def run(self):
    """Classify the input files, open the reference directory and run builds.

    Files from ``self.filenames`` are sorted into sequence and annotation
    lists using ``io.is_sequence_file`` and
    ``annotation.is_annotation_file`` (a file may appear in both).

    If sequences were supplied, the reference in ``self.output_dir`` is
    opened with ``must_exist=False`` and given the sequences and
    annotations; otherwise it is opened with ``must_exist=True``.

    The builds selected by ``self.ls``, ``self.cs``, ``self.bowtie``,
    ``self.genome`` and ``self.snpeff`` then run in that order. The file
    named by ``self.log_filename()`` is opened in ``'wb'`` mode and its
    handle is passed to the shrimp and bowtie builds.

    Raises:
        grace.Error: a filename passes neither check.
        AssertionError: annotations were given without any sequence file.
    """
    found_sequences = []
    found_annotations = []

    for path in self.filenames:
        usable_as_sequence = io.is_sequence_file(path)
        usable_as_annotation = annotation.is_annotation_file(path)

        if usable_as_sequence:
            found_sequences.append(path)
        if usable_as_annotation:
            found_annotations.append(path)

        if not usable_as_sequence and not usable_as_annotation:
            raise grace.Error(
                path + ' is neither a sequence file nor an annotation file that nesoni can read.'
            )

    if found_sequences:
        reference = Reference(self.output_dir, must_exist=False)
        reference.set_sequences(found_sequences)
        reference.set_annotations(found_annotations)
    else:
        assert not found_annotations, 'Annotations given without any reference sequences.'
        reference = Reference(self.output_dir, must_exist=True)

    with open(self.log_filename(), 'wb') as log_file:
        if self.ls:
            reference.build_shrimp_mmap(False, log_file)
        if self.cs:
            reference.build_shrimp_mmap(True, log_file)
        if self.bowtie:
            reference.build_bowtie_index(log_file)
        # The remaining builds do not take the log handle.
        if self.genome:
            reference.build_genome(self.genome_select)
        if self.snpeff:
            reference.build_snpeff()
def run(self):
    """Write a ``<prefix>.genome`` zip archive plus per-sequence text files.

    Steps, in order:

    1. Sort ``self.filenames`` into sequence and annotation files using
       ``io.is_sequence_file`` and ``annotation.is_annotation_file``.
    2. Create the directory ``self.prefix`` if it does not exist.
    3. Write ``property.txt``, a GFF file (via ``trivia.As_gff``) and a
       cytoband file into that directory.
    4. Write each sequence to ``<name>.txt`` in that directory.
    5. Zip the three files from step 3 into ``self.prefix + '.genome'``
       (replacing an existing archive) and delete them afterwards.

    The per-sequence ``.txt`` files are neither zipped nor deleted; they
    stay in the ``self.prefix`` directory, which ``property.txt`` refers
    to via ``sequenceLocation``.
    NOTE(review): this layout resembles an IGV ".genome" bundle -- confirm.

    All files are opened with ``with`` so the handles are closed even when
    a later step raises.

    Raises:
        AssertionError: a filename passes neither check, or a sequence
            name contains ``'/'`` (it is used as a file name).
    """
    base = os.path.split(self.prefix)[1]

    annotations = []
    sequences = []
    for filename in self.filenames:
        recognized = False
        if io.is_sequence_file(filename):
            sequences.append(filename)
            recognized = True
        if annotation.is_annotation_file(filename):
            annotations.append(filename)
            recognized = True
        assert recognized, 'File is neither a recognized sequence or annotation file'

    cytoband_filename = os.path.join(self.prefix, base + '_cytoband.txt')
    property_filename = os.path.join(self.prefix, 'property.txt')
    gff_filename = os.path.join(self.prefix, base + '.gff')
    # Only these three files go into the archive and are removed afterwards.
    output_filenames = [cytoband_filename, property_filename, gff_filename]

    if not os.path.exists(self.prefix):
        os.mkdir(self.prefix)

    with open(property_filename, 'wb') as f:
        print >> f, 'ordered=true'
        print >> f, 'id=%s' % base
        print >> f, 'name=%s' % (self.name or base)
        print >> f, 'cytobandFile=%s_cytoband.txt' % base
        print >> f, 'geneFile=%s.gff' % base
        print >> f, 'sequenceLocation=%s' % base

    trivia.As_gff(
        output=gff_filename,
        filenames=annotations,
        exclude=['gene', 'source'],
    ).run()

    with open(cytoband_filename, 'wb') as f_cyt:
        for filename in sequences:
            for name, seq in io.read_sequences(filename):
                # The name becomes a file name, so it must not contain
                # a path separator.
                assert '/' not in name
                with open(os.path.join(self.prefix, name + '.txt'), 'wb') as f_seq:
                    f_seq.write(seq)
                # One tab-separated line per sequence: name, 0, length.
                print >> f_cyt, '%s\t0\t%d' % (name, len(seq))

    genome_filename = self.prefix + '.genome'
    # Remove a stale archive before zip is run on the same path.
    if os.path.exists(genome_filename):
        os.unlink(genome_filename)
    io.execute(
        ['zip', '-j', io.abspath(genome_filename)] +
        [io.abspath(item) for item in output_filenames]
    )

    for filename in output_filenames:
        if os.path.exists(filename):
            os.unlink(filename)
def run(self):
    """Register the input files on the reference and build shrimp mmaps.

    Every entry of ``self.filenames`` is checked with
    ``io.is_sequence_file`` and ``annotation.is_annotation_file``; a file
    accepted by both is recorded in both lists. The reference in
    ``self.output_dir`` is opened with ``must_exist=False`` and given the
    two lists. ``reference.build_shrimp_mmap`` is then called with
    ``False`` when ``self.ls`` is set and with ``True`` when ``self.cs``
    is set.

    Raises:
        grace.Error: a filename passes neither check.
    """
    sequence_files = []
    annotation_files = []

    for path in self.filenames:
        as_sequence = io.is_sequence_file(path)
        as_annotation = annotation.is_annotation_file(path)

        if not as_sequence and not as_annotation:
            raise grace.Error(
                path + ' is neither a sequence file nor an annotation file that nesoni can read.'
            )

        if as_sequence:
            sequence_files.append(path)
        if as_annotation:
            annotation_files.append(path)

    reference = Reference(self.output_dir, must_exist=False)
    reference.set_sequences(sequence_files)
    reference.set_annotations(annotation_files)

    if self.ls:
        reference.build_shrimp_mmap(False)
    if self.cs:
        reference.build_shrimp_mmap(True)
def run(self):
    """Collect sequence and annotation files, then build shrimp mmaps.

    Each name in ``self.filenames`` must be accepted by
    ``io.is_sequence_file`` or ``annotation.is_annotation_file`` (or
    both, in which case it is added to both lists). The lists are stored
    on a ``Reference`` opened on ``self.output_dir`` with
    ``must_exist=False``. ``self.ls`` and ``self.cs`` select calls to
    ``reference.build_shrimp_mmap`` with ``False`` and ``True``.

    Raises:
        grace.Error: a filename is accepted by neither check.
    """
    seq_paths = []
    annot_paths = []

    for candidate in self.filenames:
        # Count how many of the two checks accept the file.
        matches = 0

        if io.is_sequence_file(candidate):
            seq_paths.append(candidate)
            matches += 1

        if annotation.is_annotation_file(candidate):
            annot_paths.append(candidate)
            matches += 1

        if matches == 0:
            message = (
                candidate
                + ' is neither a sequence file nor an annotation file that nesoni can read.'
            )
            raise grace.Error(message)

    reference = Reference(self.output_dir, must_exist=False)
    reference.set_sequences(seq_paths)
    reference.set_annotations(annot_paths)

    # Each flag is tested only after the previous build call has returned.
    if self.ls:
        reference.build_shrimp_mmap(False)
    if self.cs:
        reference.build_shrimp_mmap(True)
def run(self):
    """Run ``freebayes`` over the samples and write ``<prefix>.vcf``.

    Each entry of ``self.samples`` is handled by the first matching rule:

    * a BAM file (``sam.is_bam``) is used directly;
    * a directory is opened as a working directory, contributing its
      filtered sorted BAM, a ``##sampleTags=`` header line and, for the
      first such directory, a fallback reference FASTA;
    * a sequence file (``io.is_sequence_file``) becomes the reference.

    Entries matching none of the rules are ignored. An explicitly given
    reference takes precedence over the fallback.

    The ``##sampleTags=`` lines are inserted into the output just before
    the first line from freebayes that does not start with ``##``.

    Raises:
        AssertionError: more than one reference sequence file was given.
        grace.Error: no reference could be determined.
    """
    bam_files = []
    tag_headers = []
    explicit_reference = None
    fallback_reference = None

    for sample in self.samples:
        if sam.is_bam(sample):
            bam_files.append(sample)
            continue

        if os.path.isdir(sample):
            working = working_directory.Working(sample, True)
            bam_files.append(working.get_filtered_sorted_bam())
            tag_headers.append('##sampleTags=' + ','.join(working.get_tags()))
            if fallback_reference is None:
                fallback_reference = working.get_reference().reference_fasta_filename()
            continue

        if io.is_sequence_file(sample):
            assert explicit_reference is None, 'Only one reference FASTA file allowed.'
            explicit_reference = sample

    reference = explicit_reference
    if reference is None:
        reference = fallback_reference
    if reference is None:
        raise grace.Error('No reference FASTA file given.')

    with nesoni.Stage() as stage:
        tempspace = stage.enter(workspace.tempspace())

        if self.depth_limit:
            # Replace every BAM with a depth-limited copy in the temp space.
            with nesoni.Stage() as limit_stage:
                for index, bam in enumerate(bam_files):
                    sam.Bam_depth_limit(
                        tempspace / ('%d' % index),
                        bam,
                        depth=self.depth_limit,
                    ).process_make(limit_stage)
                    bam_files[index] = tempspace / ('%d.bam' % index)

        # FreeBayes claims to handle multiple bams, but it doesn't actually
        # work, so several inputs are merged into one first.
        if len(bam_files) > 1:
            sam.Bam_merge(tempspace / 'merged', bams=bam_files, index=False).run()
            bam_files = [tempspace / 'merged.bam']

        command = [
            'freebayes',
            '-f', reference,
            '--ploidy', str(self.ploidy),
            '--pvar', str(self.pvar),
        ] + self.freebayes_options + bam_files

        self.log.log('Running: ' + ' '.join(command) + '\n')

        vcf_out = stage.enter(open(self.prefix + '.vcf', 'wb'))
        freebayes_out = stage.enter(io.pipe_from(command))

        headers_pending = True
        for line in freebayes_out:
            if headers_pending and not line.startswith('##'):
                vcf_out.writelines(header + '\n' for header in tag_headers)
                headers_pending = False
            vcf_out.write(line)

    # NOTE(review): indexing is placed after the stage block, presumably so
    # that the VCF file has been closed first -- confirm.
    index_vcf(self.prefix + '.vcf')
def run(self):
    """Call variants with ``freebayes`` and write ``self.prefix + '.vcf'``.

    ``self.samples`` may mix BAM files, working directories and one
    reference sequence file. Working directories supply their filtered
    sorted BAM and a ``##sampleTags=`` header line; the first one also
    supplies a reference that is used only when no sequence file was
    given. Samples that are none of these are skipped.

    With ``self.depth_limit`` set, each BAM is first passed through
    ``sam.Bam_depth_limit``. More than one BAM is merged with
    ``sam.Bam_merge`` before freebayes is run.

    Raises:
        AssertionError: a second reference sequence file is encountered.
        grace.Error: neither a sequence file nor a working directory
            provided a reference.
    """
    inputs = []
    sample_tag_lines = []
    given_fasta = None
    working_fasta = None

    for item in self.samples:
        if sam.is_bam(item):
            inputs.append(item)
        elif os.path.isdir(item):
            work = working_directory.Working(item, True)
            inputs.append(work.get_filtered_sorted_bam())
            tags = ','.join(work.get_tags())
            sample_tag_lines.append('##sampleTags=' + tags)
            if working_fasta is None:
                working_fasta = work.get_reference().reference_fasta_filename()
        elif io.is_sequence_file(item):
            assert given_fasta is None, 'Only one reference FASTA file allowed.'
            given_fasta = item

    # An explicit FASTA wins; otherwise use the first working directory's.
    reference = working_fasta if given_fasta is None else given_fasta
    if reference is None:
        raise grace.Error('No reference FASTA file given.')

    vcf_filename = self.prefix + '.vcf'

    with nesoni.Stage() as stage:
        tempspace = stage.enter(workspace.tempspace())

        if self.depth_limit:
            with nesoni.Stage() as stage2:
                for position, source_bam in enumerate(inputs):
                    limiter = sam.Bam_depth_limit(
                        tempspace / ('%d' % position),
                        source_bam,
                        depth=self.depth_limit,
                    )
                    limiter.process_make(stage2)
                    inputs[position] = tempspace / ('%d.bam' % position)

        # FreeBayes claims to handle multiple bams, but it doesn't actually work
        if len(inputs) > 1:
            merger = sam.Bam_merge(tempspace / 'merged', bams=inputs, index=False)
            merger.run()
            inputs = [tempspace / 'merged.bam']

        command = ['freebayes', '-f', reference]
        command += ['--ploidy', str(self.ploidy)]
        command += ['--pvar', str(self.pvar)]
        command = command + self.freebayes_options + inputs

        self.log.log('Running: ' + ' '.join(command) + '\n')

        f_out = stage.enter(open(vcf_filename, 'wb'))
        f_in = stage.enter(io.pipe_from(command))

        # The tag lines go in once, immediately before the first line that
        # does not start with '##'.
        tags_written = False
        for line in f_in:
            if not (tags_written or line.startswith('##')):
                for tag_line in sample_tag_lines:
                    f_out.write(tag_line + '\n')
                tags_written = True
            f_out.write(line)

    # NOTE(review): indexing is placed after the stage block, presumably so
    # that the VCF file has been closed first -- confirm.
    index_vcf(vcf_filename)