def __init__(self, parent):
    """Attach this group to *parent* and build its read-file objects.

    Reads ``samples``, ``pool``, ``cls`` and ``p.groups_dir`` from *parent*
    and ``short_name`` from ``self``. Finishes by calling ``self.load()``.
    """
    # Parent links (same object under two names) #
    self.parent = self.assemble_group = parent
    self.samples = parent.samples
    self.pool = self.parent.pool
    self.primers = self.pool.primers
    # Directory layout #
    self.base_dir = parent.p.groups_dir + self.short_name + '/'
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Raw reads and N-filtered reads use the parent's read class #
    self.orig_reads = self.parent.cls(self.p.orig_fastq, samples=self.samples)
    self.n_filtered = self.parent.cls(self.p.n_filtered, samples=self.samples)
    # Downstream files only exist for the 'assembled' case #
    # NOTE(review): this compares the parent object to a string; presumably
    # the parent type defines equality against a name -- confirm.
    if self.parent == 'assembled':
        barcoded_options = dict(samples=self.samples, primers=self.primers)
        self.qual_filtered = BarcodedFASTQ(self.p.qual_filtered, **barcoded_options)
        self.len_filtered = BarcodedFASTQ(self.p.len_filtered_fastq, **barcoded_options)
        self.trimmed_barcodes = FASTA(self.p.trimmed_barcodes)
    # Hook for extra set-up #
    self.load()
class PrimerGroup(object):
    """A bunch of sequences all having the same type of primer outcome
    (and assembly outcome).

    The constructor reads ``self.short_name``, which is not defined in this
    class -- presumably subclasses provide it (confirm against the rest of
    the file).
    """

    # Files managed by this group, relative to its base directory #
    all_paths = """
    /orig.fastq
    /n_filtered.fastq
    /qual_filtered.fastq
    /len_filtered.fastq
    /trimmed_barcodes.fasta
    """

    # A read is dropped when any averaged quality value is below this #
    qual_threshold = 5
    # Window size handed to moving_average() #
    qual_windowsize = 10
    # Reads shorter than this are dropped by len_filter() #
    min_length = 400

    def __repr__(self):
        return '<%s object of %s>' % (self.__class__.__name__, self.parent)

    def __len__(self):
        """Return the length of the original reads object."""
        return len(self.orig_reads)

    def create(self):
        """Forward to ``orig_reads.create()``."""
        self.orig_reads.create()

    def add_seq(self, read):
        """Forward *read* to ``orig_reads.add_seq()``."""
        self.orig_reads.add_seq(read)

    def close(self):
        """Forward to ``orig_reads.close()``."""
        self.orig_reads.close()

    def __init__(self, parent):
        """Attach this group to *parent* and build its read-file objects.

        Reads ``samples``, ``pool``, ``cls`` and ``p.groups_dir`` from
        *parent*. ``qual_filtered``, ``len_filtered`` and
        ``trimmed_barcodes`` are only created when the parent compares
        equal to ``'assembled'``. Finishes by calling ``self.load()``.
        """
        # Save parent #
        self.parent, self.assemble_group = parent, parent
        self.samples = parent.samples
        self.pool = self.parent.pool
        self.primers = self.pool.primers
        # Auto paths #
        self.base_dir = parent.p.groups_dir + self.short_name + '/'
        self.p = AutoPaths(self.base_dir, self.all_paths)
        # More #
        self.orig_reads = self.parent.cls(self.p.orig_fastq, samples=self.samples)
        self.n_filtered = self.parent.cls(self.p.n_filtered, samples=self.samples)
        # Quality filtered #
        # NOTE(review): this compares the parent object to a string;
        # presumably the parent type defines equality against a name -- confirm.
        if self.parent == 'assembled':
            self.qual_filtered = BarcodedFASTQ(self.p.qual_filtered, samples=self.samples, primers=self.primers)
            self.len_filtered = BarcodedFASTQ(self.p.len_filtered_fastq, samples=self.samples, primers=self.primers)
            self.trimmed_barcodes = FASTA(self.p.trimmed_barcodes)
        # Further #
        self.load()

    def load(self):
        """Hook called at the end of ``__init__``; does nothing here."""
        pass

    def n_filter(self):
        """Called from AssembleGroup.discard_reads_with_n

        Writes to ``n_filtered`` every original read that has no 'N'
        between the forward and reverse primer lengths.
        """
        def no_n_iterator(reads):
            fwd_len = self.pool.primers.fwd_len
            rev_len = self.pool.primers.rev_len
            # A stop of -0 is 0 and would select nothing, letting every read
            # through unchecked; use an open-ended stop when rev_len is 0.
            inner = slice(fwd_len, -rev_len if rev_len else None)
            for read in reads:
                if 'N' in read[inner]:
                    continue
                yield read
        self.n_filtered.write(no_n_iterator(self.orig_reads))

    def qual_filter(self):
        """Called from Assemble.quality_filter

        Writes to ``qual_filtered`` every N-filtered read for which no value
        returned by ``moving_average`` over its "phred_quality" annotation
        falls below ``qual_threshold``.
        """
        def good_qual_iterator(reads):
            for read in reads:
                averaged = moving_average(read.letter_annotations["phred_quality"], self.qual_windowsize)
                # Generator form stops at the first value under the threshold
                if any(value < self.qual_threshold for value in averaged):
                    continue
                yield read
        self.qual_filtered.write(good_qual_iterator(self.n_filtered))

    def len_filter(self):
        """Called from Assemble.length_filter

        Writes to ``len_filtered`` every quality-filtered read whose length
        is at least ``min_length``.
        """
        def good_len_iterator(reads):
            for read in reads:
                if len(read) < self.min_length:
                    continue
                yield read
        self.len_filtered.write(good_len_iterator(self.qual_filtered))

    def trim_bc(self):
        """Called from Assemble.trim_barcodes

        Removes ``pool.bar_len`` positions from both ends of every
        length-filtered read and writes the result to ``trimmed_barcodes``.
        """
        def no_barcodes_iterator(reads):
            for read in reads:
                yield read[self.pool.bar_len:-self.pool.bar_len]
        # With a zero barcode length the slice above would be empty, so the
        # reads are handed to to_fasta() untrimmed instead.
        if self.pool.bar_len == 0:
            self.len_filtered.to_fasta(self.trimmed_barcodes)
        else:
            self.trimmed_barcodes.write(no_barcodes_iterator(self.len_filtered))