def assign_ids(self):
    """Number this collection's primers 1..N in descending `ratio` order.

    Every row in the Primer table first has its `_id` reset to -1. The new
    ids are then set on the returned model instances only; this method does
    not save them itself.

    :return: list of the collection's primers, highest `ratio` first
    """
    reset_query = Primer.update(_id=-1)
    reset_query.execute()

    ranked_query = Primer.select().where(Primer.seq << self.primers)
    ranked_query = ranked_query.order_by(Primer.ratio.desc())
    ranked = list(ranked_query.execute())

    for position, primer in enumerate(ranked, start=1):
        primer._id = position
    return ranked
def __init__(self, primers=None):
    """Build a primer collection.

    :param primers: one of
        - None: the collection covers every primer in the database;
        - an open file: parsed with `read_primer_list`;
        - anything else: used as the right-hand side of an `IN` test on
          `Primer.seq` (e.g. a list of sequences or a query).
    """
    if primers is not None and isinstance(primers, file):
        records = read_primer_list(primers)
        self._primers = records
        self.n = len(records)
        return

    query = Primer.select()
    if primers is not None:
        query = query.where(Primer.seq << primers)
    self._primers = query
    self.n = int(query.count())
def __iter__(self):
    """Iterate over this collection's primers, ordered by `_id`."""
    in_collection = Primer.seq << self.primers
    ordered = Primer.select().where(in_collection).order_by(Primer._id)
    return ordered.iterator()
def select_active():
    """Wrap all primers flagged as active in a `Primers` collection.

    When no primer is active, `error` is called with `exception=False`;
    if it returns, the (empty) selection is still wrapped and returned.
    """
    # `== True` is intentional: peewee builds the SQL expression from it.
    active_query = Primer.select().where(Primer.active == True)  # noqa: E712
    none_active = active_query.count() == 0
    if none_active:
        error(
            'No active primers found. Run `swga filter` or `swga activate` '
            'first.',
            exception=False)
    return Primers(active_query)
def filter_min_fg_rate(self, min_bind):
    """Keep only primers whose `fg_freq` is at least `min_bind`.

    Logs how many of the collection's primers pass the filter.

    :param min_bind: minimum foreground binding count (inclusive)
    :return: a query over the primers that pass
    """
    in_collection = Primer.seq << self.primers
    binds_enough = Primer.fg_freq >= min_bind
    kept = Primer.select().where(in_collection & binds_enough)

    summary = '{}/{} primers bind the foreground genome >= {} times'
    message(summary.format(kept.count(), self.n, min_bind))
    return kept
def filter_max_bg_rate(self, rate):
    """Keep only primers whose `bg_freq` is at most `rate`.

    Logs how many of the collection's primers pass the filter.

    :param rate: maximum background binding count (inclusive)
    :return: a query over the primers that pass
    """
    in_collection = Primer.seq << self.primers
    binds_rarely = Primer.bg_freq <= rate
    kept = Primer.select().where(in_collection & binds_rarely)

    summary = '{}/{} primers bind the background genome <= {} times'
    message(summary.format(kept.count(), self.n, rate))
    return kept
def update_melt_temps(self):
    """Fill in the melting temperature of primers whose `tm` is NULL.

    :return: list of the primers `update_tm()` was called on
    """
    in_collection = Primer.seq << self.primers
    lacks_tm = Primer.tm >> None  # peewee spelling of `IS NULL`
    pending = list(Primer.select().where(in_collection & lacks_tm))

    if pending:
        message(
            'Finding melting temps for {} primers...'.format(len(pending)))
    for primer in pending:
        primer.update_tm()
    return pending
def update_gini(self, fg_genome_fp):
    """Fill in the Gini coefficient of primers whose `gini` is NULL.

    :param fg_genome_fp: passed through to each primer's `_update_gini`
    :return: list of the primers `_update_gini()` was called on
    """
    in_collection = Primer.seq << self.primers
    lacks_gini = Primer.gini >> None  # peewee spelling of `IS NULL`
    pending = list(Primer.select().where(in_collection & lacks_gini))

    if pending:
        message(
            'Finding Gini coefficients for {} primers...'
            .format(len(pending)))
    for primer in pending:
        primer._update_gini(fg_genome_fp)
    return pending
def update_locations(self, fg_genome_fp):
    """Fill in binding locations of primers whose `_locations` is NULL.

    :param fg_genome_fp: passed through to each primer's
        `_update_locations`
    :return: list of the primers `_update_locations()` was called on
    """
    in_collection = Primer.seq << self.primers
    lacks_locations = Primer._locations >> None  # peewee `IS NULL`
    pending = list(Primer.select().where(in_collection & lacks_locations))

    if pending:
        message(
            'Finding binding locations for {} primers...'
            .format(len(pending)))
    for primer in pending:
        primer._update_locations(fg_genome_fp)
    return pending
def filter_tm_range(self, min_tm, max_tm):
    """Keep only primers whose melting temp lies in [min_tm, max_tm].

    Missing melting temps are computed first via `update_melt_temps`.

    :param min_tm: lowest acceptable melting temp (inclusive)
    :param max_tm: highest acceptable melting temp (inclusive)
    :return: a query over the primers that pass
    """
    self.update_melt_temps()

    in_collection = Primer.seq << self.primers
    not_too_hot = Primer.tm <= max_tm
    not_too_cold = Primer.tm >= min_tm
    kept = Primer.select().where(in_collection & not_too_hot & not_too_cold)

    summary = '{}/{} primers have a melting temp between {} and {} C'
    message(summary.format(kept.count(), self.n, min_tm, max_tm))
    return kept
def activate(self, min_active=1):
    """Mark every primer in this collection as active.

    :param min_active: the minimum number of primers expected to be
        activated; a note is logged when fewer rows were updated.
    :return: this object, so calls can be chained
    """
    update_query = Primer.update(active=True).where(
        Primer.seq << self.primers)
    activated = update_query.execute()
    message('Marked {} primers as active.'.format(activated))

    if activated < min_active:
        note = (
            'Note: Fewer than {} primers were selected ({} passed all the '
            'filters). You may want to try less restrictive filtering '
            'parameters.')
        message(note.format(min_active, activated))
    return self
def add(primer_dicts, add_revcomp=True):
    """Add the given primer dicts to the database in chunks of 199.

    The caller's `primer_dicts` is never modified: when reverse complements
    are requested they are appended to a copy.

    :param primer_dicts: iterable of dicts defining primers; each must have
        a 'seq' key when `add_revcomp` is True
    :param add_revcomp: if True, also add a copy of every primer whose
        'seq' is the reverse complement of the original
    """
    if add_revcomp:
        def mkrevcomp(p):
            p2 = dict(p)
            p2['seq'] = locate.revcomp(p['seq'])
            return p2

        # Build a new list instead of `+=`, which would extend the caller's
        # list in place (and double up entries if it is passed in again).
        forward = list(primer_dicts)
        primer_dicts = forward + [mkrevcomp(p) for p in forward]

    utils.chunk_iterator(
        primer_dicts,
        fn=lambda c: Primer.insert_many(c).execute(),
        n=199,
        label="Updating database: ")
def add_primers(primers, chunksize=199, add_revcomp=True):
    """Add an arbitrary number of primers to the database in chunks.

    The caller's `primers` is never modified: when reverse complements are
    requested they are appended to a copy.

    :param primers: iterable of primer definitions. NOTE(review): with
        `add_revcomp=True` each item is copied with `dict(...)` and indexed
        by 'seq', so items must be dict-like — confirm against callers.
    :param chunksize: number of primers inserted per database statement
    :param add_revcomp: if True, also add a copy of every primer whose
        'seq' is the reverse complement of the original
    """
    if add_revcomp:
        def mkrevcomp(p):
            p2 = dict(p)
            p2['seq'] = locate.revcomp(p['seq'])
            return p2

        # Build a new list instead of `+=`, which would extend the caller's
        # list in place (and double up entries if it is passed in again).
        forward = list(primers)
        primers = forward + [mkrevcomp(p) for p in forward]

    utils.chunk_iterator(
        primers,
        fn=lambda c: Primer.insert_many(c).execute(),
        n=chunksize,
        label="Updating database: ")
def read_primer_list(lines):
    """Look up the primers named in `lines` and return their db records.

    :param lines: iterable of text lines (e.g. an open file), one primer
        sequence per line. Only the first whitespace-delimited token of
        each line is used; blank lines are skipped.
    :return: list of the Primer records found. Sequences with no record
        are left out, and a warning is emitted for each of them.
    """
    seqs = []
    for line in lines:
        # split() with no separator drops leading whitespace and any
        # trailing '\r\n', so blank lines yield no tokens instead of ''.
        tokens = line.split()
        if tokens:
            seqs.append(tokens[0])
    if not seqs:
        # Nothing to look up; avoid issuing an empty `IN ()` query.
        return []

    primers = list(Primer.select().where(Primer.seq << seqs).execute())
    if len(primers) < len(seqs):
        found = {p.seq for p in primers}
        for seq in seqs:
            if seq not in found:
                warn(
                    seq + ' not in the database; skipping. Add it manually '
                    'with `swga count --input <file>` ')
    return primers
def limit_to(self, n):
    """Pick the `n` primers that bind the background least often.

    The collection is sorted by ascending `bg_freq` and cut to the first
    `n`; those primers are then returned ordered by descending `ratio`.

    :param n: number of primers to keep; must be at least 1
    :return: a query over the selected primers
    :raises ValueError: if `n` is less than 1
    """
    if n < 1:
        # n == 1 is accepted, so the message states the real lower bound.
        raise ValueError('n must be at least 1')
    first_pass = (
        Primer.select()
        .where(Primer.seq << self.primers)
        .order_by(Primer.bg_freq)
        .limit(n))
    results = (
        Primer.select()
        .where(Primer.seq << first_pass)
        .order_by(Primer.ratio.desc()))
    return results
def filter_max_gini(self, gini_max, fg_genome_fp):
    """Keep only primers whose Gini coefficient is at most `gini_max`.

    Binding locations and Gini coefficients are computed first for any
    primers that do not have them yet.

    :param gini_max: max Gini coefficient (0-1, inclusive)
    :param fg_genome_fp: passed through to `update_locations` and
        `update_gini`
    :return: a query over the primers that pass
    :raises ValueError: if `gini_max` is outside the range 0-1
    """
    # Written as a single chained range test so that a value failing
    # either bound is rejected.
    if not 0 <= gini_max <= 1:
        raise ValueError('Gini coefficient must be between 0-1')

    # Called one after the other rather than chained: update_locations
    # returns the primers it updated, not this collection.
    self.update_locations(fg_genome_fp)
    self.update_gini(fg_genome_fp)

    results = Primer.select().where(
        (Primer.seq << self.primers) &
        (Primer.gini <= gini_max))
    message(
        '{}/{} primers have a Gini coefficient <= {}'
        .format(results.count(), self.n, gini_max))
    return results
def select_by_seqs(seqs):
    """Build a `Primers` collection from rows whose `seq` is in `seqs`."""
    matches_seq = Primer.seq << seqs
    return Primers(Primer.select().where(matches_seq))
def select_by_ids(ids):
    """Build a `Primers` collection from rows whose `_id` is in `ids`.

    :param ids: the primer ids to select
    :return: a `Primers` collection wrapping the matching rows
    :raises AssertionError: if no primer matches any of `ids`
    """
    selected = Primer.select().where(Primer._id << ids)
    # count must be *called*; comparing the bound method itself to 0 never
    # fails on Python 2 and raises TypeError on Python 3.
    assert selected.count() > 0, 'No primers found with the given ids.'
    return Primers(selected)
def upsert_chunk(chunk):
    """Replace the stored rows for `chunk` with the primers' current values.

    Rows whose `seq` matches the chunk are deleted, then every primer in
    the chunk is inserted again from its `to_dict()` representation.
    """
    stale_rows = Primer.delete().where(Primer.seq << chunk)
    stale_rows.execute()

    fresh_rows = (primer.to_dict() for primer in chunk)
    Primer.insert_many(fresh_rows).execute()