def test_primers_collection_example():
    """Generate ten primers one after another and check the whole collection.

    Each primer is designed against the primers produced before it. The
    final collection is then checked for forbidden G/C homopolymers, for the
    maximal pairwise heterodimer ``tm`` reported by primer3, and for the
    GC content of every primer.
    """

    def create_new_primer(previous_primers):
        """Return a new 20-nucleotide primer designed against ``previous_primers``."""
        start_sequence = random_dna_sequence(length=20)
        constraints = [
            AvoidHeterodimerization(previous_primers, tmax=3),
            AvoidPattern("3x3mer"),
            AvoidPattern("4xG"),
        ]
        design = DnaOptimizationProblem(
            sequence=start_sequence,
            constraints=constraints,
            objectives=[EnforceGCContent(target=0.6)],
            logger=None,
        )
        design.resolve_constraints()
        design.optimize()
        return design.sequence

    # Primers are created one by one; each new one sees all the previous ones.
    primers = []
    for _ in range(10):
        primers.append(create_new_primer(primers))

    print("PRIMERS GENERATED: \n\n%s\n" % "\n".join(primers))

    # No primer may contain a run of four G or four C.
    for primer in primers:
        for homopolymer in ("GGGG", "CCCC"):
            assert homopolymer not in primer

    # The strongest heterodimer among all primer pairs must stay below 3.
    pairwise_tms = [
        primer3.calcHeterodimer(first, second).tm
        for first, second in itertools.combinations(primers, 2)
    ]
    assert max(pairwise_tms) < 3

    # Every primer's GC content must lie strictly between 0.55 and 0.65.
    gc_values = [gc_content(primer) for primer in primers]
    assert min(gc_values) > 0.55
    assert max(gc_values) < 0.65
def evaluate(self, problem):
    """Return the sum of breaches extent for all windowed breaches.

    The GC content of the sequence covered by ``self.location`` is compared
    with the ``[self.mini, self.maxi]`` interval. Every value outside that
    interval contributes its distance to the nearest bound to the total
    breach, and the score is the negated total (0 when nothing is out of
    bounds, negative otherwise).

    Parameters
    ----------
    problem
      Object whose ``sequence`` attribute holds the sequence to evaluate.

    Returns
    -------
    SpecEvaluation
      Evaluation carrying the score, the list of breaching locations
      (empty when the specification passes) and a human-readable message.
    """
    wstart, wend = self.location.start, self.location.end
    sequence = self.location.extract_sequence(problem.sequence)
    # NOTE(review): gc_content presumably returns one value per window when
    # ``self.window`` is set and a single scalar otherwise -- confirm.
    gc = gc_content(sequence, window_size=self.window)
    breaches = np.maximum(0, self.mini - gc) + np.maximum(0, gc - self.maxi)
    score = -breaches.sum()

    # ``breaches`` may be a scalar; calling ``nonzero()`` on a scalar or 0-d
    # array is deprecated in NumPy and rejected by recent releases, so the
    # mask is promoted to 1-d first. For 1-d input this is a no-op.
    breaches_starts = wstart + np.atleast_1d(breaches > 0).nonzero()[0]

    # ``len`` is used on purpose: the truth value of a NumPy array with
    # several elements is ambiguous.
    if len(breaches_starts) == 0:
        breaches_locations = []
    elif len(breaches_starts) == 1:
        if self.window is not None:
            start = breaches_starts[0]
            breaches_locations = [[start, start + self.window]]
        else:
            # No window: the breach concerns the whole evaluated location.
            breaches_locations = [[wstart, wend]]
    else:
        # Several breaching windows: merge the ones starting close to each
        # other so that one location is reported per cluster.
        segments = [(bs, bs + self.window) for bs in breaches_starts]
        groups = group_nearby_segments(
            segments, max_start_spread=max(1, self.locations_span)
        )
        breaches_locations = [
            (group[0][0], group[-1][-1]) for group in groups
        ]

    if not breaches_locations:
        message = "Passed !"
    else:
        breaches_locations = [Location(*loc) for loc in breaches_locations]
        message = "Out of bound on segments " + ", ".join(
            [str(location) for location in breaches_locations]
        )
    return SpecEvaluation(
        self,
        problem,
        score,
        locations=breaches_locations,
        message=message,
    )
def evaluate_end(self, sequence):
    """Score ``sequence`` by how far its GC content lies outside the bounds.

    Returns the negated sum of the shortfall below ``self.mini`` and the
    excess above ``self.maxi``; the result is 0 when the GC content is
    within ``[self.mini, self.maxi]``.
    """
    gc = gc_content(sequence)
    shortfall = max(0, self.mini - gc)
    excess = max(0, gc - self.maxi)
    return -(shortfall + excess)
objectives=[EnforceGCContent(target=0.6)], logger=None, ) problem.resolve_constraints() problem.optimize() return problem.sequence # MAIN LOOP, WHERE PRIMERS ARE CREATED ONE BY ONE existing_primers = [] for i in range(20): new_primer = create_new_primer(existing_primers) existing_primers.append(new_primer) print("PRIMERS GENERATED: \n\n%s\n" % "\n".join(existing_primers)) # (OPTIONAL VERIFICATION OF THE COLLECTION) import itertools import primer3 from dnachisel.biotools import gc_content max_tm = max( primer3.calcHeterodimer(seq1, seq2).tm for seq1, seq2 in itertools.combinations(existing_primers, 2)) print("Max Tm heterodimerization between any 2 primers: %.2f" % max_tm) gc_contents = [gc_content(p) for p in existing_primers] print("GC content range %.2f-%.2f" % (min(gc_contents), max(gc_contents)))