예제 #1
0
def test_primers_collection_example():
    """Build ten primers one after another and validate the collection.

    Every primer starts from a random 20-nucleotide sequence and is
    optimized under three constraints (no heterodimerization above
    ``tmax=3`` with the primers generated before it, no "3x3mer" pattern,
    no "4xG" pattern) with a GC-content objective targeting 0.6.

    The test then asserts that no primer contains "GGGG" or "CCCC", that
    the highest pairwise heterodimer Tm reported by primer3 is below 3,
    and that every GC content lies strictly between 0.55 and 0.65.
    """

    def design_primer(previous_primers):
        """Return one new primer optimized against ``previous_primers``."""
        start_sequence = random_dna_sequence(length=20)
        specs = [
            AvoidHeterodimerization(previous_primers, tmax=3),
            AvoidPattern("3x3mer"),
            AvoidPattern("4xG"),
        ]
        problem = DnaOptimizationProblem(
            sequence=start_sequence,
            constraints=specs,
            objectives=[EnforceGCContent(target=0.6)],
            logger=None,
        )
        # Constraints are satisfied first, then the objective is optimized.
        problem.resolve_constraints()
        problem.optimize()
        return problem.sequence

    # Primers are produced sequentially because each new one must be
    # checked against all the primers that already exist.
    primers = []
    for _ in range(10):
        primers.append(design_primer(primers))

    print("PRIMERS GENERATED: \n\n%s\n" % "\n".join(primers))

    # No run of four G (or four C) may appear in any primer.
    for primer in primers:
        for forbidden_run in ("GGGG", "CCCC"):
            assert forbidden_run not in primer

    # Heterodimer melting temperature for every unordered pair of primers.
    pair_tms = [
        primer3.calcHeterodimer(first, second).tm
        for first, second in itertools.combinations(primers, 2)
    ]
    assert max(pair_tms) < 3

    gc_values = [gc_content(primer) for primer in primers]
    assert min(gc_values) > 0.55
    assert max(gc_values) < 0.65
예제 #2
0
    def evaluate(self, problem):
        """Evaluate the GC content of the problem's sequence at this location.

        The GC content is computed with ``gc_content`` on the sequence
        extracted at ``self.location`` (using ``self.window`` as window
        size). A breach is the amount by which a value falls below
        ``self.mini`` or rises above ``self.maxi``.

        Returns a ``SpecEvaluation`` whose score is minus the sum of all
        breach extents, whose locations are the breaching segments, and
        whose message is either "Passed !" or a listing of those segments.
        """
        region_start, region_end = self.location.start, self.location.end
        subsequence = self.location.extract_sequence(problem.sequence)
        gc = gc_content(subsequence, window_size=self.window)

        too_low = np.maximum(0, self.mini - gc)
        too_high = np.maximum(0, gc - self.maxi)
        breaches = too_low + too_high
        score = -breaches.sum()

        # Indices of breaching values, shifted by the location's start.
        breaches_starts = region_start + (breaches > 0).nonzero()[0]
        breach_count = len(breaches_starts)

        if breach_count == 0:
            spans = []
        elif breach_count == 1:
            if self.window is None:
                # Without a window, the whole location is reported.
                spans = [[region_start, region_end]]
            else:
                only_start = breaches_starts[0]
                spans = [[only_start, only_start + self.window]]
        else:
            windows = [
                (start, start + self.window) for start in breaches_starts
            ]
            spread = max(1, self.locations_span)
            # Each group is reported as a single span running from its
            # first segment's start to its last segment's end.
            spans = [
                (cluster[0][0], cluster[-1][-1])
                for cluster in group_nearby_segments(
                    windows, max_start_spread=spread
                )
            ]

        if not spans:
            return SpecEvaluation(
                self, problem, score, locations=spans, message="Passed !"
            )

        breaches_locations = [Location(*span) for span in spans]
        listing = ", ".join(str(loc) for loc in breaches_locations)
        return SpecEvaluation(
            self,
            problem,
            score,
            locations=breaches_locations,
            message="Out of bound on segments " + listing,
        )
예제 #3
0
 def evaluate_end(self, sequence):
     """Return the GC-content penalty of ``sequence``.

     The penalty is minus the distance by which ``gc_content(sequence)``
     lies below ``self.mini`` or above ``self.maxi``; it is zero when the
     value is within the ``[self.mini, self.maxi]`` range.
     """
     gc = gc_content(sequence)
     shortfall = max(0, self.mini - gc)
     excess = max(0, gc - self.maxi)
     return -(shortfall + excess)
예제 #4
0
        objectives=[EnforceGCContent(target=0.6)],
        logger=None,
    )
    problem.resolve_constraints()
    problem.optimize()
    return problem.sequence


# MAIN LOOP: the 20 primers are created sequentially, each new primer being
# generated from the list of primers that already exist.

existing_primers = []
for _ in range(20):
    existing_primers.append(create_new_primer(existing_primers))

print("PRIMERS GENERATED: \n\n%s\n" % "\n".join(existing_primers))

# (OPTIONAL VERIFICATION OF THE COLLECTION)

import itertools
import primer3
from dnachisel.biotools import gc_content

# Heterodimer melting temperature for every unordered pair of primers.
pairwise_tms = [
    primer3.calcHeterodimer(first, second).tm
    for first, second in itertools.combinations(existing_primers, 2)
]
max_tm = max(pairwise_tms)
print("Max Tm heterodimerization between any 2 primers: %.2f" % max_tm)

# Report the lowest and highest GC content found in the collection.
gc_contents = [gc_content(primer) for primer in existing_primers]
gc_range = (min(gc_contents), max(gc_contents))
print("GC content range %.2f-%.2f" % gc_range)