def test_circular_sequence_optimize_with_report(tmpdir):
    """Check optimize_with_report on a CircularDnaOptimizationProblem.

    The starting sequence must break at least one constraint. After the
    report-producing optimization, all constraints must pass and the
    exported record must differ from the input sequence.
    """
    np.random.seed(123)
    first_random = dc.random_dna_sequence(100)
    second_random = dc.random_dna_sequence(100)
    # The sequence ends with "CGT" and starts with "CTC", so read circularly
    # a "CGTCTC" motif also straddles the origin.
    dna_sequence = "CTC" + first_random + "CGTCTC" + second_random + "CGT"

    specs = [
        dc.AvoidPattern("BsmBI_site"),
        dc.EnforceGCContent(
            mini=0.4, maxi=0.6, location=(150, 250), window=50
        ),
        dc.UniquifyAllKmers(k=9, location=(10, 100)),
    ]
    problem = dc.CircularDnaOptimizationProblem(
        sequence=dna_sequence, constraints=specs, logger=None
    )

    report_dir = os.path.join(str(tmpdir), "circular_with_solution")
    os.mkdir(report_dir)
    assert not os.listdir(report_dir)
    assert not problem.all_constraints_pass()

    # The call returns a 3-tuple; only the problem's end state is checked.
    _success, _message, _data = problem.optimize_with_report(report_dir)
    assert problem.all_constraints_pass()

    optimized_record = problem.to_record()
    assert str(optimized_record.seq) != dna_sequence
def test_circular_sequence_basic():
    """Resolve constraints on four different random circular sequences.

    Each sequence must initially violate the constraints and satisfy all of
    them after resolve_constraints().
    """
    np.random.seed(123)
    # The feature is still young, so it is exercised on several sequences.
    for _ in range(4):
        first_random = dc.random_dna_sequence(100)
        second_random = dc.random_dna_sequence(100)
        dna_sequence = "".join(
            ["CTC", first_random, "CGTCTC", second_random, "CGT"]
        )
        specs = [
            dc.AvoidPattern("BsmBI_site"),
            dc.EnforceGCContent(
                mini=0.4, maxi=0.6, location=(150, 250), window=50
            ),
            dc.UniquifyAllKmers(k=9, location=(10, 100)),
        ]
        problem = dc.CircularDnaOptimizationProblem(
            sequence=dna_sequence, constraints=specs, logger=None
        )
        assert not problem.all_constraints_pass()
        problem.resolve_constraints()
        assert problem.all_constraints_pass()
Beispiel #3
0
def test_whole_sequence_change_objective_100():
    """EnforceChanges as an objective should edit all 50 nucleotides."""
    np.random.seed(123)
    start_sequence = dc.random_dna_sequence(50)
    change_everything = dc.EnforceChanges()
    problem = dc.DnaOptimizationProblem(
        sequence=start_sequence, objectives=[change_everything]
    )
    problem.optimize()
    assert problem.number_of_edits() == 50
Beispiel #4
0
def test_EnforceRegionsCompatibility():
    """Check that EnforceRegionsCompatibility separates the given regions.

    Four 4bp regions of a 200bp random sequence must be pairwise compatible,
    where "compatible" means ``sequences_differences`` reports a value of at
    least 2 for the pair. A GC-content constraint (40-60%, window of 40) is
    enforced alongside it.
    """
    numpy.random.seed(123)

    def compatibility_condition(location1, location2, problem):
        """Return True when the two regions are different enough."""
        seq1 = location1.extract_sequence(problem.sequence)
        seq2 = location2.extract_sequence(problem.sequence)
        return sequences_differences(seq1, seq2) >= 2

    locations = [(0, 4), (50, 54), (100, 104), (150, 154)]
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(200, seed=123),
        constraints=[
            EnforceRegionsCompatibility(
                locations=locations,
                compatibility_condition=compatibility_condition,
                condition_label="2bp difference",
            ),
            EnforceGCContent(mini=0.4, maxi=0.6, window=40),
        ],
        logger=None,
    )
    # Every constraint is expected to fail on the starting sequence.
    assert not any(e.passes for e in problem.constraints_evaluations())
    problem.resolve_constraints()
    assert problem.all_constraints_pass()

    # Independently re-check every pair of regions on the final sequence.
    # all() is essential: asserting the bare list would always succeed,
    # because a non-empty list is truthy whatever it contains.
    seq = problem.sequence
    assert all(
        sequences_differences(seq[s1:e1], seq[s2:e2]) >= 2
        for (s1, e1), (s2, e2) in itertools.combinations(locations, 2)
    )
Beispiel #5
0
def make_restriction_part(part_length, left_overhang, right_overhang,
                          enzyme, forbidden_enzymes, assembly_enzyme='BsmBI'):
    """Build an annotated record: flank + overhang/core/overhang + flank.

    A random core of ``part_length`` nucleotides is placed between the two
    overhangs, then constraints are resolved so that the overhangs are kept
    unchanged, a site for ``enzyme`` is enforced inside the core, and sites
    of ``forbidden_enzymes`` and ``assembly_enzyme`` are avoided.

    Parameters
    ----------
    part_length
      Length of the random central region.
    left_overhang, right_overhang
      Sequences placed before and after the central region.
    enzyme
      Name of the enzyme whose site is enforced in the central region.
    forbidden_enzymes
      List of enzyme names to avoid (concatenated with a list, so it must
      itself be a list).
    assembly_enzyme
      Name of the enzyme whose site is used to build the flanks.

    Returns
    -------
    record
      ``flank + core + reverse-complemented flank``, with the overhangs, the
      first enforced-site match and the flanks annotated.
    """
    core_start = len(left_overhang)
    core_end = core_start + part_length
    total_end = core_end + len(right_overhang)

    left_overhang_location = (0, core_start)
    center_location = (core_start, core_end)
    right_overhang_location = (core_end, total_end)

    random_core = dc.random_dna_sequence(part_length)
    initial_sequence = left_overhang + random_core + right_overhang

    enforce_enzyme = dc.EnforcePatternOccurence(
        enzyme=enzyme, location=center_location)
    constraints = [
        dc.AvoidChanges(left_overhang_location),
        dc.AvoidChanges(right_overhang_location),
        enforce_enzyme,
    ]
    for enzyme_name in forbidden_enzymes + [assembly_enzyme]:
        constraints.append(dc.AvoidPattern(enzyme=enzyme_name))

    problem = dc.DnaOptimizationProblem(
        sequence=initial_sequence, constraints=constraints)
    problem.resolve_constraints()

    core_record = dc.sequence_to_biopython_record(problem.sequence)
    dc.annotate_record(core_record, left_overhang_location, 'overhang')
    dc.annotate_record(core_record, right_overhang_location, 'overhang')

    # Annotate the first match reported for the enforced enzyme site.
    matches = enforce_enzyme.evaluate(problem).data['matches']
    dc.annotate_record(core_record, matches[0].to_tuple(), enzyme)

    # Flank = assembly enzyme site followed by a single 'A'.
    assembly_site = Restriction.__dict__[assembly_enzyme].site
    flank = dc.sequence_to_biopython_record(assembly_site + 'A')
    dc.annotate_record(flank, label='flank')
    return flank + core_record + flank.reverse_complement()
Beispiel #6
0
def test_whole_sequence_change_constraint_100():
    """EnforceChanges as a constraint yields 50 edits without resolution."""
    np.random.seed(123)
    start_sequence = dc.random_dna_sequence(50)
    problem = dc.DnaOptimizationProblem(
        sequence=start_sequence,
        constraints=[dc.EnforceChanges()],
    )
    # Passes immediately because the constraint restricts the initial
    # sequence when the problem is built.
    assert problem.all_constraints_pass()
    assert problem.number_of_edits() == 50
Beispiel #7
0
def test_enforce_changes_with_indices_as_constraint():
    """EnforceChanges restricted to 15 indices should give 15 edits."""
    np.random.seed(123)
    indices = [10, 20, *range(30, 40), 44, 45, 46]
    change_at_indices = dc.EnforceChanges(indices=indices)
    problem = dc.DnaOptimizationProblem(
        sequence=dc.random_dna_sequence(50),
        constraints=[change_at_indices],
    )
    edits_count = problem.number_of_edits()
    assert edits_count == 15
def test_feature_to_spec():
    """A feature label with three '&'-joined parts yields three specs."""
    record = sequence_to_biopython_record(random_dna_sequence(100))
    annotate_record(
        record, label="@gc(40-60%/20bp) & @no(BsaI_site) & @keep"
    )
    first_feature = record.features[0]
    parsed_specs = Specification.list_from_biopython_feature(first_feature)
    assert len(parsed_specs) == 3
def test_avoid_pattern_basics():
    """AvoidPattern(enzyme="BsaI") fails at first, passes once resolved."""
    numpy.random.seed(123)
    start_sequence = random_dna_sequence(10000, seed=123)
    no_bsai = AvoidPattern(enzyme="BsaI")
    problem = DnaOptimizationProblem(
        sequence=start_sequence, constraints=[no_bsai]
    )
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_AvoidNonUniqueSegments_as_objective():
    """Optimizing AvoidNonUniqueSegments(8) as an objective makes it pass."""
    numpy.random.seed(123)
    start_sequence = random_dna_sequence(1000, seed=123)
    problem = DnaOptimizationProblem(
        sequence=start_sequence,
        objectives=[AvoidNonUniqueSegments(8)],
    )
    problem.optimize()
    objective = problem.objectives[0]
    evaluation = objective.evaluate(problem)
    assert evaluation.passes
Beispiel #11
0
def test_AvoidNonuniqueSegments_as_constraint():
    """AvoidNonuniqueSegments(8) fails at first, passes once resolved."""
    numpy.random.seed(123)
    start_sequence = random_dna_sequence(1000, seed=123)
    uniqueness = AvoidNonuniqueSegments(8)
    problem = DnaOptimizationProblem(
        sequence=start_sequence, constraints=[uniqueness]
    )
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
Beispiel #12
0
def test_SequenceLengthBounds():
    """SequenceLengthBounds(500, 800) passes for 750bp, not 400 or 1200."""
    cases = {750: True, 400: False, 1200: False}
    for length, expected in cases.items():
        random_sequence = dc.random_dna_sequence(length)
        bounds = dc.SequenceLengthBounds(500, 800)
        problem = dc.DnaOptimizationProblem(
            sequence=random_sequence, constraints=[bounds], logger=None
        )
        assert problem.all_constraints_pass() == expected
Beispiel #13
0
def test_UniquifyAllKmers_as_objective():
    """Optimizing UniquifyAllKmers(8) as an objective makes it pass."""
    numpy.random.seed(123)
    start_sequence = random_dna_sequence(1000, seed=123)
    problem = DnaOptimizationProblem(
        sequence=start_sequence,
        objectives=[UniquifyAllKmers(8)],
        logger=None,
    )
    problem.optimize()
    objective = problem.objectives[0]
    evaluation = objective.evaluate(problem)
    assert evaluation.passes
Beispiel #14
0
def test_UniquifyAllKmers_as_constraint():
    """UniquifyAllKmers(8) fails at first and passes once resolved."""
    numpy.random.seed(123)
    start_sequence = random_dna_sequence(1000, seed=123)
    unique_kmers = UniquifyAllKmers(8)
    problem = DnaOptimizationProblem(
        sequence=start_sequence,
        constraints=[unique_kmers],
        logger=None,
    )
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
Beispiel #15
0
def test_avoid_hairpin_basics():
    """AvoidHairpins fails on a sequence seeded with reverse complements.

    Ten random 30bp segments are each embedded together with their reverse
    complement, separated and surrounded by random 50bp spacers. The
    constraint must fail on that sequence and pass after resolution.
    """
    numpy.random.seed(123)
    stems = [random_dna_sequence(30) for _ in range(10)]

    # Spacers are drawn in the same order as the pieces are laid out.
    pieces = []
    for stem in stems:
        pieces.extend(
            (
                random_dna_sequence(50),
                stem,
                random_dna_sequence(50),
                reverse_complement(stem),
                random_dna_sequence(50),
            )
        )
    full_sequence = "".join(pieces)

    problem = DnaOptimizationProblem(
        full_sequence, constraints=[AvoidHairpins()]
    )
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
Beispiel #16
0
def test_whole_sequence_change_constraint_4():
    """EnforceChanges(minimum=4) should resolve with 4 to 6 edits."""
    np.random.seed(123)
    start_sequence = dc.random_dna_sequence(50)
    at_least_four = dc.EnforceChanges(minimum=4)
    problem = dc.DnaOptimizationProblem(
        sequence=start_sequence,
        constraints=[at_least_four],
    )
    # Shows the number of edits before any resolution is attempted.
    print(problem.number_of_edits())
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    final_edits = problem.number_of_edits()
    assert 4 <= final_edits <= 6
Beispiel #17
0
def test_codon_optimize_with_custom_table():
    """CodonOptimize with an explicit usage table reaches a score of 0.

    The score sum must start below -10 and equal 0 after optimization,
    with translation enforced as a constraint.
    """
    start_sequence = random_dna_sequence(1200, seed=123)
    keep_protein = EnforceTranslation()
    usage_table = biotools.CODON_USAGE_TABLES['b_subtilis']
    problem = DnaOptimizationProblem(
        sequence=start_sequence,
        constraints=[keep_protein],
        objectives=[CodonOptimize(codon_usage_table=usage_table)],
    )
    assert problem.objective_scores_sum() < -10
    problem.optimize()
    assert problem.objective_scores_sum() == 0
def test_record_with_multispec_feature():
    """One feature with a three-part label becomes three constraints."""
    record = sequence_to_biopython_record(random_dna_sequence(100))
    annotate_record(
        record, label="@gc(40-60%/20bp) & @no(BsaI_site) & @keep"
    )
    problem = DnaOptimizationProblem.from_record(record)
    assert len(problem.constraints) == 3

    # Only the first two constraints are inspected further.
    gc_constraint, pattern_constraint, _ = problem.constraints
    assert gc_constraint.mini == 0.4
    assert pattern_constraint.pattern.name == "BsaI"
def test_optimize_with_report(tmpdir):
    """optimize_with_report succeeds and writes into the target folder."""
    start_sequence = random_dna_sequence(10000, seed=123)
    problem = DnaOptimizationProblem(
        sequence=start_sequence,
        constraints=[AvoidPattern('BsmBI_site')],
    )

    report_dir = os.path.join(str(tmpdir), 'with_solution')
    os.mkdir(report_dir)
    assert not os.listdir(report_dir)

    success, _message, _data = problem.optimize_with_report(report_dir)
    assert success
    # The folder was empty before the call, so it must now hold output.
    assert os.listdir(report_dir)
Beispiel #20
0
def test_codon_optimize_with_custom_table():
    """CodonOptimize with a table from get_codons_table reaches score 0.

    The score sum must start below -10 and equal 0 after optimization,
    with translation enforced as a constraint.
    """
    table = get_codons_table("b_subtilis")
    start_sequence = random_dna_sequence(1200, seed=123)
    keep_protein = EnforceTranslation()
    codon_objective = CodonOptimize(codon_usage_table=table)
    problem = DnaOptimizationProblem(
        sequence=start_sequence,
        constraints=[keep_protein],
        objectives=[codon_objective],
        logger=None,
    )
    initial_score = problem.objective_scores_sum()
    assert initial_score < -10
    problem.optimize()
    final_score = problem.objective_scores_sum()
    assert final_score == 0
Beispiel #21
0
def random_compatible_dna_sequence(sequence_length, constraints, probas=None,
                                   seed=None, max_random_iters=5000,
                                   logger='bar', **kwargs):
    """Return a random DNA sequence resolved against ``constraints``.

    A random sequence is drawn with ``dc.random_dna_sequence`` and then
    passed through ``resolve_constraints`` of a ``DnaOptimizationProblem``.

    Parameters
    ----------
    sequence_length
      Length passed to ``dc.random_dna_sequence``.
    constraints
      Constraints given to the optimization problem.
    probas, seed
      Forwarded as-is to ``dc.random_dna_sequence``.
    max_random_iters
      Value set on the problem's ``max_random_iters`` attribute.
    logger
      Logger given to the optimization problem.
    **kwargs
      Extra keyword arguments forwarded to ``resolve_constraints``.

    Returns
    -------
    sequence
      The problem's sequence after constraint resolution.
    """
    initial_sequence = dc.random_dna_sequence(
        sequence_length,
        probas=probas,
        seed=seed,
    )
    problem = dc.DnaOptimizationProblem(
        initial_sequence,
        constraints=constraints,
        logger=logger,
    )
    problem.max_random_iters = max_random_iters
    problem.resolve_constraints(**kwargs)
    return problem.sequence
def test_optimization_with_report_no_solution(tmpdir):
    """optimization_with_report fails but still writes output files.

    AvoidChanges() is combined with an AvoidPattern constraint, and the
    test expects the optimization not to succeed.
    """
    start_sequence = random_dna_sequence(10000, seed=123)
    conflicting = [AvoidPattern(enzyme='BsmBI'), AvoidChanges()]
    problem = DnaOptimizationProblem(
        sequence=start_sequence, constraints=conflicting
    )

    report_dir = os.path.join(str(tmpdir), 'no_solution')
    os.mkdir(report_dir)
    assert not os.listdir(report_dir)

    success, _message, _data = optimization_with_report(report_dir, problem)
    assert not success
    # Output is expected in the folder even when the optimization fails.
    assert os.listdir(report_dir)
Beispiel #23
0
def test_codon_optimize_as_hard_constraint():
    """CodonOptimize used as a constraint fails first, then resolves.

    Both EnforceTranslation and CodonOptimize are restricted to the
    1000-1300 region of a 2000bp random sequence.
    """
    numpy.random.seed(123)
    start_sequence = random_dna_sequence(2000, seed=123)
    keep_protein = EnforceTranslation(location=Location(1000, 1300))
    optimize_codons = CodonOptimize(
        location=Location(1000, 1300), species='e_coli'
    )
    problem = DnaOptimizationProblem(
        sequence=start_sequence,
        constraints=[keep_protein, optimize_codons],
    )
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
Beispiel #24
0
def test_basics():
    """EnforceTerminalGCContent plus AvoidPattern: fail first, then pass.

    The random sequence is drawn with nucleotide probabilities of 0.3 for
    G and C and 0.2 for A and T.
    """
    numpy.random.seed(123)
    probas = dict(A=0.2, T=0.2, G=0.3, C=0.3)
    start_sequence = random_dna_sequence(10000, probas=probas, seed=123)
    no_bsai = AvoidPattern(enzyme="BsaI")
    terminal_gc = EnforceTerminalGCContent(
        mini=0.2, maxi=0.4, window_size=50
    )
    problem = DnaOptimizationProblem(
        sequence=start_sequence, constraints=[no_bsai, terminal_gc]
    )
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
Beispiel #25
0
def test_optimize_with_report_no_solution(tmpdir):
    """optimize_with_report fails but still writes output files.

    AvoidChanges() is combined with an AvoidPattern constraint, and the
    test expects the optimization not to succeed.
    """
    start_sequence = random_dna_sequence(10000, seed=123)
    conflicting = [AvoidPattern("BsmBI_site"), AvoidChanges()]
    problem = DnaOptimizationProblem(
        sequence=start_sequence, constraints=conflicting, logger=None
    )

    report_dir = os.path.join(str(tmpdir), "no_solution")
    os.mkdir(report_dir)
    assert not os.listdir(report_dir)

    success, _message, _data = problem.optimize_with_report(report_dir)
    assert not success
    # Output is expected in the folder even when the optimization fails.
    assert os.listdir(report_dir)
Beispiel #26
0
def test_avoid_matches_with_phage():
    """AvoidMatches against a bowtie index: 5 breaches, then resolved.

    The index path is obtained from a GenomeCollection for taxid "697289".
    """
    PHAGE_TAXID = "697289"
    index = GenomeCollection().get_taxid_bowtie_index_path(
        PHAGE_TAXID, version="1"
    )
    no_matches = AvoidMatches(bowtie_index=index, match_length=10)
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(30, seed=123),
        constraints=[no_matches],
        logger=None,
    )
    evaluations = problem.constraints_evaluations()
    all_breaches = evaluations.all_locations()
    assert len(all_breaches) == 5
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
Beispiel #27
0
def test_basics():
    """EnforceTerminalGCContent plus AvoidPattern: fail first, then pass.

    The random sequence is drawn with nucleotide probabilities of 0.3 for
    G and C and 0.2 for A and T.
    """
    numpy.random.seed(123)
    probas = dict(A=0.2, T=0.2, G=0.3, C=0.3)
    start_sequence = random_dna_sequence(10000, probas=probas, seed=123)
    no_bsai = AvoidPattern("BsaI_site")
    terminal_gc = EnforceTerminalGCContent(
        mini=0.2, maxi=0.4, window_size=50
    )
    problem = DnaOptimizationProblem(
        sequence=start_sequence,
        constraints=[no_bsai, terminal_gc],
        logger=None,
    )
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_EnforceGCContents():
    """Windowed EnforceGCContent plus AvoidPattern: fail first, then pass.

    A GC-content objective (target=0.4) is also attached to the problem,
    but only constraint resolution is exercised here.
    """
    numpy.random.seed(123)
    start_sequence = random_dna_sequence(10000, seed=123)
    hard_constraints = [
        AvoidPattern(enzyme="BsaI"),
        EnforceGCContent(mini=0.3, maxi=0.7, window=50),
    ]
    gc_target = EnforceGCContent(target=0.4)
    problem = DnaOptimizationProblem(
        sequence=start_sequence,
        constraints=hard_constraints,
        objectives=[gc_target],
    )

    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
Beispiel #29
0
def test_avoid_changes_with_indices_as_constraint():
    """AvoidChanges on 15 indices leaves 35 of 50 positions editable.

    With EnforceChanges as the objective, the optimized sequence is
    expected to carry exactly 50 - 15 edits.
    """
    numpy.random.seed(123)

    indices = [10, 20, *range(30, 40), 44, 45, 46]
    sequence = random_dna_sequence(50)
    frozen_positions = AvoidChanges(indices=indices)
    change_everything = EnforceChanges()

    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[frozen_positions],
        objectives=[change_everything],
        logger=None,
    )
    problem.optimize()
    expected_edits = 50 - 15
    assert problem.number_of_edits() == expected_edits
Beispiel #30
0
 def create_new_primer(existing_primers):
     """Return a new 20-nucleotide sequence designed against given primers.

     A random sequence is constrained to avoid heterodimerization with
     ``existing_primers`` (tmax=3) and to avoid the "3x3mer" and "4xG"
     patterns, then optimized towards a GC-content target of 0.6.
     """
     hard_constraints = [
         AvoidHeterodimerization(existing_primers, tmax=3),
         AvoidPattern("3x3mer"),
         AvoidPattern("4xG"),
     ]
     gc_objective = EnforceGCContent(target=0.6)
     start_sequence = random_dna_sequence(length=20)
     problem = DnaOptimizationProblem(
         sequence=start_sequence,
         constraints=hard_constraints,
         objectives=[gc_objective],
         logger=None,
     )
     # Satisfy the constraints first, then improve the objective.
     problem.resolve_constraints()
     problem.optimize()
     return problem.sequence