Ejemplo n.º 1
0
def test_EnforceRegionsCompatibility():
    """Resolve EnforceRegionsCompatibility together with a GC-content window.

    Four 4bp regions of a 200bp random sequence must be pairwise
    "compatible", where compatibility is defined by the local
    ``compatibility_condition`` (at least 2 differences between the two
    extracted region sequences). The test expects every constraint to fail
    initially, to pass after resolution, and then re-checks the pairwise
    condition directly on the final sequence.
    """
    numpy.random.seed(123)

    def compatibility_condition(location1, location2, problem):
        # Two regions are compatible when their sequences differ in at
        # least two positions.
        seq1 = location1.extract_sequence(problem.sequence)
        seq2 = location2.extract_sequence(problem.sequence)
        return sequences_differences(seq1, seq2) >= 2

    locations = [(0, 4), (50, 54), (100, 104), (150, 154)]
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(200, seed=123),
        constraints=[
            EnforceRegionsCompatibility(
                locations=locations,
                compatibility_condition=compatibility_condition,
                condition_label="2bp difference",
            ),
            EnforceGCContent(mini=0.4, maxi=0.6, window=40),
        ],
        logger=None,
    )
    assert not any([e.passes for e in problem.constraints_evaluations()])
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
    seq = problem.sequence
    # all() is required: asserting the bare list would always succeed because
    # a non-empty list is truthy even when it only contains False values.
    assert all(
        sequences_differences(seq[s1:e1], seq[s2:e2]) >= 2
        for (s1, e1), (s2, e2) in itertools.combinations(locations, 2)
    )
Ejemplo n.º 2
0
 def optimize(self, codon_table):
     """Codon-optimize the stored vaccine codons and write them back.

     Steps, in order:
       1. run ``self.optimize_frequent(codon_table)``;
       2. copy the current vaccine codon list, empty the original list in
          place, and turn the copy into a strand with ``self.get_strand``;
       3. look up the codon usage table via ``pct.get_codons_table``
          (NOTE(review): ``pct`` is presumably python_codon_tables - confirm);
       4. solve a DnaOptimizationProblem that enforces translation
          (standard table, ATG start) and a 0.54-0.9 GC content over a
          120bp window, with a "use_best_codon" CodonOptimize objective;
       5. split the resulting sequence into consecutive 3-letter codons and
          store them as the new vaccine codon list. A trailing group of
          fewer than three letters is discarded.

     :param codon_table: identifier handed to ``self.optimize_frequent`` and
         to ``pct.get_codons_table``.
     :return: None; the result is stored on the instance.
     """
     self.optimize_frequent(codon_table)

     frequent_codons = self.__vaccine_codons_gen.copy()
     # Empty the existing list object in place before it is replaced below.
     self.__vaccine_codons_gen.clear()
     strand = self.get_strand(frequent_codons)
     usage_table = pct.get_codons_table(codon_table)

     translation = EnforceTranslation(genetic_table='Standard',
                                      start_codon='ATG')
     gc_content = EnforceGCContent(mini=0.54, maxi=0.9, window=120)
     best_codon = CodonOptimize(method="use_best_codon",
                                codon_usage_table=usage_table)
     problem = DnaOptimizationProblem(
         sequence=strand,
         constraints=[translation, gc_content],
         objectives=[best_codon],
     )
     problem.resolve_constraints()
     problem.optimize()

     # Regroup the optimized letters three by three; only complete codons
     # are kept.
     letters = list(problem.sequence)
     complete = len(letters) - len(letters) % 3
     self.__vaccine_codons_gen = [
         "".join(letters[start:start + 3])
         for start in range(0, complete, 3)
     ]
     return
Ejemplo n.º 3
0
def test_AvoidStopCodons():
    """AvoidStopCodons fails on a sequence ending in TAA, then gets resolved."""
    numpy.random.seed(123)
    stop_codon_constraint = AvoidStopCodons()
    dna_problem = DnaOptimizationProblem(
        sequence="ATTGCCATCTAA",
        constraints=[stop_codon_constraint],
    )
    # The constraint must be breached before resolution...
    passes_initially = dna_problem.all_constraints_pass()
    assert not passes_initially
    # ...and satisfied afterwards.
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
Ejemplo n.º 4
0
def test_avoid_repeated_small_kmers():
    """AvoidPattern(RepeatedKmerPattern(3, 3)) is breached, then resolved."""
    repetitive_sequence = (
        "AGAAGAAGAAGAAGAAGATTTTTTTTTTTTTGGAGGAGGAGGACCCCCCCCCCCCGAGG"
    )
    repeat_pattern = RepeatedKmerPattern(3, 3)
    dna_problem = DnaOptimizationProblem(
        sequence=repetitive_sequence,
        constraints=[AvoidPattern(repeat_pattern)],
    )
    passes_initially = dna_problem.all_constraints_pass()
    assert not passes_initially
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
Ejemplo n.º 5
0
def test_no_solution_error_frozen_region():
    """A pattern to avoid inside an AvoidChanges sequence raises NoSolutionError.

    The sequence contains CGTCTC while AvoidChanges() is also a constraint;
    resolution is expected to fail with a message mentioning
    'Constraint breach in frozen region'.
    """
    frozen_sequence = "AAAAATCGTCTCTTTT"
    conflicting_constraints = [AvoidChanges(), AvoidPattern(enzyme='BsmBI')]
    dna_problem = DnaOptimizationProblem(
        sequence=frozen_sequence,
        constraints=conflicting_constraints,
    )
    with pytest.raises(NoSolutionError) as err:
        dna_problem.resolve_constraints()
    error_message = str(err.value)
    assert 'Constraint breach in frozen region' in error_message
Ejemplo n.º 6
0
def test_no_solution_error_frozen_region():
    """A pattern to avoid inside an AvoidChanges sequence raises NoSolutionError.

    The sequence contains CGTCTC while AvoidChanges() is also a constraint;
    resolution is expected to fail with a message mentioning
    'region that cannot be mutated'.
    """
    frozen_sequence = "AAAAATCGTCTCTTTT"
    keep_everything = AvoidChanges()
    no_bsmbi = AvoidPattern('BsmBI_site')
    dna_problem = DnaOptimizationProblem(
        sequence=frozen_sequence,
        constraints=[keep_everything, no_bsmbi],
    )
    with pytest.raises(NoSolutionError) as err:
        dna_problem.resolve_constraints()
    error_message = str(err.value)
    assert 'region that cannot be mutated' in error_message
Ejemplo n.º 7
0
def test_avoid_pattern_basics():
    """AvoidPattern(enzyme="BsaI") on a 10000bp random sequence gets resolved."""
    numpy.random.seed(123)
    random_sequence = random_dna_sequence(10000, seed=123)
    bsai_constraint = AvoidPattern(enzyme="BsaI")
    dna_problem = DnaOptimizationProblem(
        sequence=random_sequence,
        constraints=[bsai_constraint],
    )
    passes_initially = dna_problem.all_constraints_pass()
    assert not passes_initially
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_AvoidNonUniqueSegments_from_polyAs():
    """AvoidNonUniqueSegments(3) on region (10, 30) of a poly-A gets resolved."""
    poly_a = 40 * "A"
    uniqueness_constraint = AvoidNonUniqueSegments(3, location=(10, 30))
    dna_problem = DnaOptimizationProblem(
        sequence=poly_a,
        constraints=[uniqueness_constraint],
    )
    passes_initially = dna_problem.all_constraints_pass()
    assert not passes_initially
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
Ejemplo n.º 9
0
def test_AvoidNonuniqueSegments_as_constraint():
    """AvoidNonuniqueSegments(8) on a 1000bp random sequence gets resolved."""
    numpy.random.seed(123)
    random_sequence = random_dna_sequence(1000, seed=123)
    uniqueness_constraint = AvoidNonuniqueSegments(8)
    dna_problem = DnaOptimizationProblem(
        sequence=random_sequence,
        constraints=[uniqueness_constraint],
    )
    passes_initially = dna_problem.all_constraints_pass()
    assert not passes_initially
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
Ejemplo n.º 10
0
def test_no_solution_error_exhaustive_search():
    """Unsatisfiable GC constraint on a 7bp poly-T raises NoSolutionError.

    Positions (0, 4) are protected by AvoidChanges while a GC content of at
    least 0.8 is required; the error message is expected to mention
    'Exhaustive search failed'.
    """
    protect_start = AvoidChanges((0, 4))
    high_gc = EnforceGCContent(mini=0.8)
    dna_problem = DnaOptimizationProblem(
        sequence="TTTTTTT",
        constraints=[protect_start, high_gc],
    )
    with pytest.raises(NoSolutionError) as err:
        dna_problem.resolve_constraints()
    error_message = str(err.value)
    assert 'Exhaustive search failed' in error_message
Ejemplo n.º 11
0
def test_no_solution_error_random_search():
    """Unsatisfiable GC constraint on a longer poly-T raises NoSolutionError.

    Positions (0, 10) are protected by AvoidChanges while a GC content of at
    least 0.8 is required; the error message is expected to mention
    'Random search did not'.
    """
    protect_start = AvoidChanges((0, 10))
    high_gc = EnforceGCContent(mini=0.8)
    dna_problem = DnaOptimizationProblem(
        sequence="TTTTTTTTTTTTTTTTTTTTTTTTTTTT",
        constraints=[protect_start, high_gc],
    )
    with pytest.raises(NoSolutionError) as err:
        dna_problem.resolve_constraints()
    error_message = str(err.value)
    assert 'Random search did not' in error_message
Ejemplo n.º 12
0
def test_avoid_pattern_overlapping_locations():
    """AvoidPattern("NAN") on an AG-repeat is resolved, removing interior A's.

    After resolution no "A" may remain anywhere except possibly at the first
    or last position of the sequence.
    """
    numpy.random.seed(123)
    ag_repeat = "AGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAG"
    dna_problem = DnaOptimizationProblem(
        sequence=ag_repeat,
        constraints=[AvoidPattern("NAN")],
    )
    passes_initially = dna_problem.all_constraints_pass()
    assert not passes_initially
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
    interior = dna_problem.sequence[1:-1]
    assert "A" not in interior
Ejemplo n.º 13
0
def test_UniquifyAllKmers_as_constraint():
    """UniquifyAllKmers(8) on a 1000bp random sequence gets resolved."""
    numpy.random.seed(123)
    random_sequence = random_dna_sequence(1000, seed=123)
    kmer_constraint = UniquifyAllKmers(8)
    dna_problem = DnaOptimizationProblem(
        sequence=random_sequence,
        constraints=[kmer_constraint],
        logger=None,
    )
    passes_initially = dna_problem.all_constraints_pass()
    assert not passes_initially
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
Ejemplo n.º 14
0
def test_avoid_hairpins_on_extremities():
    """AvoidHairpins reports breaches at both sequence ends, then is resolved.

    See https://github.com/Edinburgh-Genome-Foundry/DnaChisel/issues/37
    """
    hairpin_constraint = AvoidHairpins(stem_size=3, hairpin_window=8)
    dna_problem = DnaOptimizationProblem(
        sequence="attcaatgggggggggggggggggggggggggtagccta",
        constraints=[hairpin_constraint],
    )
    # Only one constraint is defined, so its evaluation is the first entry.
    all_evaluations = dna_problem.constraints_evaluations()
    first_evaluation = all_evaluations.evaluations[0]
    reported_locations = str(first_evaluation.locations)
    assert reported_locations == "[0-6, 32-39]"
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
Ejemplo n.º 15
0
def test_UniquifyAllKmers_from_polyAs():
    """UniquifyAllKmers(3) on region (10, 30) of a 40bp poly-A gets resolved."""
    poly_a = 40 * "A"
    kmer_constraint = UniquifyAllKmers(3, location=(10, 30))
    dna_problem = DnaOptimizationProblem(
        sequence=poly_a,
        constraints=[kmer_constraint],
        logger=None,
    )
    passes_initially = dna_problem.all_constraints_pass()
    assert not passes_initially
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
Ejemplo n.º 16
0
def test_UniquifyAllKmers_from_polyAs_uncached():
    """Same poly-A scenario with the constraint's ``use_cache`` turned off.

    Per the original note, uncaching actually calls another function,
    get_kmer_extractor.
    """
    uncached_constraint = UniquifyAllKmers(3, location=(10, 30))
    uncached_constraint.use_cache = False
    poly_a = 40 * "A"
    dna_problem = DnaOptimizationProblem(
        sequence=poly_a,
        constraints=[uncached_constraint],
        logger=None,
    )
    passes_initially = dna_problem.all_constraints_pass()
    assert not passes_initially
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
Ejemplo n.º 17
0
def test_pattern_and_reverse():
    """Remove 'BsmBI_site' from repeated CGTCTC / GAGACG with few edits.

    The sequence concatenates 10 + 15 copies of CGTCTC and 25 + 15 copies of
    GAGACG. After resolving the constraint and optimizing the AvoidChanges
    objective, the total number of edits must be below 70.
    """
    forward_site = "CGTCTC"
    reverse_site = "GAGACG"
    repeats = [
        10 * forward_site,
        25 * reverse_site,
        15 * forward_site,
        15 * reverse_site,
    ]
    sequence = "".join(repeats)
    dna_problem = DnaOptimizationProblem(
        sequence,
        constraints=[AvoidPattern('BsmBI_site')],
        objectives=[AvoidChanges()],
    )
    dna_problem.resolve_constraints()
    dna_problem.optimize()
    edit_count = sum(dna_problem.sequence_edits_as_array())
    assert edit_count < 70
Ejemplo n.º 18
0
def test_EnforceTranlation():
    """AvoidPattern("AAA") combined with EnforceTranslation() gets resolved.

    The sequence is the reverse translation of a 50-residue random protein.
    """
    numpy.random.seed(1234)
    protein = random_protein_sequence(50, seed=123)
    coding_sequence = reverse_translate(protein)
    no_triple_a = AvoidPattern("AAA")
    keep_protein = EnforceTranslation()
    dna_problem = DnaOptimizationProblem(
        sequence=coding_sequence,
        constraints=[no_triple_a, keep_protein],
    )
    passes_initially = dna_problem.all_constraints_pass()
    assert not passes_initially
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
Ejemplo n.º 19
0
def test_AvoidStopCodons():
    """AvoidStopCodons is resolved and the translation contains no '*'.

    The input interleaves the codons TAG, TGA and TAA with ATT, GCC and ATC.
    """
    numpy.random.seed(123)
    codons = ("ATT", "TAG", "GCC", "TGA", "ATC", "TAA")
    dna_problem = DnaOptimizationProblem(
        sequence="".join(codons),
        constraints=[AvoidStopCodons()],
        logger=None,
    )
    passes_initially = dna_problem.all_constraints_pass()
    assert not passes_initially
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
    translated = translate(dna_problem.sequence)
    assert "*" not in translated
Ejemplo n.º 20
0
def test_codon_optimize_as_hard_constraint():
    """CodonOptimize used as a constraint on region 1000-1300 gets resolved.

    EnforceTranslation is applied to the same region of a 2000bp random
    sequence.
    """
    numpy.random.seed(123)
    random_sequence = random_dna_sequence(2000, seed=123)
    # Each constraint receives its own Location instance, as in the
    # original test.
    keep_protein = EnforceTranslation(location=Location(1000, 1300))
    optimize_codons = CodonOptimize(
        location=Location(1000, 1300), species='e_coli'
    )
    dna_problem = DnaOptimizationProblem(
        sequence=random_sequence,
        constraints=[keep_protein, optimize_codons],
    )
    passes_initially = dna_problem.all_constraints_pass()
    assert not passes_initially
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
Ejemplo n.º 21
0
def test_AvoidPattern_with_jaspar_motifs():
    """AvoidPattern built from JASPAR motifs finds 2 breaches, then is resolved.

    Motifs are read from the JASPAR_CONTENT text through a StringIO, with a
    relative threshold of 0.9; one AvoidPattern is created per motif.
    """
    jaspar_handle = StringIO(JASPAR_CONTENT)
    motif_patterns = MotifPssmPattern.list_from_file(
        jaspar_handle,
        file_format="jaspar",
        relative_threshold=0.9,
    )
    motif_constraints = [AvoidPattern(motif) for motif in motif_patterns]
    dna_problem = DnaOptimizationProblem(
        sequence="GGGGGGGGGGTGCGTGATTAAAGGGGG",
        constraints=motif_constraints,
    )
    breach_locations = dna_problem.constraints_evaluations().all_locations()
    assert len(breach_locations) == 2
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
Ejemplo n.º 22
0
def test_basics():
    """EnforceTerminalGCContent plus AvoidPattern(enzyme="BsaI") get resolved.

    The 10000bp random sequence is drawn with G and C at probability 0.3
    each and A and T at 0.2 each, while the terminal GC content is
    constrained to 0.2-0.4 over a window of 50.
    """
    numpy.random.seed(123)
    base_probabilities = {'A': 0.2, 'T': 0.2, 'G': 0.3, 'C': 0.3}
    random_sequence = random_dna_sequence(
        10000, probas=base_probabilities, seed=123
    )
    no_bsai = AvoidPattern(enzyme="BsaI")
    terminal_gc = EnforceTerminalGCContent(mini=0.2, maxi=0.4, window_size=50)
    dna_problem = DnaOptimizationProblem(
        sequence=random_sequence,
        constraints=[no_bsai, terminal_gc],
    )
    passes_initially = dna_problem.all_constraints_pass()
    assert not passes_initially
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_AvoidPattern_with_regular_expression():
    """AvoidPattern given the regex GGT(.*)GAT is resolved under EnforceTranslation."""
    sequence_parts = (
        "ATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTG",
        "GTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGCGCGGC",
        "GAGGGCGAGGGCGATGCCACCAACGGCAAGCTGACCCTGAAGTTCATC",
    )
    coding_sequence = "".join(sequence_parts)
    keep_protein = EnforceTranslation()
    no_regex_match = AvoidPattern(r"GGT(.*)GAT")
    dna_problem = DnaOptimizationProblem(
        sequence=coding_sequence,
        constraints=[keep_protein, no_regex_match],
        logger=None,
    )
    passes_initially = dna_problem.all_constraints_pass()
    assert not passes_initially
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
Ejemplo n.º 24
0
def test_avoid_blast_matches():
    """AvoidBlastMatches against two sequences: 10 breaches, then resolved.

    NOTE(review): ``sequence`` is not defined inside this function; it is
    presumably a module-level name - confirm it exists where this test lives.
    """
    first_avoided = "GTCCTCATGCGAAAGCTACGATCGCCAACCCTGT"
    second_avoided = "ACCCACCTCGTTACGTCCACGGCACGAGGAATGATCTCGAGTTGCTTT"
    blast_constraint = AvoidBlastMatches(
        sequences=[first_avoided, second_avoided],
        min_align_length=8,
    )
    dna_problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[blast_constraint],
    )
    passes_initially = dna_problem.all_constraints_pass()
    assert not passes_initially
    # Evaluate the constraint directly to count the breached locations.
    evaluation = blast_constraint.evaluate(dna_problem)
    breach_count = len(evaluation.locations)
    assert breach_count == 10
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
Ejemplo n.º 25
0
def test_avoid_matches_with_phage():
    """AvoidMatches against a bowtie index: 5 breaches, then resolved.

    The index path is obtained from a GenomeCollection for taxid "697289"
    with version "1"; matches of length 10 are avoided in a 30bp random
    sequence.
    """
    PHAGE_TAXID = "697289"
    genomes = GenomeCollection()
    index_path = genomes.get_taxid_bowtie_index_path(PHAGE_TAXID, version="1")
    random_sequence = random_dna_sequence(30, seed=123)
    match_constraint = AvoidMatches(bowtie_index=index_path, match_length=10)
    dna_problem = DnaOptimizationProblem(
        sequence=random_sequence,
        constraints=[match_constraint],
        logger=None,
    )
    breach_locations = dna_problem.constraints_evaluations().all_locations()
    assert len(breach_locations) == 5
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
Ejemplo n.º 26
0
def test_AllowPrimer():
    """AllowPrimer on region (10, 30) of a 40bp poly-A can be resolved.

    The constraint is configured with tmin=50, tmax=60, a maximum homology
    length of 5 and three primers to avoid heterodimerization with.
    """
    other_primers = ["ATTGCGCCAAACT", "TAATCCACCCTAAT", "ATTCACACTTCAA"]
    poly_a = 40 * "A"
    primer_constraint = AllowPrimer(
        tmin=50,
        tmax=60,
        max_homology_length=5,
        location=(10, 30),
        avoid_heterodim_with=other_primers,
    )
    dna_problem = DnaOptimizationProblem(
        sequence=poly_a,
        constraints=[primer_constraint],
    )
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
Ejemplo n.º 27
0
def test_EnforceGCContents():
    """Windowed EnforceGCContent plus AvoidPattern(enzyme="BsaI") get resolved.

    A second EnforceGCContent (target=0.4) is supplied as an objective; the
    test only resolves constraints and does not run the optimization step.
    """
    numpy.random.seed(123)
    random_sequence = random_dna_sequence(10000, seed=123)
    no_bsai = AvoidPattern(enzyme="BsaI")
    windowed_gc = EnforceGCContent(mini=0.3, maxi=0.7, window=50)
    gc_target = EnforceGCContent(target=0.4)
    dna_problem = DnaOptimizationProblem(
        sequence=random_sequence,
        constraints=[no_bsai, windowed_gc],
        objectives=[gc_target],
    )

    passes_initially = dna_problem.all_constraints_pass()
    assert not passes_initially
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
Ejemplo n.º 28
0
def test_EnforceTranlationReversed():
    """EnforceTranslation with a -1 strand location gets resolved.

    The problem sequence is the reverse complement of a reverse-translated
    50-residue random protein; AvoidPattern("AGC") is the other constraint.
    """
    numpy.random.seed(1234)
    protein = random_protein_sequence(50, seed=123)
    sequence = reverse_translate(protein)
    rev_sequence = reverse_complement(sequence)
    no_agc = AvoidPattern("AGC")
    # The location spans the full length, with -1 as its third element.
    keep_protein = EnforceTranslation(location=(0, len(sequence), -1))
    dna_problem = DnaOptimizationProblem(
        sequence=rev_sequence,
        constraints=[no_agc, keep_protein],
    )
    passes_initially = dna_problem.all_constraints_pass()
    assert not passes_initially
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
Ejemplo n.º 29
0
def test_basics():
    """EnforceTerminalGCContent plus AvoidPattern("BsaI_site") get resolved.

    The 10000bp random sequence is drawn with G and C at probability 0.3
    each and A and T at 0.2 each, while the terminal GC content is
    constrained to 0.2-0.4 over a window of 50.
    """
    numpy.random.seed(123)
    base_probabilities = {"A": 0.2, "T": 0.2, "G": 0.3, "C": 0.3}
    random_sequence = random_dna_sequence(
        10000, probas=base_probabilities, seed=123
    )
    no_bsai = AvoidPattern("BsaI_site")
    terminal_gc = EnforceTerminalGCContent(mini=0.2, maxi=0.4, window_size=50)
    dna_problem = DnaOptimizationProblem(
        sequence=random_sequence,
        constraints=[no_bsai, terminal_gc],
        logger=None,
    )
    passes_initially = dna_problem.all_constraints_pass()
    assert not passes_initially
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
Ejemplo n.º 30
0
def test_AvoidChanges_with_max_edits():
    """AvoidChanges(max_edits=2) is resolved together with pattern constraints.

    On "ATATATATATA" the problem must avoid "ATATA" while enforcing 6
    occurrences of "A" and 4 of "T" over location (0, 11, 1).
    """
    numpy.random.seed(1)
    full_span = (0, 11, 1)
    limited_edits = AvoidChanges(max_edits=2)
    no_atata = AvoidPattern("ATATA")
    six_a = EnforcePatternOccurence("A", occurences=6, location=full_span)
    four_t = EnforcePatternOccurence("T", occurences=4, location=full_span)
    dna_problem = DnaOptimizationProblem(
        sequence="ATATATATATA",
        constraints=[limited_edits, no_atata, six_a, four_t],
        logger=None,
    )
    passes_initially = dna_problem.all_constraints_pass()
    assert not passes_initially
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()