def test_EnforceRegionsCompatibility():
    """Check that EnforceRegionsCompatibility resolves alongside a GC limit.

    Four 4-nt regions are declared; the compatibility condition requires
    ``sequences_differences`` between any two of them to be at least 2
    (labelled "2bp difference"). An ``EnforceGCContent`` constraint
    (0.4-0.6, window of 40) is applied as well. Every constraint is expected
    to fail on the initial random sequence and to pass after
    ``resolve_constraints()``.
    """
    numpy.random.seed(123)

    def compatibility_condition(location1, location2, problem):
        # Compare the sub-sequences the two locations extract from the
        # problem's current sequence.
        seq1 = location1.extract_sequence(problem.sequence)
        seq2 = location2.extract_sequence(problem.sequence)
        return sequences_differences(seq1, seq2) >= 2

    locations = [(0, 4), (50, 54), (100, 104), (150, 154)]
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(200, seed=123),
        constraints=[
            EnforceRegionsCompatibility(
                locations=locations,
                compatibility_condition=compatibility_condition,
                condition_label="2bp difference",
            ),
            EnforceGCContent(mini=0.4, maxi=0.6, window=40),
        ],
        logger=None,
    )
    assert not any(e.passes for e in problem.constraints_evaluations())
    problem.resolve_constraints()
    assert problem.all_constraints_pass()

    # Re-check every pair of regions directly on the final sequence.
    # all(...) is required: asserting the bare list always succeeds, because
    # a non-empty list is truthy whatever booleans it contains.
    seq = problem.sequence
    assert all(
        sequences_differences(seq[s1:e1], seq[s2:e2]) >= 2
        for (s1, e1), (s2, e2) in itertools.combinations(locations, 2)
    )
def optimize(self, codon_table):
    """Re-optimize the stored vaccine codons with a DnaOptimizationProblem.

    Steps, in order:
      1. run ``self.optimize_frequent(codon_table)``;
      2. copy the stored codons, clear the stored container in place, and
         build a strand from the copy with ``self.get_strand``;
      3. resolve and optimize a problem constrained by ``EnforceTranslation``
         and ``EnforceGCContent`` with a ``CodonOptimize`` objective that
         uses the table returned by ``pct.get_codons_table(codon_table)``;
      4. store the resulting sequence back as a list of 3-character strings.

    Trailing characters that do not complete a triplet are dropped.
    Returns ``None``.
    """
    self.optimize_frequent(codon_table)

    # Snapshot first: the stored container is emptied before the strand is
    # built from the snapshot.
    starting_codons = self.__vaccine_codons_gen.copy()
    self.__vaccine_codons_gen.clear()
    vac_strand = self.get_strand(starting_codons)

    usage_table = pct.get_codons_table(codon_table)
    keep_translation = EnforceTranslation(
        genetic_table='Standard', start_codon='ATG'
    )
    gc_bounds = EnforceGCContent(mini=0.54, maxi=0.9, window=120)
    best_codons = CodonOptimize(
        method="use_best_codon", codon_usage_table=usage_table
    )
    problem = DnaOptimizationProblem(
        sequence=vac_strand,
        constraints=[keep_translation, gc_bounds],
        objectives=[best_codons],
    )
    problem.resolve_constraints()
    problem.optimize()

    # Pulling three times from one shared iterator yields consecutive
    # triplets; zip stops at the first incomplete group.
    bases = iter(problem.sequence)
    self.__vaccine_codons_gen = [
        "".join(triplet) for triplet in zip(bases, bases, bases)
    ]
def test_AvoidStopCodons():
    """AvoidStopCodons fails on a sequence whose last triplet is TAA, then
    passes once constraints are resolved."""
    numpy.random.seed(123)
    initial_sequence = "ATTGCCATCTAA"
    no_stops = AvoidStopCodons()
    dna_problem = DnaOptimizationProblem(
        sequence=initial_sequence,
        constraints=[no_stops],
    )

    # Before resolution the constraint must be violated...
    assert not dna_problem.all_constraints_pass()

    # ...and satisfied afterwards.
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_avoid_repeated_small_kmers():
    """AvoidPattern(RepeatedKmerPattern(3, 3)) is violated by a sequence made
    of short repeats and satisfied after constraint resolution."""
    repetitive_sequence = (
        "AGAAGAAGAAGAAGAAGATTTTTTTTTTTTTGGAGGAGGAGGACCCCCCCCCCCCGAGG"
    )
    no_kmer_repeats = AvoidPattern(RepeatedKmerPattern(3, 3))
    dna_problem = DnaOptimizationProblem(
        sequence=repetitive_sequence,
        constraints=[no_kmer_repeats],
    )

    assert not dna_problem.all_constraints_pass()
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_no_solution_error_frozen_region():
    """Resolving AvoidPattern(enzyme='BsmBI') together with a global
    AvoidChanges raises NoSolutionError with the frozen-region message."""
    freeze_everything = AvoidChanges()
    no_bsmbi = AvoidPattern(enzyme='BsmBI')
    dna_problem = DnaOptimizationProblem(
        sequence="AAAAATCGTCTCTTTT",
        constraints=[freeze_everything, no_bsmbi],
    )

    with pytest.raises(NoSolutionError) as err:
        dna_problem.resolve_constraints()

    # The message is inspected after the context manager has captured the
    # exception.
    message = str(err.value)
    assert 'Constraint breach in frozen region' in message
def test_no_solution_error_frozen_region():
    """Resolving AvoidPattern('BsmBI_site') together with a global
    AvoidChanges raises NoSolutionError mentioning a region that cannot be
    mutated."""
    freeze_everything = AvoidChanges()
    no_bsmbi_site = AvoidPattern('BsmBI_site')
    dna_problem = DnaOptimizationProblem(
        sequence="AAAAATCGTCTCTTTT",
        constraints=[freeze_everything, no_bsmbi_site],
    )

    with pytest.raises(NoSolutionError) as err:
        dna_problem.resolve_constraints()

    # The message is inspected after the context manager has captured the
    # exception.
    message = str(err.value)
    assert 'region that cannot be mutated' in message
def test_avoid_pattern_basics():
    """AvoidPattern(enzyme="BsaI") fails on a seeded random 10000-nt sequence
    and passes after constraint resolution."""
    numpy.random.seed(123)
    random_sequence = random_dna_sequence(10000, seed=123)
    no_bsai = AvoidPattern(enzyme="BsaI")
    dna_problem = DnaOptimizationProblem(
        sequence=random_sequence,
        constraints=[no_bsai],
    )

    assert not dna_problem.all_constraints_pass()
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_AvoidNonUniqueSegments_from_polyAs():
    """AvoidNonUniqueSegments(3) restricted to (10, 30) fails on a run of
    40 "A" and passes after constraint resolution."""
    poly_a = 40 * "A"
    unique_segments = AvoidNonUniqueSegments(3, location=(10, 30))
    dna_problem = DnaOptimizationProblem(
        sequence=poly_a,
        constraints=[unique_segments],
    )

    assert not dna_problem.all_constraints_pass()
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_AvoidNonuniqueSegments_as_constraint():
    """AvoidNonuniqueSegments(8) used as a constraint fails on a seeded random
    1000-nt sequence and passes after constraint resolution."""
    numpy.random.seed(123)
    sequence = random_dna_sequence(1000, seed=123)
    unique_segments = AvoidNonuniqueSegments(8)
    dna_problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[unique_segments],
    )

    assert not dna_problem.all_constraints_pass()
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_no_solution_error_exhaustive_search():
    """On a 7-nt poly-T with (0, 4) frozen, EnforceGCContent(mini=0.8) cannot
    be resolved and the error reports a failed exhaustive search."""
    frozen_prefix = AvoidChanges((0, 4))
    high_gc = EnforceGCContent(mini=0.8)
    dna_problem = DnaOptimizationProblem(
        sequence="TTTTTTT",
        constraints=[frozen_prefix, high_gc],
    )

    with pytest.raises(NoSolutionError) as err:
        dna_problem.resolve_constraints()

    message = str(err.value)
    assert 'Exhaustive search failed' in message
def test_no_solution_error_random_search():
    """On a longer poly-T with (0, 10) frozen, EnforceGCContent(mini=0.8)
    cannot be resolved and the error reports an unsuccessful random search."""
    frozen_prefix = AvoidChanges((0, 10))
    high_gc = EnforceGCContent(mini=0.8)
    dna_problem = DnaOptimizationProblem(
        sequence="TTTTTTTTTTTTTTTTTTTTTTTTTTTT",
        constraints=[frozen_prefix, high_gc],
    )

    with pytest.raises(NoSolutionError) as err:
        dna_problem.resolve_constraints()

    message = str(err.value)
    assert 'Random search did not' in message
def test_avoid_pattern_overlapping_locations():
    """AvoidPattern("NAN") on an AG-repeat sequence: fails first, passes after
    resolution, and leaves no "A" except possibly at the two end positions."""
    numpy.random.seed(123)
    ag_repeats = "AGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAG"
    no_inner_a = AvoidPattern("NAN")
    dna_problem = DnaOptimizationProblem(
        sequence=ag_repeats,
        constraints=[no_inner_a],
    )

    assert not dna_problem.all_constraints_pass()
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()

    # Only the interior is checked; the first and last characters are
    # excluded by the slice.
    interior = dna_problem.sequence[1:-1]
    assert "A" not in interior
def test_UniquifyAllKmers_as_constraint():
    """UniquifyAllKmers(8) used as a constraint fails on a seeded random
    1000-nt sequence and passes after constraint resolution."""
    numpy.random.seed(123)
    sequence = random_dna_sequence(1000, seed=123)
    unique_kmers = UniquifyAllKmers(8)
    dna_problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[unique_kmers],
        logger=None,
    )

    assert not dna_problem.all_constraints_pass()
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_avoid_hairpins_on_extremities():
    """AvoidHairpins(stem_size=3, hairpin_window=8) reports breaches at both
    ends of the sequence and can then be resolved.

    See https://github.com/Edinburgh-Genome-Foundry/DnaChisel/issues/37
    """
    no_hairpins = AvoidHairpins(stem_size=3, hairpin_window=8)
    dna_problem = DnaOptimizationProblem(
        sequence="attcaatgggggggggggggggggggggggggtagccta",
        constraints=[no_hairpins],
    )

    # Inspect the first (and only declared) constraint's evaluation.
    all_evaluations = dna_problem.constraints_evaluations()
    first_evaluation = all_evaluations.evaluations[0]
    assert str(first_evaluation.locations) == "[0-6, 32-39]"

    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_UniquifyAllKmers_from_polyAs():
    """UniquifyAllKmers(3) restricted to (10, 30) fails on a run of 40 "A"
    and passes after constraint resolution."""
    poly_a = 40 * "A"
    unique_kmers = UniquifyAllKmers(3, location=(10, 30))
    dna_problem = DnaOptimizationProblem(
        sequence=poly_a,
        constraints=[unique_kmers],
        logger=None,
    )

    assert not dna_problem.all_constraints_pass()
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_UniquifyAllKmers_from_polyAs_uncached():
    """Same poly-A scenario with the constraint's cache switched off.

    Uncaching actually calls another function, get_kmer_extractor.
    """
    uncached_constraint = UniquifyAllKmers(3, location=(10, 30))
    uncached_constraint.use_cache = False

    poly_a = 40 * "A"
    dna_problem = DnaOptimizationProblem(
        sequence=poly_a,
        constraints=[uncached_constraint],
        logger=None,
    )

    assert not dna_problem.all_constraints_pass()
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_pattern_and_reverse():
    """Remove 'BsmBI_site' from a sequence built from repeats of CGTCTC and
    GAGACG, with AvoidChanges as objective, and check that the summed edits
    array stays below 70."""
    bsmbi = "CGTCTC"
    bsmbi_rev = "GAGACG"

    # 10 forward, 25 reverse, 15 forward, 15 reverse repeats, concatenated.
    segments = (10 * bsmbi, 25 * bsmbi_rev, 15 * bsmbi, 15 * bsmbi_rev)
    sequence = "".join(segments)

    dna_problem = DnaOptimizationProblem(
        sequence,
        constraints=[AvoidPattern('BsmBI_site')],
        objectives=[AvoidChanges()],
    )
    dna_problem.resolve_constraints()
    dna_problem.optimize()

    total_edits = sum(dna_problem.sequence_edits_as_array())
    assert total_edits < 70
def test_EnforceTranlation():
    """AvoidPattern("AAA") combined with EnforceTranslation fails on the
    reverse translation of a seeded random 50-residue protein and passes
    after constraint resolution."""
    numpy.random.seed(1234)
    protein = random_protein_sequence(50, seed=123)
    coding_sequence = reverse_translate(protein)

    no_triple_a = AvoidPattern("AAA")
    keep_protein = EnforceTranslation()
    dna_problem = DnaOptimizationProblem(
        sequence=coding_sequence,
        constraints=[no_triple_a, keep_protein],
    )

    assert not dna_problem.all_constraints_pass()
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_AvoidStopCodons():
    """AvoidStopCodons fails on a sequence containing TAG, TGA and TAA
    triplets; after resolution it passes and the translation has no "*"."""
    numpy.random.seed(123)
    triplets = ("ATT", "TAG", "GCC", "TGA", "ATC", "TAA")
    initial_sequence = "".join(triplets)

    dna_problem = DnaOptimizationProblem(
        sequence=initial_sequence,
        constraints=[AvoidStopCodons()],
        logger=None,
    )

    assert not dna_problem.all_constraints_pass()
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()

    translated = translate(dna_problem.sequence)
    assert "*" not in translated
def test_codon_optimize_as_hard_constraint():
    """CodonOptimize (species 'e_coli') used as a constraint over
    Location(1000, 1300), together with EnforceTranslation on the same span,
    fails on a seeded random 2000-nt sequence and passes after resolution."""
    numpy.random.seed(123)
    random_sequence = random_dna_sequence(2000, seed=123)

    # Each constraint gets its own Location instance, as in the original.
    keep_protein = EnforceTranslation(location=Location(1000, 1300))
    optimized_codons = CodonOptimize(
        location=Location(1000, 1300), species='e_coli'
    )

    dna_problem = DnaOptimizationProblem(
        sequence=random_sequence,
        constraints=[keep_protein, optimized_codons],
    )

    assert not dna_problem.all_constraints_pass()
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_AvoidPattern_with_jaspar_motifs():
    """Build one AvoidPattern per motif parsed from JASPAR_CONTENT, expect two
    breach locations on the test sequence, then resolve them all."""
    jaspar_handle = StringIO(JASPAR_CONTENT)
    motif_patterns = MotifPssmPattern.list_from_file(
        jaspar_handle,
        file_format="jaspar",
        relative_threshold=0.9,
    )

    motif_constraints = [AvoidPattern(pattern) for pattern in motif_patterns]
    dna_problem = DnaOptimizationProblem(
        sequence="GGGGGGGGGGTGCGTGATTAAAGGGGG",
        constraints=motif_constraints,
    )

    breach_locations = dna_problem.constraints_evaluations().all_locations()
    assert len(breach_locations) == 2

    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_basics():
    """AvoidPattern(enzyme="BsaI") plus EnforceTerminalGCContent (0.2-0.4,
    window_size=50) fail on a seeded, G/C-weighted random 10000-nt sequence
    and pass after constraint resolution."""
    numpy.random.seed(123)

    # Nucleotide probabilities passed to the random sequence generator.
    probas = {'A': 0.2, 'T': 0.2, 'G': 0.3, 'C': 0.3}
    random_sequence = random_dna_sequence(10000, probas=probas, seed=123)

    no_bsai = AvoidPattern(enzyme="BsaI")
    terminal_gc = EnforceTerminalGCContent(mini=0.2, maxi=0.4, window_size=50)
    dna_problem = DnaOptimizationProblem(
        sequence=random_sequence,
        constraints=[no_bsai, terminal_gc],
    )

    assert not dna_problem.all_constraints_pass()
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_AvoidPattern_with_regular_expression():
    """AvoidPattern given the regular expression GGT(.*)GAT, combined with
    EnforceTranslation, fails on the test sequence and passes after
    constraint resolution."""
    sequence = (
        "ATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTG"
        "GTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGCGCGGC"
        "GAGGGCGAGGGCGATGCCACCAACGGCAAGCTGACCCTGAAGTTCATC"
    )
    keep_protein = EnforceTranslation()
    no_regex_match = AvoidPattern(r"GGT(.*)GAT")

    dna_problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[keep_protein, no_regex_match],
        logger=None,
    )

    assert not dna_problem.all_constraints_pass()
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_avoid_blast_matches():
    """AvoidBlastMatches against two sequences (min_align_length=8) reports
    10 breach locations on ``sequence`` and can then be resolved."""
    avoided_seqs = [
        "GTCCTCATGCGAAAGCTACGATCGCCAACCCTGT",
        "ACCCACCTCGTTACGTCCACGGCACGAGGAATGATCTCGAGTTGCTTT",
    ]
    blast_constraint = AvoidBlastMatches(
        sequences=avoided_seqs, min_align_length=8
    )

    # NOTE(review): `sequence` is not defined inside this function; it is
    # presumably a module-level value -- confirm it exists in the test module.
    dna_problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[blast_constraint],
    )
    assert not dna_problem.all_constraints_pass()

    # Evaluate the constraint directly to count the breach locations.
    evaluation = blast_constraint.evaluate(dna_problem)
    assert len(evaluation.locations) == 10

    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_avoid_matches_with_phage():
    """AvoidMatches (match_length=10) against the bowtie index obtained for
    taxid 697289 reports 5 breaches on a seeded random 30-nt sequence and can
    then be resolved."""
    PHAGE_TAXID = "697289"
    genomes = GenomeCollection()
    bowtie_index = genomes.get_taxid_bowtie_index_path(
        PHAGE_TAXID, version="1"
    )

    random_sequence = random_dna_sequence(30, seed=123)
    no_phage_matches = AvoidMatches(bowtie_index=bowtie_index, match_length=10)
    dna_problem = DnaOptimizationProblem(
        sequence=random_sequence,
        constraints=[no_phage_matches],
        logger=None,
    )

    all_breaches = dna_problem.constraints_evaluations().all_locations()
    assert len(all_breaches) == 5

    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_AllowPrimer():
    """An AllowPrimer constraint over (10, 30) of a 40-nt poly-A, with three
    primers to avoid heterodimerizing with, passes after resolution."""
    primers = ["ATTGCGCCAAACT", "TAATCCACCCTAAT", "ATTCACACTTCAA"]
    primer_constraint = AllowPrimer(
        tmin=50,
        tmax=60,
        max_homology_length=5,
        location=(10, 30),
        avoid_heterodim_with=primers,
    )
    dna_problem = DnaOptimizationProblem(
        sequence=40 * "A",
        constraints=[primer_constraint],
    )

    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_EnforceGCContents():
    """AvoidPattern(enzyme="BsaI") and a windowed EnforceGCContent (0.3-0.7,
    window=50) fail on a seeded random 10000-nt sequence and pass after
    resolution. An EnforceGCContent(target=0.4) objective is declared but
    ``optimize()`` is not called."""
    numpy.random.seed(123)
    random_sequence = random_dna_sequence(10000, seed=123)

    hard_constraints = [
        AvoidPattern(enzyme="BsaI"),
        EnforceGCContent(mini=0.3, maxi=0.7, window=50),
    ]
    gc_target = EnforceGCContent(target=0.4)

    dna_problem = DnaOptimizationProblem(
        sequence=random_sequence,
        constraints=hard_constraints,
        objectives=[gc_target],
    )

    assert not dna_problem.all_constraints_pass()
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_EnforceTranlationReversed():
    """EnforceTranslation on a strand -1 location, combined with
    AvoidPattern("AGC"), fails on the reverse complement of a reverse-
    translated random protein and passes after constraint resolution."""
    numpy.random.seed(1234)
    protein = random_protein_sequence(50, seed=123)
    sequence = reverse_translate(protein)
    rev_sequence = reverse_complement(sequence)

    no_agc = AvoidPattern("AGC")
    # Location spans the full length, with -1 as its third component.
    keep_protein = EnforceTranslation(location=(0, len(sequence), -1))

    dna_problem = DnaOptimizationProblem(
        sequence=rev_sequence,
        constraints=[no_agc, keep_protein],
    )

    assert not dna_problem.all_constraints_pass()
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_basics():
    """AvoidPattern("BsaI_site") plus EnforceTerminalGCContent (0.2-0.4,
    window_size=50) fail on a seeded, G/C-weighted random 10000-nt sequence
    and pass after constraint resolution."""
    numpy.random.seed(123)

    # Nucleotide probabilities passed to the random sequence generator.
    probas = {"A": 0.2, "T": 0.2, "G": 0.3, "C": 0.3}
    random_sequence = random_dna_sequence(10000, probas=probas, seed=123)

    no_bsai_site = AvoidPattern("BsaI_site")
    terminal_gc = EnforceTerminalGCContent(mini=0.2, maxi=0.4, window_size=50)
    dna_problem = DnaOptimizationProblem(
        sequence=random_sequence,
        constraints=[no_bsai_site, terminal_gc],
        logger=None,
    )

    assert not dna_problem.all_constraints_pass()
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()
def test_AvoidChanges_with_max_edits():
    """With AvoidChanges(max_edits=2), AvoidPattern("ATATA") and exact
    occurrence counts for "A" (6) and "T" (4) over (0, 11, 1), the constraints
    fail on "ATATATATATA" and pass after resolution."""
    numpy.random.seed(1)
    whole_sequence = (0, 11, 1)

    limited_edits = AvoidChanges(max_edits=2)
    no_atata = AvoidPattern("ATATA")
    six_a = EnforcePatternOccurence("A", occurences=6, location=whole_sequence)
    four_t = EnforcePatternOccurence("T", occurences=4, location=whole_sequence)

    dna_problem = DnaOptimizationProblem(
        sequence="ATATATATATA",
        constraints=[limited_edits, no_atata, six_a, four_t],
        logger=None,
    )

    assert not dna_problem.all_constraints_pass()
    dna_problem.resolve_constraints()
    assert dna_problem.all_constraints_pass()