def test_circular_sequence_optimize_with_report(tmpdir):
    """Check ``optimize_with_report`` on a ``CircularDnaOptimizationProblem``.

    The initial sequence must violate the constraints; after the call every
    constraint must pass and the exported record must differ from the input.
    """
    np.random.seed(123)
    # Until the feature gets more battle-testing, exercise it on a randomly
    # generated sequence. The sequence ends with "CGT" and starts with "CTC",
    # so the same "CGTCTC" motif that is inserted in the middle also appears
    # across the origin once the sequence is treated as circular.
    first_stretch = dc.random_dna_sequence(100)
    second_stretch = dc.random_dna_sequence(100)
    dna_sequence = "CTC" + first_stretch + "CGTCTC" + second_stretch + "CGT"

    constraints = [
        dc.AvoidPattern("BsmBI_site"),
        dc.EnforceGCContent(
            mini=0.4, maxi=0.6, location=(150, 250), window=50
        ),
        dc.UniquifyAllKmers(k=9, location=(10, 100)),
    ]
    problem = dc.CircularDnaOptimizationProblem(
        sequence=dna_sequence, constraints=constraints, logger=None
    )

    # The report is written to a fresh, empty directory.
    target = os.path.join(str(tmpdir), "circular_with_solution")
    os.mkdir(target)
    assert not os.listdir(target)

    assert not problem.all_constraints_pass()
    success, message, data = problem.optimize_with_report(target)
    assert problem.all_constraints_pass()

    record = problem.to_record()
    assert str(record.seq) != dna_sequence
def test_circular_sequence_basic():
    """Resolve the constraints of four random circular problems in a row."""
    np.random.seed(123)
    # Until the feature gets more battle-testing, make sure it works across
    # a range of sequences.
    for _ in range(4):
        first_stretch = dc.random_dna_sequence(100)
        second_stretch = dc.random_dna_sequence(100)
        dna_sequence = (
            "CTC" + first_stretch + "CGTCTC" + second_stretch + "CGT"
        )
        constraints = [
            dc.AvoidPattern("BsmBI_site"),
            dc.EnforceGCContent(
                mini=0.4, maxi=0.6, location=(150, 250), window=50
            ),
            dc.UniquifyAllKmers(k=9, location=(10, 100)),
        ]
        problem = dc.CircularDnaOptimizationProblem(
            sequence=dna_sequence, constraints=constraints, logger=None
        )
        # Each random sequence must start out violating the constraints.
        assert not problem.all_constraints_pass()
        problem.resolve_constraints()
        assert problem.all_constraints_pass()
def test_whole_sequence_change_objective_100():
    """Optimizing an ``EnforceChanges`` objective edits all 50 positions."""
    np.random.seed(123)
    initial_sequence = dc.random_dna_sequence(50)
    change_objective = dc.EnforceChanges()
    problem = dc.DnaOptimizationProblem(
        sequence=initial_sequence, objectives=[change_objective]
    )
    problem.optimize()
    assert problem.number_of_edits() == 50
def test_EnforceRegionsCompatibility():
    """Check that ``EnforceRegionsCompatibility`` can be resolved.

    Four 4-bp regions of a random 200-bp sequence are declared compatible
    when ``sequences_differences`` between any two of them is at least 2
    (the "2bp difference" condition). A windowed GC-content constraint is
    resolved at the same time.
    """
    numpy.random.seed(123)

    def compatibility_condition(location1, location2, problem):
        """Return True when the two regions satisfy the 2bp condition."""
        seq1 = location1.extract_sequence(problem.sequence)
        seq2 = location2.extract_sequence(problem.sequence)
        return sequences_differences(seq1, seq2) >= 2

    locations = [(0, 4), (50, 54), (100, 104), (150, 154)]
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(200, seed=123),
        constraints=[
            EnforceRegionsCompatibility(
                locations=locations,
                compatibility_condition=compatibility_condition,
                condition_label="2bp difference",
            ),
            EnforceGCContent(mini=0.4, maxi=0.6, window=40),
        ],
        logger=None,
    )
    # Both constraints must be violated by the initial random sequence.
    assert not any([e.passes for e in problem.constraints_evaluations()])
    problem.resolve_constraints()
    assert problem.all_constraints_pass()

    # Independently re-check every pair of regions on the final sequence.
    # ``all`` is required here: asserting the bare list would always pass,
    # because a non-empty list is truthy whatever booleans it contains.
    seq = problem.sequence
    assert all(
        sequences_differences(seq[s1:e1], seq[s2:e2]) >= 2
        for (s1, e1), (s2, e2) in itertools.combinations(locations, 2)
    )
def make_restriction_part(part_length, left_overhang, right_overhang,
                          enzyme, forbidden_enzymes,
                          assembly_enzyme='BsmBI'):
    """Build an annotated record made of a flanked, overhang-framed core.

    The core starts as ``left_overhang + random sequence + right_overhang``
    and its constraints are then resolved: both overhangs are protected with
    ``AvoidChanges``, an ``EnforcePatternOccurence`` for ``enzyme`` is set on
    the central region, and ``AvoidPattern`` is applied for every forbidden
    enzyme plus the assembly enzyme.

    Parameters
    ----------
    part_length
      Length of the random central region (between the two overhangs).
    left_overhang, right_overhang
      Sequences placed at the start and end of the core.
    enzyme
      Enzyme name enforced in the central region; also used as the label
      of the annotated site.
    forbidden_enzymes
      List of enzyme names to avoid (it is concatenated with a list, so it
      must itself be a list).
    assembly_enzyme
      Name looked up in ``Restriction`` to obtain the flank's site.

    Returns
    -------
    The record ``flank + core + flank.reverse_complement()``, where the flank
    is the assembly enzyme's site followed by ``'A'``.
    """
    # Coordinates of the three consecutive regions of the core.
    core_start = len(left_overhang)
    core_end = core_start + part_length
    full_length = core_end + len(right_overhang)
    left_overhang_location = (0, core_start)
    right_overhang_location = (core_end, full_length)
    center_location = (core_start, core_end)

    random_center = dc.random_dna_sequence(part_length)
    initial_sequence = left_overhang + random_center + right_overhang

    # Kept in a variable: it is evaluated again below to locate the site.
    enforce_enzyme = dc.EnforcePatternOccurence(
        enzyme=enzyme, location=center_location)
    constraints = [
        dc.AvoidChanges(left_overhang_location),
        dc.AvoidChanges(right_overhang_location),
        enforce_enzyme,
    ]
    for enzyme_name in forbidden_enzymes + [assembly_enzyme]:
        constraints.append(dc.AvoidPattern(enzyme=enzyme_name))

    problem = dc.DnaOptimizationProblem(
        sequence=initial_sequence, constraints=constraints)
    problem.resolve_constraints()

    core_record = dc.sequence_to_biopython_record(problem.sequence)
    for overhang_location in (left_overhang_location,
                              right_overhang_location):
        dc.annotate_record(core_record, overhang_location, 'overhang')

    # Annotate the first match reported by the enforced-enzyme evaluation.
    matches = enforce_enzyme.evaluate(problem).data['matches']
    site_location = matches[0]
    dc.annotate_record(core_record, site_location.to_tuple(), enzyme)

    assembly_site = Restriction.__dict__[assembly_enzyme].site
    flank = dc.sequence_to_biopython_record(assembly_site + 'A')
    dc.annotate_record(flank, label='flank')
    return flank + core_record + flank.reverse_complement()
def test_whole_sequence_change_constraint_100():
    """An ``EnforceChanges`` constraint already holds right after creation."""
    np.random.seed(123)
    initial_sequence = dc.random_dna_sequence(50)
    change_constraint = dc.EnforceChanges()
    problem = dc.DnaOptimizationProblem(
        sequence=initial_sequence, constraints=[change_constraint]
    )
    # No resolution step is needed: the pass is due to initial sequence
    # constraining.
    assert problem.all_constraints_pass()
    assert problem.number_of_edits() == 50
def test_enforce_changes_with_indices_as_constraint():
    """``EnforceChanges(indices=...)`` edits exactly the 15 listed indices."""
    np.random.seed(123)
    # 2 isolated positions + 10 consecutive positions + 3 more = 15 indices.
    indices = [10, 20]
    indices.extend(range(30, 40))
    indices.extend([44, 45, 46])
    initial_sequence = dc.random_dna_sequence(50)
    change_constraint = dc.EnforceChanges(indices=indices)
    problem = dc.DnaOptimizationProblem(
        sequence=initial_sequence,
        constraints=[change_constraint],
    )
    assert problem.number_of_edits() == 15
def test_feature_to_spec():
    """A feature label with three ``&``-joined parts yields three specs."""
    record = sequence_to_biopython_record(random_dna_sequence(100))
    multi_spec_label = "@gc(40-60%/20bp) & @no(BsaI_site) & @keep"
    annotate_record(record, label=multi_spec_label)
    first_feature = record.features[0]
    specs = Specification.list_from_biopython_feature(first_feature)
    assert len(specs) == 3
def test_avoid_pattern_basics():
    """``AvoidPattern(enzyme="BsaI")`` fails at first, passes once resolved."""
    numpy.random.seed(123)
    initial_sequence = random_dna_sequence(10000, seed=123)
    avoid_bsai = AvoidPattern(enzyme="BsaI")
    problem = DnaOptimizationProblem(
        sequence=initial_sequence, constraints=[avoid_bsai]
    )
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_AvoidNonUniqueSegments_as_objective():
    """After optimization the ``AvoidNonUniqueSegments(8)`` objective passes."""
    numpy.random.seed(123)
    initial_sequence = random_dna_sequence(1000, seed=123)
    uniqueness_objective = AvoidNonUniqueSegments(8)
    problem = DnaOptimizationProblem(
        sequence=initial_sequence, objectives=[uniqueness_objective]
    )
    problem.optimize()
    # Re-evaluate the objective as stored on the problem.
    evaluation = problem.objectives[0].evaluate(problem)
    assert evaluation.passes
def test_AvoidNonuniqueSegments_as_constraint():
    """``AvoidNonuniqueSegments(8)`` fails at first, passes once resolved."""
    numpy.random.seed(123)
    initial_sequence = random_dna_sequence(1000, seed=123)
    uniqueness_constraint = AvoidNonuniqueSegments(8)
    problem = DnaOptimizationProblem(
        sequence=initial_sequence, constraints=[uniqueness_constraint]
    )
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_SequenceLengthBounds():
    """``SequenceLengthBounds(500, 800)`` passes only for the 750-bp case."""
    # (sequence length, whether the constraint is expected to pass)
    cases = [(750, True), (400, False), (1200, False)]
    for length, expected in cases:
        sequence = dc.random_dna_sequence(length)
        length_bounds = dc.SequenceLengthBounds(500, 800)
        problem = dc.DnaOptimizationProblem(
            sequence=sequence,
            constraints=[length_bounds],
            logger=None,
        )
        assert problem.all_constraints_pass() == expected
def test_UniquifyAllKmers_as_objective():
    """After optimization the ``UniquifyAllKmers(8)`` objective passes."""
    numpy.random.seed(123)
    initial_sequence = random_dna_sequence(1000, seed=123)
    uniquify_objective = UniquifyAllKmers(8)
    problem = DnaOptimizationProblem(
        sequence=initial_sequence,
        objectives=[uniquify_objective],
        logger=None,
    )
    problem.optimize()
    # Re-evaluate the objective as stored on the problem.
    evaluation = problem.objectives[0].evaluate(problem)
    assert evaluation.passes
def test_UniquifyAllKmers_as_constraint():
    """``UniquifyAllKmers(8)`` fails at first, passes once resolved."""
    numpy.random.seed(123)
    initial_sequence = random_dna_sequence(1000, seed=123)
    uniquify_constraint = UniquifyAllKmers(8)
    problem = DnaOptimizationProblem(
        sequence=initial_sequence,
        constraints=[uniquify_constraint],
        logger=None,
    )
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_avoid_hairpin_basics():
    """``AvoidHairpins`` fails on a sequence seeded with inverted repeats.

    Ten random 30-bp stems are each embedded together with their reverse
    complement, separated and surrounded by random 50-bp spacers. The
    constraint must fail on that sequence and pass once resolved.
    """
    numpy.random.seed(123)
    random_sequences = [random_dna_sequence(30) for _ in range(10)]

    # Per stem: spacer, stem, spacer, reverse-complemented stem, spacer.
    segments = []
    for stem in random_sequences:
        segments.extend((
            random_dna_sequence(50),
            stem,
            random_dna_sequence(50),
            reverse_complement(stem),
            random_dna_sequence(50),
        ))
    full_sequence = "".join(segments)

    problem = DnaOptimizationProblem(
        full_sequence, constraints=[AvoidHairpins()]
    )
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_whole_sequence_change_constraint_4():
    """``EnforceChanges(minimum=4)`` resolves with between 4 and 6 edits."""
    np.random.seed(123)
    initial_sequence = dc.random_dna_sequence(50)
    change_constraint = dc.EnforceChanges(minimum=4)
    problem = dc.DnaOptimizationProblem(
        sequence=initial_sequence,
        constraints=[change_constraint],
    )
    # Show the edit count before resolution in the captured test output.
    print(problem.number_of_edits())
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert 6 >= problem.number_of_edits() >= 4
def test_codon_optimize_with_custom_table():
    """``CodonOptimize`` with an explicit usage table reaches a score of 0.

    The table comes from ``biotools.CODON_USAGE_TABLES['b_subtilis']``. The
    summed objective score must be below -10 before optimization and exactly
    0 afterwards.
    """
    initial_sequence = random_dna_sequence(1200, seed=123)
    constraints = [EnforceTranslation()]
    usage_table = biotools.CODON_USAGE_TABLES['b_subtilis']
    objectives = [CodonOptimize(codon_usage_table=usage_table)]
    problem = DnaOptimizationProblem(
        sequence=initial_sequence,
        constraints=constraints,
        objectives=objectives,
    )
    assert problem.objective_scores_sum() < -10
    problem.optimize()
    assert problem.objective_scores_sum() == 0
def test_record_with_multispec_feature():
    """A record feature with a three-part label yields three constraints.

    The first constraint must carry ``mini == 0.4`` and the second must have
    a pattern named ``"BsaI"``.
    """
    record = sequence_to_biopython_record(random_dna_sequence(100))
    multi_spec_label = "@gc(40-60%/20bp) & @no(BsaI_site) & @keep"
    annotate_record(record, label=multi_spec_label)

    problem = DnaOptimizationProblem.from_record(record)
    assert len(problem.constraints) == 3

    # The third constraint is unpacked but not inspected further.
    gc_constraint, pattern_constraint, _third = problem.constraints
    assert gc_constraint.mini == 0.4
    assert pattern_constraint.pattern.name == "BsaI"
def test_optimize_with_report(tmpdir):
    """``optimize_with_report`` succeeds and writes into the target folder."""
    initial_sequence = random_dna_sequence(10000, seed=123)
    avoid_bsmbi = AvoidPattern('BsmBI_site')
    problem = DnaOptimizationProblem(
        sequence=initial_sequence, constraints=[avoid_bsmbi]
    )

    # Start from a fresh, empty directory so any content is the report's.
    target = os.path.join(str(tmpdir), 'with_solution')
    os.mkdir(target)
    assert not os.listdir(target)

    success, message, data = problem.optimize_with_report(target)
    assert success
    assert os.listdir(target)
def test_codon_optimize_with_custom_table():
    """``CodonOptimize`` with an explicit usage table reaches a score of 0.

    The table is obtained with ``get_codons_table("b_subtilis")``. The summed
    objective score must be below -10 before optimization and exactly 0
    afterwards.
    """
    usage_table = get_codons_table("b_subtilis")
    initial_sequence = random_dna_sequence(1200, seed=123)
    constraints = [EnforceTranslation()]
    objectives = [CodonOptimize(codon_usage_table=usage_table)]
    problem = DnaOptimizationProblem(
        sequence=initial_sequence,
        constraints=constraints,
        objectives=objectives,
        logger=None,
    )
    score_before = problem.objective_scores_sum()
    assert score_before < -10
    problem.optimize()
    score_after = problem.objective_scores_sum()
    assert score_after == 0
def random_compatible_dna_sequence(sequence_length, constraints, probas=None,
                                   seed=None, max_random_iters=5000,
                                   logger='bar', **kwargs):
    """Return a random sequence after resolving ``constraints`` on it.

    A random sequence is generated, wrapped in a ``DnaOptimizationProblem``
    with the given constraints, and the problem's sequence is returned once
    ``resolve_constraints`` has run.

    Parameters
    ----------
    sequence_length
      Length passed to ``dc.random_dna_sequence``.
    constraints
      Constraints handed to the ``DnaOptimizationProblem``.
    probas, seed
      Forwarded unchanged to ``dc.random_dna_sequence``.
    max_random_iters
      Value assigned to the problem's ``max_random_iters`` attribute.
    logger
      Forwarded to the ``DnaOptimizationProblem`` constructor.
    **kwargs
      Forwarded to ``resolve_constraints``.
    """
    starting_sequence = dc.random_dna_sequence(
        sequence_length, probas=probas, seed=seed
    )
    solver = dc.DnaOptimizationProblem(
        starting_sequence, constraints=constraints, logger=logger
    )
    solver.max_random_iters = max_random_iters
    solver.resolve_constraints(**kwargs)
    return solver.sequence
def test_optimization_with_report_no_solution(tmpdir):
    """``optimization_with_report`` fails but still writes a report.

    ``AvoidChanges()`` is combined with an ``AvoidPattern`` constraint; the
    call must report failure and leave the target folder non-empty.
    """
    initial_sequence = random_dna_sequence(10000, seed=123)
    conflicting_constraints = [AvoidPattern(enzyme='BsmBI'), AvoidChanges()]
    problem = DnaOptimizationProblem(
        sequence=initial_sequence,
        constraints=conflicting_constraints,
    )

    # Start from a fresh, empty directory so any content is the report's.
    target = os.path.join(str(tmpdir), 'no_solution')
    os.mkdir(target)
    assert not os.listdir(target)

    success, message, data = optimization_with_report(target, problem)
    assert not success
    assert os.listdir(target)
def test_codon_optimize_as_hard_constraint():
    """``CodonOptimize`` used as a constraint fails first, then resolves.

    Both ``EnforceTranslation`` and ``CodonOptimize`` (species ``e_coli``)
    are restricted to ``Location(1000, 1300)`` of a 2000-bp random sequence.
    """
    numpy.random.seed(123)
    initial_sequence = random_dna_sequence(2000, seed=123)
    keep_translation = EnforceTranslation(location=Location(1000, 1300))
    optimize_codons = CodonOptimize(
        location=Location(1000, 1300), species='e_coli'
    )
    problem = DnaOptimizationProblem(
        sequence=initial_sequence,
        constraints=[keep_translation, optimize_codons],
    )
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_basics():
    """Terminal GC content plus BsaI avoidance fail first, then resolve.

    The random sequence is drawn with 30% each of G and C and 20% each of
    A and T, while ``EnforceTerminalGCContent`` asks for 0.2-0.4 with
    ``window_size=50``.
    """
    numpy.random.seed(123)
    probas = {'A': 0.2, 'T': 0.2, 'G': 0.3, 'C': 0.3}
    initial_sequence = random_dna_sequence(10000, probas=probas, seed=123)
    constraints = [
        AvoidPattern(enzyme="BsaI"),
        EnforceTerminalGCContent(mini=0.2, maxi=0.4, window_size=50),
    ]
    problem = DnaOptimizationProblem(
        sequence=initial_sequence, constraints=constraints
    )
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_optimize_with_report_no_solution(tmpdir):
    """``optimize_with_report`` fails but still writes a report.

    ``AvoidChanges()`` is combined with ``AvoidPattern("BsmBI_site")``; the
    call must report failure and leave the target folder non-empty.
    """
    initial_sequence = random_dna_sequence(10000, seed=123)
    conflicting_constraints = [AvoidPattern("BsmBI_site"), AvoidChanges()]
    problem = DnaOptimizationProblem(
        sequence=initial_sequence,
        constraints=conflicting_constraints,
        logger=None,
    )

    # Start from a fresh, empty directory so any content is the report's.
    target = os.path.join(str(tmpdir), "no_solution")
    os.mkdir(target)
    assert not os.listdir(target)

    success, message, data = problem.optimize_with_report(target)
    assert not success
    assert os.listdir(target)
def test_avoid_matches_with_phage():
    """``AvoidMatches`` against a bowtie index: 5 breaches, then resolved.

    The index path is obtained from a ``GenomeCollection`` for taxid
    "697289" (bowtie version "1"); matches of length 10 are avoided.
    """
    phage_taxid = "697289"
    genomes = GenomeCollection()
    index = genomes.get_taxid_bowtie_index_path(phage_taxid, version="1")

    initial_sequence = random_dna_sequence(30, seed=123)
    avoid_matches = AvoidMatches(bowtie_index=index, match_length=10)
    problem = DnaOptimizationProblem(
        sequence=initial_sequence,
        constraints=[avoid_matches],
        logger=None,
    )

    breach_locations = problem.constraints_evaluations().all_locations()
    assert len(breach_locations) == 5
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_basics():
    """Terminal GC content plus BsaI-site avoidance fail first, then resolve.

    The random sequence is drawn with 30% each of G and C and 20% each of
    A and T, while ``EnforceTerminalGCContent`` asks for 0.2-0.4 with
    ``window_size=50``.
    """
    numpy.random.seed(123)
    probas = {"A": 0.2, "T": 0.2, "G": 0.3, "C": 0.3}
    initial_sequence = random_dna_sequence(10000, probas=probas, seed=123)
    constraints = [
        AvoidPattern("BsaI_site"),
        EnforceTerminalGCContent(mini=0.2, maxi=0.4, window_size=50),
    ]
    problem = DnaOptimizationProblem(
        sequence=initial_sequence,
        constraints=constraints,
        logger=None,
    )
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_EnforceGCContents():
    """Windowed GC content plus BsaI avoidance fail first, then resolve.

    A GC-content objective (``target=0.4``) is attached to the problem, but
    this test only resolves and checks the constraints.
    """
    numpy.random.seed(123)
    initial_sequence = random_dna_sequence(10000, seed=123)
    constraints = [
        AvoidPattern(enzyme="BsaI"),
        EnforceGCContent(mini=0.3, maxi=0.7, window=50),
    ]
    objectives = [EnforceGCContent(target=0.4)]
    problem = DnaOptimizationProblem(
        sequence=initial_sequence,
        constraints=constraints,
        objectives=objectives,
    )
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_avoid_changes_with_indices_as_constraint():
    """``AvoidChanges(indices=...)`` shields 15 of 50 positions from edits.

    With an ``EnforceChanges`` objective over the whole sequence, the
    optimizer is expected to edit every position except the protected ones.
    """
    numpy.random.seed(123)
    # 2 isolated positions + 10 consecutive positions + 3 more = 15 indices.
    indices = [10, 20]
    indices.extend(range(30, 40))
    indices.extend([44, 45, 46])
    sequence = random_dna_sequence(50)
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[AvoidChanges(indices=indices)],
        objectives=[EnforceChanges()],
        logger=None,
    )
    problem.optimize()
    expected_edits = 50 - 15
    assert problem.number_of_edits() == expected_edits
def create_new_primer(existing_primers):
    """Create a new primer based on the primers created so far.

    A random 20-nt sequence is constrained with ``AvoidHeterodimerization``
    against ``existing_primers`` (``tmax=3``) and with ``AvoidPattern`` for
    "3x3mer" and "4xG", then optimized towards a GC content of 0.6.

    Returns the problem's sequence after constraint resolution and
    optimization.
    """
    starting_sequence = random_dna_sequence(length=20)
    constraints = [
        AvoidHeterodimerization(existing_primers, tmax=3),
        AvoidPattern("3x3mer"),
        AvoidPattern("4xG"),
    ]
    objectives = [EnforceGCContent(target=0.6)]
    primer_problem = DnaOptimizationProblem(
        sequence=starting_sequence,
        constraints=constraints,
        objectives=objectives,
        logger=None,
    )
    primer_problem.resolve_constraints()
    primer_problem.optimize()
    return primer_problem.sequence