def test_no_solution_error_exhaustive_search():
    """Check the error raised for an unsatisfiable 7-nucleotide problem.

    The all-T sequence "TTTTTTT" is given an AvoidChanges((0, 4)) constraint
    together with EnforceGCContent(mini=0.8). Resolving the constraints is
    expected to raise NoSolutionError with a message containing
    'Exhaustive search failed'.
    """
    conflicting_constraints = [
        AvoidChanges((0, 4)),
        EnforceGCContent(mini=0.8),
    ]
    problem = DnaOptimizationProblem(
        sequence="TTTTTTT",
        constraints=conflicting_constraints,
    )

    with pytest.raises(NoSolutionError) as raised:
        problem.resolve_constraints()

    error_text = str(raised.value)
    assert 'Exhaustive search failed' in error_text
def test_no_solution_error_frozen_region():
    """Check the error raised when a breach sits in a fully frozen sequence.

    The sequence contains CGTCTC, the pattern is given as 'BsmBI_site', and
    AvoidChanges() is applied with no location. Resolving the constraints is
    expected to raise NoSolutionError with a message containing
    'region that cannot be mutated'.
    """
    freeze_all = AvoidChanges()
    forbid_site = AvoidPattern('BsmBI_site')
    problem = DnaOptimizationProblem(
        sequence="AAAAATCGTCTCTTTT",
        constraints=[freeze_all, forbid_site],
    )

    with pytest.raises(NoSolutionError) as raised:
        problem.resolve_constraints()

    error_text = str(raised.value)
    assert 'region that cannot be mutated' in error_text
def test_no_solution_error_frozen_region():
    """Check the error raised when a breach sits in a fully frozen sequence.

    The sequence contains CGTCTC, the pattern is given as enzyme='BsmBI', and
    AvoidChanges() is applied with no location. Resolving the constraints is
    expected to raise NoSolutionError with a message containing
    'Constraint breach in frozen region'.
    """
    freeze_all = AvoidChanges()
    forbid_site = AvoidPattern(enzyme='BsmBI')
    problem = DnaOptimizationProblem(
        sequence="AAAAATCGTCTCTTTT",
        constraints=[freeze_all, forbid_site],
    )

    with pytest.raises(NoSolutionError) as raised:
        problem.resolve_constraints()

    error_text = str(raised.value)
    assert 'Constraint breach in frozen region' in error_text
def test_no_solution_error_random_search():
    """Check the error raised for an unsatisfiable longer all-T problem.

    AvoidChanges((0, 10)) is combined with EnforceGCContent(mini=0.8) on an
    all-T sequence. Resolving the constraints is expected to raise
    NoSolutionError with a message containing 'Random search did not'.
    """
    all_t_sequence = "TTTTTTTTTTTTTTTTTTTTTTTTTTTT"
    conflicting_constraints = [
        AvoidChanges((0, 10)),
        EnforceGCContent(mini=0.8),
    ]
    problem = DnaOptimizationProblem(
        sequence=all_t_sequence,
        constraints=conflicting_constraints,
    )

    with pytest.raises(NoSolutionError) as raised:
        problem.resolve_constraints()

    error_text = str(raised.value)
    assert 'Random search did not' in error_text
def test_pattern_and_reverse():
    """Remove many forward and reverse CGTCTC sites with a bounded edit count.

    The sequence is a concatenation of 65 six-nucleotide sites (CGTCTC and
    GAGACG). After resolving AvoidPattern('BsmBI_site') and optimizing the
    AvoidChanges objective, the summed edits array must stay below 70.
    """
    forward_site = "CGTCTC"
    reverse_site = "GAGACG"
    # (repeat count, site) pairs, in the order they appear in the sequence.
    layout = (
        (10, forward_site),
        (25, reverse_site),
        (15, forward_site),
        (15, reverse_site),
    )
    sequence = "".join(count * site for count, site in layout)

    problem = DnaOptimizationProblem(
        sequence,
        constraints=[AvoidPattern('BsmBI_site')],
        objectives=[AvoidChanges()],
    )
    problem.resolve_constraints()
    problem.optimize()

    total_edits = sum(problem.sequence_edits_as_array())
    assert total_edits < 70
def test_optimization_with_report_no_solution(tmpdir):
    """A failed optimization_with_report call must still write to the target.

    The problem pairs AvoidPattern(enzyme='BsmBI') with a location-less
    AvoidChanges() on a 10000-nt seeded random sequence. The call is expected
    to report failure and to leave the initially empty target folder
    non-empty.
    """
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(10000, seed=123),
        constraints=[AvoidPattern(enzyme='BsmBI'), AvoidChanges()],
    )

    target = os.path.join(str(tmpdir), 'no_solution')
    os.mkdir(target)
    # The freshly created folder starts out empty.
    assert not os.listdir(target)

    success, _message, _data = optimization_with_report(target, problem)

    assert not success
    # Something was written to the folder despite the failure.
    assert os.listdir(target)
def test_avoid_changes_with_indices_as_objectives():
    """Check edit counts when AvoidChanges(indices=...) competes with EnforceChanges.

    With an EnforceChanges boost of 0.5 the test expects every position except
    the 15 listed indices to be edited; with a boost of 1.5 it expects all 50
    positions to be edited.
    """
    numpy.random.seed(123)
    indices = [10, 20, *range(30, 40), 44, 45, 46]
    sequence = random_dna_sequence(50)

    # (EnforceChanges boost, expected number of edits) for each scenario.
    scenarios = (
        (0.5, 50 - len(indices)),
        (1.5, 50),
    )
    for enforce_boost, expected_edits in scenarios:
        problem = DnaOptimizationProblem(
            sequence=sequence,
            objectives=[
                EnforceChanges(boost=enforce_boost),
                AvoidChanges(indices=indices),
            ],
            logger=None,
        )
        problem.optimize()
        assert problem.number_of_edits() == expected_edits
def test_optimize_with_report_no_solution(tmpdir):
    """A failed optimize_with_report call must still write to the target.

    The problem pairs AvoidPattern("BsmBI_site") with a location-less
    AvoidChanges() on a 10000-nt seeded random sequence. The call is expected
    to report failure and to leave the initially empty target folder
    non-empty.
    """
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(10000, seed=123),
        constraints=[AvoidPattern("BsmBI_site"), AvoidChanges()],
        logger=None,
    )

    target = os.path.join(str(tmpdir), "no_solution")
    os.mkdir(target)
    # The freshly created folder starts out empty.
    assert not os.listdir(target)

    success, _message, _data = problem.optimize_with_report(target)

    assert not success
    # Something was written to the folder despite the failure.
    assert os.listdir(target)
def test_AvoidChanges_with_max_edits():
    """Resolve constraints under an AvoidChanges(max_edits=2) budget.

    The starting sequence "ATATATATATA" is expected to fail at least one
    constraint; after resolve_constraints() every constraint must pass.
    """
    numpy.random.seed(1)

    whole_sequence = (0, 11, 1)
    # (pattern, required number of occurrences) over the whole sequence.
    required_counts = (("A", 6), ("T", 4))

    constraints = [
        AvoidChanges(max_edits=2),
        AvoidPattern("ATATA"),
    ]
    constraints += [
        EnforcePatternOccurence(
            pattern, occurences=count, location=whole_sequence
        )
        for pattern, count in required_counts
    ]

    problem = DnaOptimizationProblem(
        sequence="ATATATATATA",
        constraints=constraints,
        logger=None,
    )

    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_avoid_change_as_objectives_basics():
    """Check that a larger AvoidChanges boost yields fewer sequence edits.

    For boosts 0, 0.1, 0.2 and 1 the same seeded 1000-nt sequence is
    optimized. The test expects more than 40 differences at boost 0, a
    strictly decreasing count as the boost grows, and no difference at all
    for the last boost.
    """
    numpy.random.seed(123)

    def count_differences(boost):
        # The sequence is regenerated with the same seed on every call, in the
        # same position relative to optimize() as in a plain loop body.
        sequence = random_dna_sequence(1000, seed=123)
        gc_objective = EnforceGCContent(
            mini=0.45, maxi=0.55, window=80
        ).copy_with_changes(locations_span=300)
        keep_objective = AvoidChanges(boost=boost).as_passive_objective()
        problem = DnaOptimizationProblem(
            sequence=sequence,
            objectives=[gc_objective, keep_objective],
        )
        problem.optimize()
        return sequences_differences(
            problem.sequence, problem.sequence_before
        )

    results = [count_differences(boost) for boost in (0, 0.1, 0.2, 1)]

    assert results[0] > 40
    # Strictly decreasing across the four boosts.
    assert all(
        earlier > later for earlier, later in zip(results, results[1:])
    )
    assert results[-1] == 0
EnforceTranslation(), EnforceGCContent(mini=0.35, maxi=0.65, window=50), #TWIST: 25% and 65% GC ], objectives=[CodonOptimize(codon_usage_table=codon_table_11)], ) if taxid and not input_path: print("\nOptimizing codons for taxonomic ID: " + taxid) #Read gene fasta sequence and initiate optimizer if not protein_flag: problem = DnaOptimizationProblem( sequence=gene, constraints=[ #EnforceSequence(sequence = "ATG", location=(0, 2)), AvoidChanges(location=(0, 2)), AvoidPattern("BsmBI_site", "BamHI"), EnforceTranslation(), EnforceGCContent(mini=0.35, maxi=0.65, window=50), #TWIST: 25% and 65% GC ], objectives=[CodonOptimize(species=taxid)], ) #Output and reporting print("\nBefore optimization:") print(problem.constraints_text_summary()) print(problem.objectives_text_summary()) problem.resolve_constraints(final_check=True)
def domesticate(
    self,
    dna_sequence=None,
    protein_sequence=None,
    is_cds="default",
    codon_optimization=None,
    extra_constraints=(),
    extra_objectives=(),
    final_record_target=None,
    edit=False,
    barcode="",
    barcode_spacer="AA",
    report_target=None,
):
    """Domesticate a sequence.

    The sequence is flanked with ``self.left_flank`` and ``self.right_flank``
    and, when needed, optimized under the domesticator's constraints and
    objectives plus any extra ones provided.

    Parameters
    ----------

    dna_sequence
      The DNA sequence string to domesticate. A ``SeqRecord`` is also
      accepted: its specifications are then read with
      ``DnaOptimizationProblem.from_record``, shifted by the length of the
      left flank, and added to the extra constraints and objectives.

    protein_sequence
      Amino-acid sequence of the protein, which will be converted into
      a DNA sequence string. When provided, ``is_cds`` is forced to True.

    is_cds
      If True, sequence edits are restricted to synonymous mutations. The
      default value "default" means ``self.cds_by_default`` is used.

    codon_optimization
      Either None for no codon optimization or the name of an organism
      supported by DnaChisel.

    extra_constraints
      List of extra constraints to apply to the domesticated sequences.
      Each constraint is either a DnaChisel constraint or a function
      (dna_sequence => DnaChisel constraint).

    extra_objectives
      List of extra optimization objectives to apply to the domesticated
      sequences. Each objective is either a DnaChisel constraint or a
      function (dna_sequence => DnaChisel constraint).

    final_record_target
      Path to the file where to write the final genbank.

    edit
      Turn to True to allow sequence edits (if it is False and not all
      constraints are originally satisfied, a failed domestication result,
      i.e. with attribute ``success`` set to False, will be returned).

    report_target
      Target for the sequence optimization report (a folder path, or a zip
      path).

    barcode
      A sequence of DNA that will be added to the left of the sequence once
      the domestication is done.
      NOTE(review): not referenced in this method's body; presumably
      handled elsewhere -- confirm.

    barcode_spacer
      Nucleotides to be added between the barcode and the enzyme (optional,
      the idea here is that they will make sure to avoid the creation of
      unwanted cutting sites).
      NOTE(review): not referenced in this method's body; presumably
      handled elsewhere -- confirm.

    Returns
    -------

    result
      A ``DomesticationResult`` built from, in order: the problem's
      sequence before optimization, final_record, edits_record,
      report_data, success, msg.
    """
    if is_cds == "default":
        is_cds = self.cds_by_default

    if isinstance(dna_sequence, SeqRecord):
        record_problem = DnaOptimizationProblem.from_record(dna_sequence)
        # The record's specifications are expressed relative to the bare
        # sequence; shift them since the left flank is prepended below.
        for spec in record_problem.constraints + record_problem.objectives:
            spec.location += len(self.left_flank)
        extra_constraints = (
            list(extra_constraints) + record_problem.constraints
        )
        # Objectives must be built from the objectives (not from the
        # constraints), otherwise user-provided extra objectives are lost
        # and every constraint ends up duplicated as an objective.
        extra_objectives = (
            list(extra_objectives) + record_problem.objectives
        )

    if protein_sequence is not None:
        is_cds = True
        dna_sequence = reverse_translate(protein_sequence)

    # Specifications may be given as factories taking the sequence.
    constraints = [
        c(dna_sequence) if callable(c) else c
        for c in list(extra_constraints) + self.constraints
    ]
    # Location of the (unflanked) sequence inside the extended sequence.
    location = Location(
        len(self.left_flank), len(self.left_flank) + len(dna_sequence)
    )
    if is_cds:
        constraints.append(EnforceTranslation(location=location))

    objectives = [
        o(dna_sequence) if callable(o) else o
        for o in list(extra_objectives) + self.objectives
    ]
    if codon_optimization:
        objectives.append(
            CodonOptimize(species=codon_optimization, location=location)
        )
    if self.minimize_edits:
        objectives.append(AvoidChanges())

    extended_sequence = self.left_flank + dna_sequence + self.right_flank
    if (not is_cds) and (not edit):
        # Edits are not allowed: freeze the whole sequence.
        constraints.append(AvoidChanges())

    problem = DnaOptimizationProblem(
        extended_sequence,
        constraints=constraints,
        objectives=objectives,
        logger=self.logger,
    )
    all_constraints_pass = problem.all_constraints_pass()
    # Discount the AvoidChanges objective appended above when
    # self.minimize_edits is set (assumes it is a bool or 0/1).
    no_objectives = (len(problem.objectives) - self.minimize_edits) == 0
    report_data = None
    optimization_successful = True
    message = ""

    # Only run the solver when there is something to fix or to optimize.
    if not (all_constraints_pass and no_objectives):
        problem.n_mutations = self.simultaneous_mutations
        if report_target is not None:
            (
                optimization_successful,
                message,
                report_data,
            ) = problem.optimize_with_report(
                target=report_target, project_name=self.name
            )
        else:
            try:
                problem.resolve_constraints()
                problem.optimize()
            except Exception as err:
                # Deliberately broad: any failure is reported through the
                # returned result rather than raised to the caller.
                message = str(err)
                optimization_successful = False

    record_options = dict(
        with_original_features=True,
        with_original_spec_features=False,
        with_constraints=False,
        with_objectives=False,
    )
    final_record = problem.to_record(**record_options)
    edits_record = problem.to_record(
        with_sequence_edits=True, **record_options
    )
    if final_record_target is not None:
        SeqIO.write(final_record, final_record_target, "genbank")

    return DomesticationResult(
        problem.sequence_before,
        final_record,
        edits_record,
        report_data,
        optimization_successful,
        message,
    )
AvoidPattern("9xA"), AvoidPattern("9xT"), AvoidPattern(HomopolymerPattern("6xG")), AvoidPattern(HomopolymerPattern("6xC")), EnforceGCContent(0.4, 0.65), EnforceGCContent(0.25, 0.80, window=50), ] CDS_constraints = [] for (start, end, strand) in CDS_list: if strand == 1: promoter_region = (start - 30, start - 1) else: promoter_region = (end + 1, end + 30) CDS_constraints += [ AvoidChanges(promoter_region), EnforceTranslation((start, end, strand)), ] # DEFINE OBJECTIVES objectives = [EnforceGCContent(0.51, boost=10000)] + [ CodonOptimize("e_coli", location=(start, end, strand)) for (start, end, strand) in CDS_list ] # DEFINE AND SOLVE THE PROBLEM problem = DnaOptimizationProblem( sequence=record, constraints=dna_provider_constraints + CDS_constraints,
"""Example script: AvoidChanges used as an objective to limit sequence edits.

For several boost values, a seeded random sequence is optimized with a
windowed EnforceGCContent objective and a passive AvoidChanges objective, and
the number of modified nucleotides is printed for each boost.
"""

from dnachisel import (
    DnaOptimizationProblem,
    random_dna_sequence,
    AvoidPattern,
    AvoidChanges,
    sequences_differences,
    EnforceGCContent,
)

# Note: no location is given to AvoidChanges, so it applies globally.
for boost in (0, 0.1, 1, 10.0):
    sequence = random_dna_sequence(1000, seed=123)
    gc_objective = EnforceGCContent(mini=0.45, maxi=0.55, window=80)
    keep_objective = AvoidChanges(boost=boost).as_passive_objective()
    problem = DnaOptimizationProblem(
        sequence=sequence,
        objectives=[gc_objective, keep_objective],
    )
    problem.optimize()
    differences = sequences_differences(
        problem.sequence, problem.sequence_before
    )
    report_line = "%d nucleotides modified for boost=%.1f" % (
        differences,
        boost,
    )
    print(report_line)