def load_template(filename, insert, destination):
    """Load a GenBank vector, add an insert, and collect its specifications.

    A single GenBank record is read from ``filename`` and handed to
    ``insert_into_vector`` together with ``destination`` and ``insert``.
    A ``DnaOptimizationProblem`` is then built from the resulting record.

    Returns
    -------
    vector, objectives, constraints
      The record returned by ``insert_into_vector``, followed by new lists
      holding the objectives and the constraints of the problem built from
      that record.
    """
    template = SeqIO.read(filename, "genbank")
    # The insert location is returned too, but nothing below uses it.
    vector, _insert_location = insert_into_vector(template, destination, insert)
    parsed_problem = DnaOptimizationProblem.from_record(vector)
    # NOTE: no extra constraints are added for the insert or the vector
    # backbone; an earlier note here said dnachisel had not implemented
    # MultiLocation yet.
    constraints = list(parsed_problem.constraints)
    objectives = list(parsed_problem.objectives)
    return vector, objectives, constraints
def work(self):
    """Run the optimization described by ``self.data`` and return a report.

    The record is either built from the submitted sequence (upper-cased)
    and annotated with the edited features, or taken as the first record
    of the submitted file.

    Returns
    -------
    dict
      ``zip_file`` (data, name and mimetype of the zipped report),
      ``success`` and ``summary`` as returned by
      ``optimization_with_report``.
    """
    job = self.data
    self.logger(message='Initializing...')
    if not job.editFeatures:
        records, _fmt = records_from_data_file(job.file)
        record = records[0]
    else:
        record = sequence_to_biopython_record(job.sequence.upper())
        # Features are annotated in (start, end) order.
        ordered_features = sorted(
            job.editedFeatures.values(), key=lambda f: (f.start, f.end)
        )
        for feat in ordered_features:
            annotate_record(
                record,
                feature_type="misc_feature",
                location=(feat.start, feat.end),
                label=feat.label,
            )

    problem = DnaOptimizationProblem.from_record(record)
    problem.max_random_iters = 1000
    problem.logger = self.logger
    success, summary, zip_data = optimization_with_report(
        target="@memory", problem=problem, project_name=record.id
    )
    zip_file = {
        'data': data_to_html_data(zip_data, 'zip'),
        'name': 'optimization_report.zip',
        'mimetype': 'application/zip',
    }
    return {'zip_file': zip_file, 'success': success, 'summary': summary}
def work(self):
    """Run the optimization described by ``self.data`` and return a report.

    The record is either built from the submitted sequence (upper-cased)
    and annotated with the edited features, or taken as the first record
    loaded from the submitted file.

    Returns
    -------
    dict
      ``zip_file`` (data, name and mimetype of the zipped report),
      ``success`` and ``summary`` as returned by
      ``problem.optimize_with_report``.
    """
    job = self.data
    self.logger(message="Initializing...")
    if not job.editFeatures:
        record = records_from_data_files([job.file])[0]
    else:
        record = sequence_to_biopython_record(job.sequence.upper())
        # Features are annotated in (start, end) order.
        ordered_features = sorted(
            job.editedFeatures.values(), key=lambda f: (f.start, f.end)
        )
        for feat in ordered_features:
            annotate_record(
                record,
                feature_type="misc_feature",
                location=(feat.start, feat.end),
                label=feat.label,
            )

    problem = DnaOptimizationProblem.from_record(record, logger=self.logger)
    problem.optimization_stagnation_tolerance = 30
    success, summary, zip_data = problem.optimize_with_report(
        target="@memory", project_name=record.id
    )
    zip_file = {
        "data": data_to_html_data(zip_data, "zip"),
        "name": "optimization_report.zip",
        "mimetype": "application/zip",
    }
    return {"zip_file": zip_file, "success": success, "summary": summary}
def test_rca_example():
    """Check that a Genbank with a ~harmonize_rca feature loads and optimizes."""
    genbank_path = os.path.join(
        "tests", "tests_from_genbanks", "genbanks", "rca_example.gb"
    )
    problem = DnaOptimizationProblem.from_record(genbank_path)
    expected_objectives = "[HarmonizeRCA[0-105(+)](e_coli -> h_sapiens)]"
    assert str(problem.objectives) == expected_objectives
    first_objective = problem.objectives[0]
    assert first_objective.original_species == "e_coli"
    assert first_objective.species == "h_sapiens"
    problem.optimize()
def test_record_with_multispec_feature():
    """Check that one feature label holding three specs yields three constraints."""
    record = sequence_to_biopython_record(random_dna_sequence(100))
    annotate_record(record, label="@gc(40-60%/20bp) & @no(BsaI_site) & @keep")
    problem = DnaOptimizationProblem.from_record(record)
    assert len(problem.constraints) == 3
    # Only the first two constraints are inspected further.
    first_spec, second_spec, _third_spec = problem.constraints
    assert first_spec.mini == 0.4
    assert second_spec.pattern.name == "BsaI"
def test_cuba_example_1():
    """Resolve and optimize the problem defined by cuba_example_1.gbk."""
    genbank_dir = os.path.join('tests', 'tests_from_genbanks', 'genbanks')
    record = load_record(os.path.join(genbank_dir, 'cuba_example_1.gbk'))
    problem = DnaOptimizationProblem.from_record(record)

    # Constraints fail at first and pass once resolved.
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()

    # The objectives score is poor before optimization, near zero after.
    score_before = problem.objective_scores_sum()
    assert score_before < -100
    problem.optimize()
    score_after = problem.objective_scores_sum()
    assert score_after > -0.1
def test_cuba_example_1():
    """Resolve and optimize the problem loaded from the cuba_example_1.gbk path."""
    genbank_dir = os.path.join("tests", "tests_from_genbanks", "genbanks")
    genbank_path = os.path.join(genbank_dir, "cuba_example_1.gbk")
    problem = DnaOptimizationProblem.from_record(genbank_path)

    # Constraints fail at first and pass once resolved.
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()

    # The objectives score is poor before optimization, near zero after.
    score_before = problem.objective_scores_sum()
    assert score_before < -100
    problem.optimize()
    score_after = problem.objective_scores_sum()
    assert score_after > -0.1
def test_all_shorthands():
    """Compile all shorthands as a check that nothing is broken."""
    numpy.random.seed(123)
    record = sequence_to_biopython_record(random_dna_sequence(1000))

    # (location, label) pairs, annotated in this order.
    shorthand_annotations = [
        ((100, 900), "@no(CATG)"),
        ((100, 900), "@gc(40-60%)"),
        ((100, 900), "@insert(AarI_site)"),
        ((650, 752), "@cds"),
        ((100, 200), "@keep"),
        ((250, 273), "@primer"),
        ((250, 280), "@change"),
        ((943, 950), "@sequence(AKGNTKT)"),
        ((955, 958), "@sequence(ATT|ATC|GGG)"),
    ]
    for location, label in shorthand_annotations:
        annotate_record(record, location, label=label)

    problem = DnaOptimizationProblem.from_record(record)
    # AllowPrimer counts for 4 specs.
    assert len(problem.constraints) == 13
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
"""Basic demo of the high-level method optimize_with_report.""" import os from dnachisel import DnaOptimizationProblem genbank_path = os.path.join("data", "example_sequence.gbk") report_folder = os.path.join("reports", "optimization_with_report") problem = DnaOptimizationProblem.from_record(genbank_path) success, message, _ = problem.optimize_with_report(target=report_folder) print(message + " A report was generated in " + report_folder)
def domesticate(
    self,
    dna_sequence=None,
    protein_sequence=None,
    is_cds="default",
    codon_optimization=None,
    extra_constraints=(),
    extra_objectives=(),
    final_record_target=None,
    edit=False,
    barcode="",
    barcode_spacer="AA",
    report_target=None,
):
    """Domesticate a sequence.

    Parameters
    ----------

    dna_sequence
      The DNA sequence string to domesticate. If a ``SeqRecord`` is given,
      the specifications read from it by
      ``DnaOptimizationProblem.from_record`` are shifted by the length of
      the left flank and appended to ``extra_constraints`` and
      ``extra_objectives``.

    protein_sequence
      Amino-acid sequence of the protein, which will be converted into
      a DNA sequence string. When provided, ``is_cds`` is forced to True.

    is_cds
      If True, sequence edits are restricted to synonymous mutations.
      The value ``"default"`` means "use ``self.cds_by_default``".

    codon_optimization
      Either None for no codon optimization or the name of an organism
      supported by DnaChisel.

    extra_constraints
      List of extra constraints to apply to the domesticated sequences.
      Each constraint is either a DnaChisel constraint or a function
      (dna_sequence => DnaChisel constraint).

    extra_objectives
      List of extra optimization objectives to apply to the domesticated
      sequences. Each objective is either a DnaChisel objective or a
      function (dna_sequence => DnaChisel objective).

    final_record_target
      Path to the file where to write the final genbank.

    edit
      Turn to True to allow sequence edits (if it is False and not all
      constraints are originally satisfied, a failed domestication result,
      i.e. with attribute ``success`` set to False, will be returned).

    report_target
      Target for the sequence optimization report (a folder path, or a zip
      path).

    barcode
      A sequence of DNA that will be added to the left of the sequence once
      the domestication is done.

    barcode_spacer
      Nucleotides to be added between the barcode and the enzyme (optional,
      the idea here is that they will make sure to avoid the creation of
      unwanted cutting sites).

    NOTE(review): ``barcode`` and ``barcode_spacer`` are not used in this
    method's body; presumably they are handled by a caller. TODO confirm.

    Returns
    -------

    DomesticationResult
      Built from, in order: the problem's ``sequence_before``, the final
      record, the edits record, the report data (None when no report was
      generated), the success flag and the message.
    """
    if is_cds == "default":
        is_cds = self.cds_by_default

    flank_offset = len(self.left_flank)
    if isinstance(dna_sequence, SeqRecord):
        record_problem = DnaOptimizationProblem.from_record(dna_sequence)
        # Record specifications are relative to the bare sequence; shift
        # them to account for the left flank that gets prepended below.
        for spec in record_problem.constraints + record_problem.objectives:
            spec.location += flank_offset
        extra_constraints = list(extra_constraints) + record_problem.constraints
        # Bug fix: this used to start from ``extra_constraints``, which
        # dropped the caller's extra objectives and turned every constraint
        # into an objective.
        extra_objectives = list(extra_objectives) + record_problem.objectives

    if protein_sequence is not None:
        is_cds = True
        dna_sequence = reverse_translate(protein_sequence)

    # Specifications given as functions are built from the sequence.
    constraints = [
        c(dna_sequence) if callable(c) else c
        for c in list(extra_constraints) + self.constraints
    ]
    location = Location(flank_offset, flank_offset + len(dna_sequence))
    if is_cds:
        constraints.append(EnforceTranslation(location=location))

    objectives = [
        o(dna_sequence) if callable(o) else o
        for o in list(extra_objectives) + self.objectives
    ]
    if codon_optimization:
        objectives.append(
            CodonOptimize(species=codon_optimization, location=location)
        )
    if self.minimize_edits:
        objectives.append(AvoidChanges())

    extended_sequence = self.left_flank + dna_sequence + self.right_flank
    if (not is_cds) and (not edit):
        # No edits allowed: freeze the whole sequence.
        constraints.append(AvoidChanges())

    problem = DnaOptimizationProblem(
        extended_sequence,
        constraints=constraints,
        objectives=objectives,
        logger=self.logger,
    )
    all_constraints_pass = problem.all_constraints_pass()
    # The AvoidChanges objective added for ``minimize_edits`` does not
    # count as something to optimize.
    no_objectives = (len(problem.objectives) - self.minimize_edits) == 0

    report_data = None
    optimization_successful = True
    message = ""
    if not (all_constraints_pass and no_objectives):
        problem.n_mutations = self.simultaneous_mutations
        if report_target is not None:
            (success, message, report_data) = problem.optimize_with_report(
                target=report_target, project_name=self.name
            )
            optimization_successful = success
        else:
            # Best effort: a failure is reported through the result object
            # instead of being raised.
            try:
                problem.resolve_constraints()
                problem.optimize()
            except Exception as err:
                message = str(err)
                optimization_successful = False

    final_record = problem.to_record(
        with_original_features=True,
        with_original_spec_features=False,
        with_constraints=False,
        with_objectives=False,
    )
    edits_record = problem.to_record(
        with_original_features=True,
        with_original_spec_features=False,
        with_constraints=False,
        with_objectives=False,
        with_sequence_edits=True,
    )
    if final_record_target is not None:
        SeqIO.write(final_record, final_record_target, "genbank")

    return DomesticationResult(
        problem.sequence_before,
        final_record,
        edits_record,
        report_data,
        optimization_successful,
        message,
    )
def test_genbank_import_from_filepath():
    """Check the number of specs read when importing from a file path."""
    problem = DnaOptimizationProblem.from_record(example_sequence_path)
    n_constraints = len(problem.constraints)
    n_objectives = len(problem.objectives)
    assert n_constraints == 5
    assert n_objectives == 3
def test_genbank_import_from_record_unknown_specs():
    """Check that importing with an empty specifications dict raises TypeError."""
    record = load_record(example_sequence_path)
    no_known_specs = {}
    with pytest.raises(TypeError):
        DnaOptimizationProblem.from_record(
            record, specifications_dict=no_known_specs
        )
EnforceTranslation(location=(500, 1400)) ], objectives=[CodonOptimize(species='e_coli', location=(500, 1400))]) # Solve the constraints, optimize with respect to the objective problem.resolve_constraints() problem.optimize() # Print sumarries to check that constraints # print(problem.constraints_text_summary()) # print(problem.objectives_text_summary()) # ADD NOTES problemTwo = DnaOptimizationProblem.from_record("my_record.gb") problemTwo.optimize_with_report(target="report.zip") ''' ===> SUCCESS - all constraints evaluations pass ✔PASS ┍ AvoidPattern[0-10000](pattern:BsaI(GGTCTC)) │ Passed. Pattern not found ! ✔PASS ┍ EnforceGCContent[0-10000](mini:0.30, maxi:0.70, window:50) │ Passed ! ✔PASS ┍ EnforceTranslation[500-1400(+)] │ Enforced by nucleotides restrictions ===> TOTAL OBJECTIVES SCORE: -8.83 -8.83 ┍ MaximizeCAI[500-1400](e_coli) │ Codon opt. on window 500-1400 scored -8.83E+00