def test_circular_sequence_basic():
    """Resolve the constraints of several random circular problems.

    Every sequence starts with ``CTC``, ends with ``CGT`` and carries a
    ``CGTCTC`` motif between its two random 100-nucleotide stretches. Each
    problem is expected to breach its constraints before resolution and to
    satisfy all of them afterwards.
    """
    np.random.seed(123)

    # The feature is still young, so it is exercised on a handful of
    # different random sequences rather than on a single one.
    for _ in range(4):
        left_part = dc.random_dna_sequence(100)
        right_part = dc.random_dna_sequence(100)
        dna_sequence = "".join(
            ["CTC", left_part, "CGTCTC", right_part, "CGT"]
        )

        no_bsmbi = dc.AvoidPattern("BsmBI_site")
        # Location end (250) exceeds the sequence length (212 nucleotides).
        gc_content = dc.EnforceGCContent(
            mini=0.4, maxi=0.6, location=(150, 250), window=50
        )
        unique_kmers = dc.UniquifyAllKmers(k=9, location=(10, 100))

        problem = dc.CircularDnaOptimizationProblem(
            sequence=dna_sequence,
            constraints=[no_bsmbi, gc_content, unique_kmers],
            logger=None,
        )

        assert not problem.all_constraints_pass()
        problem.resolve_constraints()
        assert problem.all_constraints_pass()
def test_circular_sequence_optimize_with_report(tmpdir):
    """Run ``optimize_with_report`` on a circular optimization problem.

    The report is written into a fresh, empty sub-folder of ``tmpdir``. The
    problem must breach its constraints beforehand, satisfy them afterwards,
    and the exported record must differ from the input sequence.
    """
    np.random.seed(123)

    # Sequence starting with CTC, ending with CGT, with a CGTCTC motif
    # between two random 100-nucleotide stretches.
    left_part = dc.random_dna_sequence(100)
    right_part = dc.random_dna_sequence(100)
    dna_sequence = "".join(["CTC", left_part, "CGTCTC", right_part, "CGT"])

    no_bsmbi = dc.AvoidPattern("BsmBI_site")
    gc_content = dc.EnforceGCContent(
        mini=0.4, maxi=0.6, location=(150, 250), window=50
    )
    unique_kmers = dc.UniquifyAllKmers(k=9, location=(10, 100))
    problem = dc.CircularDnaOptimizationProblem(
        sequence=dna_sequence,
        constraints=[no_bsmbi, gc_content, unique_kmers],
        logger=None,
    )

    # Create the (initially empty) folder that receives the report.
    report_dir = os.path.join(str(tmpdir), "circular_with_solution")
    os.mkdir(report_dir)
    assert os.listdir(report_dir) == []

    assert not problem.all_constraints_pass()
    # The call is expected to return exactly three values.
    _success, _message, _data = problem.optimize_with_report(report_dir)
    assert problem.all_constraints_pass()

    optimized_record = problem.to_record()
    assert str(optimized_record.seq) != dna_sequence
def test_constraints_reports():
    """Check the constraints-breaches report on the EMMA Genbank records.

    Loads every file of ``tests/data/10_emma_genbanks``, computes the
    breaches of a fixed set of constraints, and verifies the total number of
    features on the resulting records and the size of the generated PDF.
    """
    genbank_dir = os.path.join("tests", "data", "10_emma_genbanks")
    records = []
    for filename in os.listdir(genbank_dir):
        filepath = os.path.join(genbank_dir, filename)
        records.append(dc.load_record(filepath, name=filename))

    # Constraints checked on each record.
    avoided_patterns = [
        "BsaI_site",
        "BsmBI_site",
        "BbsI_site",
        "8x1mer",
        "5x3mer",
        "9x2mer",
    ]
    constraints = [dc.AvoidPattern(pattern) for pattern in avoided_patterns]
    constraints.append(dc.AvoidHairpins(stem_size=20, hairpin_window=200))
    constraints.append(dc.EnforceGCContent(mini=0.3, maxi=0.7, window=100))

    # Breaches table, then records built from it.
    dataframe = cr.constraints_breaches_dataframe(constraints, records)
    records = cr.records_from_breaches_dataframe(dataframe, records)
    total_features = sum(len(record.features) for record in records)
    assert total_features == 157

    pdf_data = cr.breaches_records_to_pdf(records)
    pdf_size = len(pdf_data)
    assert 70000 < pdf_size < 80000
def experiment_2(seed=123):
    """Optimize a random coding sequence and return the final sequence.

    A random protein of length 1000 is reverse-translated, then the
    resulting DNA is constrained (translation kept, GC content between 40%
    and 60% over 50-nucleotide windows) and codon-optimized for *E. coli*.

    Parameters
    ----------
    seed
      Value passed to ``np.random.seed`` before anything is generated.

    Returns
    -------
    The ``sequence`` attribute of the problem after constraint resolution
    and optimization.
    """
    np.random.seed(seed)
    protein = dc.random_protein_sequence(1000)
    coding_sequence = dc.reverse_translate(protein)

    constraints = [
        dc.EnforceTranslation(),
        dc.EnforceGCContent(mini=0.4, maxi=0.6, window=50),
    ]
    objectives = [dc.CodonOptimize(species="e_coli")]
    problem = dc.DnaOptimizationProblem(
        sequence=coding_sequence,
        constraints=constraints,
        objectives=objectives,
        logger=None,
    )

    problem.resolve_constraints()
    problem.optimize()
    return problem.sequence
def work(self):
    """Build the manufacturability report for the uploaded records.

    Reads the records from ``self.data.files``, checks them against a fixed
    list of constraints, and produces three downloadable artifacts: a PDF
    of the breaches, a zip archive of the annotated records (filled only
    when ``self.data.include_genbanks`` is truthy) and an Excel spreadsheet
    of the breaches table.

    Returns
    -------
    dict
      Keys ``"pdf_report"``, ``"records"`` and ``"spreadsheet"``; each maps
      to a dict with ``"data"``, ``"name"`` and ``"mimetype"`` entries.
    """
    data = self.data
    self.logger(message="Generating report...")
    records = records_from_data_files(data.files)

    constraints = [
        dc.AvoidPattern("BsaI_site"),
        dc.AvoidPattern("BsmBI_site"),
        dc.AvoidPattern("BbsI_site"),
        dc.AvoidPattern("SapI_site"),
        dc.AvoidPattern("8x1mer"),
        dc.AvoidPattern("5x3mer"),
        dc.AvoidPattern("9x2mer"),
        dc.AvoidHairpins(stem_size=20, hairpin_window=200),
        # NOTE(review): two GC checks on the same window size with different
        # bounds; presumably a strict and a lenient threshold. Confirm.
        dc.EnforceGCContent(mini=0.3, maxi=0.7, window=100),
        dc.EnforceGCContent(mini=0.1, maxi=0.9, window=100),
        dc.UniquifyAllKmers(k=15),
    ]

    # Breaches table, exported to an in-memory Excel file.
    dataframe = cr.constraints_breaches_dataframe(constraints, records)
    spreadsheet_io = BytesIO()
    dataframe.to_excel(spreadsheet_io)

    # Records rebuilt from the breaches table; optionally written as Genbank
    # files into an in-memory archive. The archive is returned even if empty.
    records = cr.records_from_breaches_dataframe(dataframe, records)
    zipped_records = flametree.file_tree("@memory")
    if data.include_genbanks:
        for record in records:
            target = zipped_records._file("%s.gb" % record.id)
            write_record(record, target)

    pdf_io = BytesIO()
    cr.breaches_records_to_pdf(records, pdf_io, logger=self.logger)

    return {
        "pdf_report": {
            "data": data_to_html_data(
                pdf_io.getvalue(),
                "pdf",
                filename="manufacturability_report.pdf",
            ),
            "name": "manufacturability_report.pdf",
            "mimetype": "application/pdf",
        },
        "records": {
            "data": data_to_html_data(
                zipped_records._close(),
                "zip",
                filename="manufacturability_annotated_records.zip",
            ),
            "name": "manufacturability_annotated_records.zip",
            "mimetype": "application/zip",
        },
        "spreadsheet": {
            "data": data_to_html_data(
                spreadsheet_io.getvalue(),
                "xlsx",
                filename="manufacturability_report.xlsx",
            ),
            "name": "manufacturability_report.xlsx",
            # Full media type, including the "application/" prefix, in line
            # with the PDF and zip entries above.
            "mimetype": (
                "application/"
                "vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            ),
        },
    }
problem: - The sequence is designed to have a cross-origin BsmBI site that will need to be removed, because the location-less specification ``AvoidPattern`` is interpreted as applying to the full circle. - The specification ``EnforceGCContent`` is cross-origin since its location is 1500-2500, and the sequence is ~2000bp long. """ import dnachisel as dc dna_sequence = "CTC%sCGTCTC%sCGT" % ( dc.random_dna_sequence(1000), dc.random_dna_sequence(1000), ) constraints = [ dc.AvoidPattern("BsmBI_site"), dc.EnforceGCContent(mini=0.4, maxi=0.6, location=(1500, 2500), window=50), dc.UniquifyAllKmers(k=9, location=(10, 1000)), ] problem = dc.CircularDnaOptimizationProblem( sequence=dna_sequence, constraints=constraints ) print("BEFORE OPTIMIZATION:\n\n", problem.constraints_text_summary()) problem.resolve_constraints() print("AFTER OPTIMIZATION:\n\n", problem.constraints_text_summary())
from copy import deepcopy from collections import OrderedDict import dnachisel as dc import pandas import seaborn as sns sns.set() # DEFINE HOW OPTIMIZATION PROBLEMS ARE CREATED specifications = { "~keep": dc.AvoidChanges(), "~no(CG)": dc.AvoidPattern("CG"), "~codon_optimize": dc.CodonOptimize(species="e_coli"), "~unique_kmers": dc.UniquifyAllKmers(20), "~gc(39%)": dc.EnforceGCContent(target=0.39, window=200), } class_to_label = { spec.__class__: label for label, spec in specifications.items() } sequence = dc.load_record("record.gb") def create_problem(boost_profile): location = dc.Location(1000, 9247) objectives = [] for spec_name, boost in boost_profile.items(): spec = specifications[spec_name] spec = spec.copy_with_changes(boost=boost, location=location) objectives.append(spec) return dc.DnaOptimizationProblem(
with open("sequence_to_optimize.txt", "r") as f: sequence = f.read() deluxe_dna = CommercialDnaOffer( name="DeluxeDNA.com", sequence_constraints=[SequenceLengthConstraint(max_length=4000)], pricing=PerBasepairPricing(0.20), lead_time=10, ) cheap_dna = CommercialDnaOffer( name="CheapDNA.com", sequence_constraints=[ NoPatternConstraint(enzyme="BsaI"), dnachisel.EnforceGCContent(0.3, 0.7, window=60), ], pricing=PerBasepairPricing(0.10), lead_time=15, ) # BLOCKS TO CHUNKS ASSEMBLY gibson_blocks_assembly_station = DnaAssemblyStation( name="Gibson Blocks Assembly", assembly_method=GibsonAssemblyMethod( overhang_selector=TmSegmentSelector(), min_segment_length=1000, max_segment_length=6000, duration=8, cost=16,
import dnachisel.reports.constraints_reports as cr
import os

# Load every record found in the genbanks/ folder (10 in the example),
# naming each record after its file.
records = [
    dc.load_record(os.path.join("genbanks", filename), name=filename)
    for filename in os.listdir("genbanks")
]

# Constraints checked on each record: forbidden patterns first, then the
# hairpin and GC-content checks.
constraints = [
    dc.AvoidPattern(pattern)
    for pattern in (
        "BsaI_site",
        "BsmBI_site",
        "BbsI_site",
        "8x1mer",
        "5x3mer",
        "9x2mer",
    )
] + [
    dc.AvoidHairpins(stem_size=20, hairpin_window=200),
    dc.EnforceGCContent(mini=0.3, maxi=0.7, window=100),
]

# Write the breaches spreadsheet.
dataframe = cr.constraints_breaches_dataframe(constraints, records)
dataframe.to_excel("breaches.xlsx")

# Write the PDF of plots from the records built out of the breaches table.
records = cr.records_from_breaches_dataframe(dataframe, records)
cr.breaches_records_to_pdf(records, "breaches_plots.pdf")

print("Done! Check breaches.xlsx and breaches_plots.pdf for results.")