def test_constraints_reports():
    """Check the breaches dataframe, annotated records and PDF report.

    Every file found in the ``10_emma_genbanks`` test folder is loaded as a
    record, a fixed list of constraints is evaluated on all records, and the
    test verifies the total number of features on the returned records as
    well as the size of the generated PDF data.
    """
    data_folder = os.path.join("tests", "data", "10_emma_genbanks")
    records = []
    for file_name in os.listdir(data_folder):
        record_path = os.path.join(data_folder, file_name)
        records.append(dc.load_record(record_path, name=file_name))

    # Constraints to be checked on each record.
    avoided_patterns = (
        "BsaI_site",
        "BsmBI_site",
        "BbsI_site",
        "8x1mer",
        "5x3mer",
        "9x2mer",
    )
    constraints = [dc.AvoidPattern(pattern) for pattern in avoided_patterns]
    constraints.append(dc.AvoidHairpins(stem_size=20, hairpin_window=200))
    constraints.append(dc.EnforceGCContent(mini=0.3, maxi=0.7, window=100))

    # Build the breaches table, then the records and PDF derived from it.
    breaches = cr.constraints_breaches_dataframe(constraints, records)
    breach_records = cr.records_from_breaches_dataframe(breaches, records)
    feature_total = sum(len(rec.features) for rec in breach_records)
    assert feature_total == 157

    pdf_data = cr.breaches_records_to_pdf(breach_records)
    assert 70000 < len(pdf_data) < 72000
def test_optimization_2():
    """Check that optimizing for manufacturability lowers the quoted price.

    The first 5500 characters of the test sequence are quoted on a Gibson
    assembly station supplied by two commercial offers, optimized with
    ``OptimizeManufacturability`` as the objective under an
    ``EnforceTranslation`` constraint, then quoted again. The price must be
    above 850 before the optimization and below 580 after it.
    """
    fasta_path = os.path.join(
        "tests", "data", "test_optimization_sequence_2.fa"
    )
    record = load_record(fasta_path)
    sequence = str(record.seq)[:5500]

    suppliers = [
        CommercialDnaOffer(
            name="DeluxeDNA.com",
            sequence_constraints=[SequenceLengthConstraint(max_length=4000)],
            pricing=PerBasepairPricing(0.20),
            lead_time=10,
        ),
        CommercialDnaOffer(
            name="CheapDNA.com",
            sequence_constraints=[
                NoPatternConstraint(enzyme="BsaI"),
                EnforceGCContent(0.3, 0.7, window=60),
            ],
            pricing=PerBasepairPricing(0.10),
            lead_time=15,
        ),
    ]

    # Blocks-to-chunks assembly station.
    gibson_method = GibsonAssemblyMethod(
        overhang_selector=FixedSizeSegmentSelector(10),
        min_segment_length=1000,
        max_segment_length=6000,
        duration=8,
        cost=16,
    )
    # NOTE: a_star_factor="auto" was commented out in this test, so the
    # station is built without that argument.
    station = DnaAssemblyStation(
        name="Gibson Blocks Assembly",
        assembly_method=gibson_method,
        supplier=suppliers,
        coarse_grain=30,
        fine_grain=False,
        memoize=True,
    )

    initial_quote = station.get_quote(sequence)
    assert initial_quote.price > 850

    objective = OptimizeManufacturability(station)
    translation = EnforceTranslation(location=(0, 4998))
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[translation],
        objectives=[objective],
    )
    problem.randomization_threshold = 0  # Forces "random search" mode
    problem.max_random_iters = 5
    problem.optimize()

    print("OPTIMIZATION DONE, GENERATING REPORT")
    final_quote = station.get_quote(problem.sequence)
    assert final_quote.price < 580
def test_cuba_example_1():
    """Resolve and optimize the problem described by ``cuba_example_1.gbk``.

    The problem built from the record must start with failing constraints,
    pass them all after ``resolve_constraints()``, have an objective score
    sum below -100 before ``optimize()`` and above -0.1 after it.
    """
    genbank_path = os.path.join(
        'tests', 'tests_from_genbanks', 'genbanks', 'cuba_example_1.gbk'
    )
    problem = DnaOptimizationProblem.from_record(load_record(genbank_path))

    # Constraints: broken at first, satisfied once resolved.
    constraints_ok_initially = problem.all_constraints_pass()
    assert not constraints_ok_initially
    problem.resolve_constraints()
    assert problem.all_constraints_pass()

    # Objectives: poor score before optimization, near zero afterwards.
    score_before = problem.objective_scores_sum()
    assert score_before < -100
    problem.optimize()
    score_after = problem.objective_scores_sum()
    assert score_after > -0.1
def test_optimization_1():
    """Check objective scores before and after a manufacturability optimization.

    A Gibson assembly station with two commercial offers is used as the
    target of ``OptimizeManufacturability``. The objective score sum must
    lie in (-367, -366) before optimizing and in (-244, -243) after.
    """
    suppliers = [
        CommercialDnaOffer(
            name="Company InGen",
            pricing=PerBasepairPricing(0.08),
            sequence_constraints=[NoPatternConstraint(enzyme="AarI")],
        ),
        CommercialDnaOffer(
            name="Company Delux",
            pricing=PerBasepairPricing(0.66),
            sequence_constraints=[],
        ),
    ]
    gibson_method = GibsonAssemblyMethod(
        overhang_selector=FixedSizeSegmentSelector(20),
        min_segment_length=200,
        max_segment_length=1200,
    )
    # NOTE: a_star_factor="auto" was commented out in this test, so the
    # station is built without that argument.
    station = DnaAssemblyStation(
        name="Gibson Assembly Station",
        assembly_method=gibson_method,
        supplier=suppliers,
        coarse_grain=20,
    )

    fasta_path = os.path.join(
        "tests", "data", "test_optimization_sequence_1.fa"
    )
    sequence = load_record(fasta_path)
    objective = OptimizeManufacturability(station)
    problem = DnaOptimizationProblem(sequence=sequence, objectives=[objective])

    # The quote itself is not inspected; the call is kept as in the
    # original test.
    objective.get_quote(problem)

    initial_score = problem.objective_scores_sum()
    assert -367 < initial_score < -366

    problem.randomization_threshold = 0
    problem.max_random_iters = 5
    problem.optimize()

    final_score = problem.objective_scores_sum()
    assert -244 < final_score < -243
DnaOptimizationProblem, load_record, Location, EnforceTranslation, EnforceGCContent, AvoidPattern, ) print("DOWNLOADING AND PARSING THE GENBANK DATA...") url = ( "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?" + "db=nucleotide&id=48994873&rettype=gb&retmode=txt" ) genbank_data = request.urlopen(url).read().decode("utf-8") genbank_record = load_record(StringIO(genbank_data), file_format="genbank") print("INITIALIZING THE PROBLEM WITH CONSTRAINTS FOR EACH GENE...") constraints = [] for feature in genbank_record.features: if feature.type == "gene" and len(feature.location.parts) == 1: location = Location.from_biopython_location(feature.location) if (len(location) % 3 == 0) and len(location) > 100: gene_constraints = [ EnforceTranslation(location = location), AvoidPattern("BsmBI_site", location), EnforceGCContent( mini=0.40, maxi=0.60, window=150, location=location ), ]
"""Test of the AvoidBlastMatches constraint: breaches against a set of
avoided sequences are detected, then removed by resolving the constraints."""

import os
from dnachisel import (
    AvoidBlastMatches,
    random_dna_sequence,
    DnaOptimizationProblem,
    load_record,
)

# Upper-cased sequence of the example record, shared by the tests below.
sequence_path = os.path.join("tests", "data", "example_sequence.gbk")
sequence = str(load_record(sequence_path).seq.upper())


def test_avoid_blast_matches():
    """Check that BLAST matches are found in the sequence and can be resolved.

    The constraint must initially fail with 10 breach locations, and all
    constraints must pass after ``resolve_constraints()``.
    """
    avoided_seqs = [
        "GTCCTCATGCGAAAGCTACGATCGCCAACCCTGT",
        "ACCCACCTCGTTACGTCCACGGCACGAGGAATGATCTCGAGTTGCTTT",
    ]
    constraint = AvoidBlastMatches(sequences=avoided_seqs, min_align_length=8)
    problem = DnaOptimizationProblem(
        sequence=sequence, constraints=[constraint]
    )

    # Before resolution: the constraint is breached at 10 locations.
    assert not problem.all_constraints_pass()
    evaluation = constraint.evaluate(problem)
    assert len(evaluation.locations) == 10

    # After resolution: every constraint is satisfied.
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
sns.set()

# DEFINE HOW OPTIMIZATION PROBLEMS ARE CREATED

# Label -> specification template; the labels are the keys expected in the
# boost profiles given to create_problem().
specifications = {
    "~keep": dc.AvoidChanges(),
    "~no(CG)": dc.AvoidPattern("CG"),
    "~codon_optimize": dc.CodonOptimize(species="e_coli"),
    "~unique_kmers": dc.UniquifyAllKmers(20),
    "~gc(39%)": dc.EnforceGCContent(target=0.39, window=200),
}

# Reverse lookup: specification class -> label.
class_to_label = {
    template.__class__: name for name, template in specifications.items()
}

sequence = dc.load_record("record.gb")


def create_problem(boost_profile):
    """Build an optimization problem whose objectives follow a boost profile.

    Parameters
    ----------
    boost_profile
      Mapping of the form ``{label: boost}`` where each label is a key of
      ``specifications``. Every listed specification is copied with the
      given boost and restricted to the location (1000, 9247).

    Returns
    -------
    problem
      A ``DnaOptimizationProblem`` on the module-level ``sequence``, with one
      ``EnforceTranslation`` constraint over the same location and the
      boosted specifications as objectives.
    """
    location = dc.Location(1000, 9247)
    objectives = [
        specifications[label].copy_with_changes(boost=boost, location=location)
        for label, boost in boost_profile.items()
    ]
    translation = dc.EnforceTranslation(location=location)
    return dc.DnaOptimizationProblem(
        sequence,
        constraints=[translation],
        objectives=objectives,
    )
AvoidPattern, AvoidChanges, EnforceTranslation, HomopolymerPattern, EnforceGCContent, CodonOptimize, load_record, ) from io import StringIO import urllib # DOWNLOAD THE PLASMID FROM THE WEB (it is a 7kb plasmid with 3 genes) url = "http://www.stevekellylab.com/constructs/pDex/pDex577-G.gb" response = urllib.request.urlopen(url) record_file = StringIO(response.read().decode("utf-8")) record = load_record(record_file, fmt="genbank") CDS_list = [(int(f.location.start), int(f.location.end), int(f.location.strand)) for f in record.features if f.type == "CDS"] # DEFINE CONSTRAINTS dna_provider_constraints = [ AvoidPattern("BsaI_site"), AvoidPattern("AarI_site"), AvoidPattern("9xA"), AvoidPattern("9xT"), AvoidPattern(HomopolymerPattern("6xG")), AvoidPattern(HomopolymerPattern("6xC")), EnforceGCContent(0.4, 0.65),
def test_genbank_import_from_record_unknown_specs():
    """``from_record`` must raise TypeError with an empty specifications dict."""
    example_record = load_record(example_sequence_path)
    no_specifications = {}
    with pytest.raises(TypeError):
        # The returned problem is irrelevant: only the exception matters.
        DnaOptimizationProblem.from_record(
            example_record, specifications_dict=no_specifications
        )
def test_genbank_import_from_record():
    """The example record must yield 5 constraints and 3 objectives."""
    example_record = load_record(example_sequence_path)
    problem = DnaOptimizationProblem.from_record(example_record)

    constraint_count = len(problem.constraints)
    assert constraint_count == 5
    objective_count = len(problem.objectives)
    assert objective_count == 3
from urllib import request
from io import StringIO
from dnachisel import (
    DnaOptimizationProblem,
    load_record,
    Location,
    EnforceTranslation,
    EnforceGCContent,
    AvoidPattern,
)

print("DOWNLOADING AND PARSING THE GENBANK DATA...")

url = (
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?"
    "db=nucleotide&id=48994873&rettype=gb&retmode=txt"
)
# Read the payload inside a ``with`` block so the HTTP response is closed
# as soon as the data has been downloaded.
with request.urlopen(url) as response:
    genbank_data = response.read().decode("utf-8")
# NOTE(review): the keyword naming the file format has been spelled both
# ``fmt`` and ``file_format`` in scripts of this code base -- confirm which
# one the installed dnachisel's load_record accepts.
genbank_record = load_record(StringIO(genbank_data), fmt="genbank")

print("INITIALIZING THE PROBLEM WITH CONSTRAINTS FOR EACH GENE...")

constraints = []
for feature in genbank_record.features:
    # Only consider genes whose location is made of a single part.
    if feature.type != "gene" or len(feature.location.parts) != 1:
        continue
    location = Location.from_biopython_location(feature.location)
    # Keep locations longer than 100 whose length is a multiple of 3.
    if (len(location) % 3 == 0) and len(location) > 100:
        constraints.extend(
            [
                # The location is passed by keyword so that it cannot be
                # bound to another leading parameter of EnforceTranslation.
                EnforceTranslation(location=location),
                AvoidPattern("BsmBI_site", location),
                EnforceGCContent(
                    mini=0.40, maxi=0.60, window=150, location=location
                ),
            ]
        )
problem = DnaOptimizationProblem(genbank_record, constraints)

print("RESOLVING THE CONSTRAINTS...")
import os

import dnachisel as dc
import dnachisel.reports.constraints_reports as cr

# IMPORT THE 10 RECORDS FROM THE genbanks/ FOLDER
# os.listdir() returns entries in arbitrary order, so the listing is sorted
# to give the records list the same order on every run and platform.
records = [
    dc.load_record(os.path.join("genbanks", filename), name=filename)
    for filename in sorted(os.listdir("genbanks"))
]

# DEFINE THE CONSTRAINTS TO BE CHECKED ON EACH RECORD
constraints = [
    dc.AvoidPattern("BsaI_site"),
    dc.AvoidPattern("BsmBI_site"),
    dc.AvoidPattern("BbsI_site"),
    dc.AvoidPattern("8x1mer"),
    dc.AvoidPattern("5x3mer"),
    dc.AvoidPattern("9x2mer"),
    dc.AvoidHairpins(stem_size=20, hairpin_window=200),
    dc.EnforceGCContent(mini=0.3, maxi=0.7, window=100),
]

# CREATE A SPREADSHEET AND PLOTS OF THE BREACHES
dataframe = cr.constraints_breaches_dataframe(constraints, records)
dataframe.to_excel("breaches.xlsx")
records = cr.records_from_breaches_dataframe(dataframe, records)
cr.breaches_records_to_pdf(records, "breaches_plots.pdf")
much improved. The final sequence (with the original annotations) is exported to Genbank. """ from dnachisel import (DnaOptimizationProblem, AvoidPattern, AvoidChanges, EnforceTranslation, HomopolymerPattern, EnforceGCContent, CodonOptimize, load_record) from io import StringIO import urllib # DOWNLOAD THE PLASMID FROM THE WEB (it is a 7kb plasmid with 3 genes) url = "http://www.stevekellylab.com/constructs/pDex/pDex577-G.gb" response = urllib.request.urlopen(url) record_file = StringIO(response.read().decode('utf-8')) record = load_record(record_file, fmt='genbank') CDS_list = [(int(f.location.start), int(f.location.end), int(f.location.strand)) for f in record.features if f.type == "CDS"] # DEFINE CONSTRAINTS dna_provider_constraints = [ AvoidPattern("BsaI_site"), AvoidPattern("AarI_site"), AvoidPattern(HomopolymerPattern("A", 9)), AvoidPattern(HomopolymerPattern("T", 9)), AvoidPattern(HomopolymerPattern("G", 6)), AvoidPattern(HomopolymerPattern("C", 9)), EnforceGCContent(0.4, 0.65),