def test_emma_construct():
    """Check that a Golden Gate quote sources six parts from the EMMA library.

    A Golden Gate station is supplied by two commercial offers and the
    "EMMA" parts library. The sequence read from ``SEQUENCE_PATH`` is quoted
    with an assembly plan, and the plan must contain exactly six parts whose
    source is named 'EMMA'.
    """
    # Suppliers: one parts library and two per-basepair vendors with
    # different maximum fragment lengths.
    emma_library = GoldenGatePartsLibrary(
        "EMMA", fasta_file=EMMA_PATH, memoize=True
    )
    ingen_offer = CommercialDnaOffer(
        name="InGen",
        pricing=PerBasepairPricing(0.14),
        sequence_constraints=[SequenceLengthConstraint(max_length=2400)],
    )
    tdi_offer = CommercialDnaOffer(
        name="TDI",
        pricing=PerBasepairPricing(0.08),
        sequence_constraints=[SequenceLengthConstraint(max_length=600)],
    )

    golden_gate = GoldenGateAssemblyMethod(
        min_segment_length=40,
        max_segment_length=5000,
        enzyme='BsmBI',
    )
    station = DnaAssemblyStation(
        name='GoldenGate Assembly Station',
        assembly_method=golden_gate,
        supplier=[ingen_offer, emma_library, tdi_offer],
        coarse_grain=100,
        fine_grain=10,
        logger='bar',
        a_star_factor='auto',
    )

    target = str(load_record(SEQUENCE_PATH).seq)
    quote = station.get_quote(target, with_assembly_plan=True)

    # Count the plan entries that were sourced from the EMMA library.
    emma_count = sum(
        1
        for part in quote.assembly_plan.values()
        if part.source.name == 'EMMA'
    )
    assert emma_count == 6
def test_optimization_2():
    """Check that manufacturability optimization lowers the quoted price.

    The first 5500 nucleotides of the test sequence are quoted on a Gibson
    station supplied by two commercial offers. The price must be above 850
    before optimization and below 580 after optimizing under an
    ``EnforceTranslation`` constraint on location (0, 4998).
    """
    fasta_path = os.path.join(
        "tests", "data", "test_optimization_sequence_2.fa"
    )
    sequence = str(load_record(fasta_path).seq)[:5500]

    premium_offer = CommercialDnaOffer(
        name="DeluxeDNA.com",
        sequence_constraints=[SequenceLengthConstraint(max_length=4000)],
        pricing=PerBasepairPricing(0.20),
        lead_time=10,
    )
    budget_constraints = [
        NoPatternConstraint(enzyme="BsaI"),
        EnforceGCContent(0.3, 0.7, window=60),
    ]
    budget_offer = CommercialDnaOffer(
        name="CheapDNA.com",
        sequence_constraints=budget_constraints,
        pricing=PerBasepairPricing(0.10),
        lead_time=15,
    )

    # BLOCKS TO CHUNKS ASSEMBLY
    gibson_method = GibsonAssemblyMethod(
        overhang_selector=FixedSizeSegmentSelector(10),
        min_segment_length=1000,
        max_segment_length=6000,
        duration=8,
        cost=16,
    )
    station = DnaAssemblyStation(
        name="Gibson Blocks Assembly",
        assembly_method=gibson_method,
        supplier=[premium_offer, budget_offer],
        coarse_grain=30,
        fine_grain=False,
        memoize=True,
    )

    # Baseline price of the unoptimized sequence.
    initial_quote = station.get_quote(sequence)
    assert initial_quote.price > 850

    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[EnforceTranslation(location=(0, 4998))],
        objectives=[OptimizeManufacturability(station)],
    )
    problem.randomization_threshold = 0  # Forces "random search" mode
    problem.max_random_iters = 5
    problem.optimize()

    print("OPTIMIZATION DONE, GENERATING REPORT")
    final_quote = station.get_quote(problem.sequence)
    assert final_quote.price < 580
def test_optimization_1():
    """Check the manufacturability score before and after optimization.

    A Gibson station is supplied by a vendor that rejects AarI patterns and
    by a more expensive vendor without constraints. The summed objective
    score must lie in (-367, -366) before optimizing and in (-244, -243)
    afterwards.
    """
    constrained_offer = CommercialDnaOffer(
        name="Company InGen",
        pricing=PerBasepairPricing(0.08),
        sequence_constraints=[NoPatternConstraint(enzyme="AarI")],
    )
    unconstrained_offer = CommercialDnaOffer(
        name="Company Delux",
        pricing=PerBasepairPricing(0.66),
        sequence_constraints=[],
    )

    gibson_method = GibsonAssemblyMethod(
        overhang_selector=FixedSizeSegmentSelector(20),
        min_segment_length=200,
        max_segment_length=1200,
    )
    station = DnaAssemblyStation(
        name="Gibson Assembly Station",
        assembly_method=gibson_method,
        supplier=[constrained_offer, unconstrained_offer],
        coarse_grain=20,
    )

    fasta_path = os.path.join(
        "tests", "data", "test_optimization_sequence_1.fa"
    )
    record = load_record(fasta_path)

    objective = OptimizeManufacturability(station)
    problem = DnaOptimizationProblem(sequence=record, objectives=[objective])

    # The returned quote is not used below; the call itself is kept so that
    # get_quote is still exercised on the unoptimized problem.
    objective.get_quote(problem)

    initial_score = problem.objective_scores_sum()
    assert -367 < initial_score < -366

    problem.randomization_threshold = 0
    problem.max_random_iters = 5
    problem.optimize()

    final_score = problem.objective_scores_sum()
    assert -244 < final_score < -243
def test_simple_gibson_assembly_station():
    """Quote a seeded random 5000-nt sequence on a single Gibson station.

    The station orders fragments from one per-basepair vendor. The quote
    must be accepted and priced between 405.7 and 405.8.
    """
    provider = CommercialDnaOffer(
        name="Company InGen",
        pricing=PerBasepairPricing(0.08),
    )
    gibson_method = GibsonAssemblyMethod(
        overhang_selector=TmOverhangSelector(),
        min_segment_length=300,
        max_segment_length=1200,
    )
    station = DnaAssemblyStation(
        name='Gibson Assembly Station',
        assembly_method=gibson_method,
        dna_source=provider,
        coarse_grain=10,
    )

    # Fixed seed, so the same sequence is requested on every run.
    target = random_dna_sequence(5000, seed=1234)
    quote = station.get_quote(target, with_assembly_plan=True)

    assert quote.accepted
    assert 405.7 < quote.price < 405.8
For instance if you increase `cuts_number_penalty` to 500 you will see the number of segments to order fall from 7 to just 3. """ from dnaweaver import (CommercialDnaOffer, SequenceLengthConstraint, PerBasepairPricing, NoPatternConstraint, DnaAssemblyStation, GibsonAssemblyMethod, DnaSourcesComparator, random_dna_sequence, TmOverhangSelector) cheap_dna_offer = CommercialDnaOffer( name="CheapDNA.com", sequence_constraints=[ NoPatternConstraint("GGTCTC"), SequenceLengthConstraint(max_length=4000) ], pricing=PerBasepairPricing(per_basepair_price=0.10), ) deluxe_dna_offer = CommercialDnaOffer( name="DeluxeDNA.com", sequence_constraints=[SequenceLengthConstraint(max_length=3000)], pricing=PerBasepairPricing(per_basepair_price=0.20), ) assembly_station = DnaAssemblyStation( name="Gibson Assembly Station", assembly_method=GibsonAssemblyMethod(overhang_selector=TmOverhangSelector( min_size=18, max_size=22, min_tm=55, max_tm=65,
# DEFINE THE SUPPLY NETWORK # The EMMA collection of mammalian genetic parts emma_collection = GoldenGatePartsLibrary( "EMMA", parts_dict={ record.id: str(record.seq) for record in SeqIO.parse("emma_parts.fa", "fasta") }, memoize=True, ) # A medium-price vendor who can provide long parts company_ingen = CommercialDnaOffer( name="DeluxeDNA", pricing=PerBasepairPricing(0.14), sequence_constraints=[SequenceLengthConstraint(max_length=2400)], ) # A cheap vendor who can provide small parts < 1kb company_tdi = CommercialDnaOffer( name="CheapDNA", pricing=PerBasepairPricing(0.08), sequence_constraints=[SequenceLengthConstraint(max_length=1000)], ) # An oligos vendor (for oligo assembly and ) company_oligo = CommercialDnaOffer( name="Oligo vendor", pricing=FixedCostPricing(5), sequence_constraints=[SequenceLengthConstraint(max_length=100)],
-------------- If we increase the nucleotide resolution of our solver to 50 (to make it faster) the solver will overlook the cut in location 1240, and therefore come to the conclusion that there is no solution to the problem. In another example (golden_gate_with_forced_cuts_example.py) we show a smarter way to solve this problem by forcing the location of certain cuts. """ from dnaweaver import (CommercialDnaOffer, GoldenGateAssemblyMethod, PerBasepairPricing, DnaAssemblyStation) from dnachisel import random_dna_sequence, enzyme_pattern, AvoidPattern company = CommercialDnaOffer( name="Company InGen", sequence_constraints=[AvoidPattern(enzyme='AarI')], pricing=PerBasepairPricing(0.08) ) assembly_station = DnaAssemblyStation( name='GoldenGate Assembly Station', assembly_method=GoldenGateAssemblyMethod( min_segment_length=50, max_segment_length=2000 ), dna_source=company, coarse_grain=50, logger='bars' ) sequence = random_dna_sequence(4000, seed=123) sites_locations = enzyme_pattern("AarI").find_matches(sequence) enzyme_sites_centres = [(l.start + l.end) // 2 for l in sites_locations]
def test_full_report():
    """Quote a sequence on a multi-level supply network and build a report.

    The network chains an oligo assembly station, two block-level stations
    (Gibson and Golden Gate) that share one sources comparator, a PCR
    station backed by a BLAST database, and a top-level Gibson station. The
    quote for the sequence read from ``SEQUENCE_PATH`` must be priced
    between 3500 and 3600; the quote is then converted to a ``JsonQuote``
    and passed to ``make_folder_report`` with the '@memory' target.
    """
    import time

    astar = 'auto'
    use_memo = True

    # OLIGO COMPANIES
    oligo_vendor = CommercialDnaOffer(
        name="Oligo.com",
        sequence_constraints=[SequenceLengthConstraint(max_length=200)],
        pricing=PerBasepairPricing(0.10),
        lead_time=7,
    )
    deluxe_vendor = CommercialDnaOffer(
        name="DeluxeDNA.com",
        sequence_constraints=[SequenceLengthConstraint(max_length=4000)],
        pricing=PerBasepairPricing(0.20),
        lead_time=10,
    )
    cheap_vendor = CommercialDnaOffer(
        name="CheapDNA.com",
        sequence_constraints=[
            SequenceLengthConstraint(max_length=4000),
            NoPatternConstraint(enzyme='AarI'),
            NoPatternConstraint(enzyme='BsaI'),
            lambda seq: (0.4 < gc_content(seq) < 0.6),
        ],
        pricing=PerBasepairPricing(0.10),
        lead_time=15,
    )

    # OLIGOS TO BLOCKS ASSEMBLY
    oligo_method = BuildAGenomeAssemblyMethod(
        overhang_selector=TmOverhangSelector(
            min_size=15, max_size=25, min_tm=50, max_tm=70
        ),
        min_segment_length=40,
        max_segment_length=200,
        sequence_constraints=[SequenceLengthConstraint(max_length=1500)],
        duration=8,
        cost=2,
    )
    oligo_station = DnaAssemblyStation(
        name="Oligo Assembly Station",
        assembly_method=oligo_method,
        dna_source=oligo_vendor,
        coarse_grain=20,
        fine_grain=False,
        a_star_factor=astar,
    )

    # BLOCKS TO CHUNKS ASSEMBLY
    blocks_comparator = DnaSourcesComparator(
        name='bs_comparator',
        suppliers=[oligo_station, cheap_vendor, deluxe_vendor],
        memoize=use_memo,
    )
    # Keyword arguments common to both block-level stations.
    block_station_options = dict(
        dna_source=blocks_comparator,
        fine_grain=False,
        memoize=use_memo,
        a_star_factor=astar,
    )
    gibson_blocks_station = DnaAssemblyStation(
        name="Gibson Blocks Assembly",
        assembly_method=GibsonAssemblyMethod(
            overhang_selector=FixedSizeOverhangSelector(80),
            min_segment_length=1000,
            max_segment_length=4000,
            duration=8,
            cost=16,
        ),
        coarse_grain=300,
        **block_station_options
    )
    goldengate_blocks_station = DnaAssemblyStation(
        name="Golden Gate Blocks Assembly",
        assembly_method=GoldenGateAssemblyMethod(
            enzyme='BsmBI',
            wildcard_basepair="A",
            min_segment_length=1000,
            max_segment_length=4000,
            duration=5,
            cost=6,
        ),
        coarse_grain=400,
        **block_station_options
    )
    genome_station = PcrOutStation(
        "E. coli Genome (PCR)",
        primers_dna_source=oligo_vendor,
        blast_database=ECOLI_DB_PATH,
        max_amplicon_length=10000,
        extra_time=3,
        extra_cost=1,
    )

    # CHUNKS TO MEGACHUNKS ASSEMBLY
    chunk_sources = DnaSourcesComparator([
        genome_station,
        goldengate_blocks_station,
        gibson_blocks_station,
    ])
    chunks_station = DnaAssemblyStation(
        name="Chunks assembly (Gibson)",
        assembly_method=GibsonAssemblyMethod(
            overhang_selector=FixedSizeOverhangSelector(300),
            min_segment_length=7000,
            max_segment_length=25000,
            duration=8,
        ),
        dna_source=chunk_sources,
        coarse_grain=1000,
        fine_grain=None,
        logger='bars',
        a_star_factor=astar,
        memoize=use_memo,
    )

    with open(SEQUENCE_PATH, "r") as handle:
        sequence = handle.read()

    # Time the pre-BLAST step together with the quote computation.
    started = time.time()
    genome_station.pre_blast(sequence)
    quote = chunks_station.get_quote(sequence, with_assembly_plan=True)
    finished = time.time()
    print("ELAPSED:", "%.02f" % (finished - started))

    print(quote)
    if quote.accepted:
        print(quote.assembly_step_summary())
    assert 3500 < quote.price < 3600

    # Build the report from the quote; the report's return value is unused.
    quote.compute_full_assembly_tree()
    quote.compute_fragments_final_locations()
    json_quote = JsonQuote.from_dnaweaver_quote(quote)
    autocolor_quote_sources(json_quote)
    make_folder_report(json_quote, '@memory')
NoPatternConstraint, SequenceLengthConstraint, ) from dnaweaver.utils import OptimizeManufacturability import dnachisel import matplotlib.pyplot as plt with open("sequence_to_optimize.txt", "r") as f: sequence = f.read() deluxe_dna = CommercialDnaOffer( name="DeluxeDNA.com", sequence_constraints=[SequenceLengthConstraint(max_length=4000)], pricing=PerBasepairPricing(0.20), lead_time=10, ) cheap_dna = CommercialDnaOffer( name="CheapDNA.com", sequence_constraints=[ NoPatternConstraint(enzyme="BsaI"), dnachisel.EnforceGCContent(0.3, 0.7, window=60), ], pricing=PerBasepairPricing(0.10), lead_time=15, ) # BLOCKS TO CHUNKS ASSEMBLY
def test_lead_time_limit():
    """Quote a sequence on a multi-level network under a lead-time limit.

    The network chains an oligo assembly station, two block-level stations
    (Gibson and Golden Gate) that share one suppliers comparator, a PCR
    extraction station backed by a BLAST database, and a top-level Gibson
    station. The quote is requested with ``max_lead_time=28`` and must be
    priced between 5540 and 5550.
    """
    import time

    astar = "auto"
    use_memo = True

    # OLIGO COMPANIES
    oligo_vendor = CommercialDnaOffer(
        name="Oligo.com",
        sequence_constraints=[SequenceLengthConstraint(max_length=200)],
        pricing=PerBasepairPricing(0.10),
        lead_time=7,
    )
    deluxe_vendor = CommercialDnaOffer(
        name="DeluxeDNA.com",
        sequence_constraints=[SequenceLengthConstraint(max_length=4000)],
        pricing=PerBasepairPricing(0.20),
        lead_time=10,
    )
    cheap_vendor = CommercialDnaOffer(
        name="CheapDNA.com",
        sequence_constraints=[
            SequenceLengthConstraint(max_length=4000),
            NoPatternConstraint(enzyme="AarI"),
            NoPatternConstraint(enzyme="BsaI"),
            lambda seq: (0.4 < gc_content(seq) < 0.6),
        ],
        pricing=PerBasepairPricing(0.10),
        lead_time=15,
    )

    # OLIGOS TO BLOCKS ASSEMBLY
    oligo_method = OligoAssemblyMethod(
        overhang_selector=TmSegmentSelector(
            min_size=15, max_size=25, min_tm=50, max_tm=70
        ),
        min_segment_length=40,
        max_segment_length=200,
        sequence_constraints=[SequenceLengthConstraint(max_length=1500)],
        duration=8,
        cost=2,
    )
    oligo_station = DnaAssemblyStation(
        name="Oligo Assembly Station",
        assembly_method=oligo_method,
        supplier=oligo_vendor,
        coarse_grain=20,
        fine_grain=False,
        a_star_factor=astar,
    )

    # BLOCKS TO CHUNKS ASSEMBLY
    blocks_comparator = DnaSuppliersComparator(
        name="bs_comparator",
        suppliers=[oligo_station, cheap_vendor, deluxe_vendor],
        memoize=use_memo,
    )
    # Keyword arguments common to both block-level stations.
    block_station_options = dict(
        supplier=blocks_comparator,
        fine_grain=False,
        memoize=use_memo,
        a_star_factor=astar,
    )
    gibson_blocks_station = DnaAssemblyStation(
        name="Gibson Blocks Assembly",
        assembly_method=GibsonAssemblyMethod(
            overhang_selector=FixedSizeSegmentSelector(80),
            min_segment_length=1000,
            max_segment_length=4000,
            duration=8,
            cost=16,
        ),
        coarse_grain=300,
        **block_station_options
    )
    goldengate_blocks_station = DnaAssemblyStation(
        name="Golden Gate Blocks Assembly",
        assembly_method=GoldenGateAssemblyMethod(
            enzyme="BsmBI",
            wildcard_basepair="A",
            min_segment_length=1000,
            max_segment_length=4000,
            duration=5,
            cost=6,
        ),
        coarse_grain=400,
        **block_station_options
    )
    genome_station = PcrExtractionStation(
        "E. coli Genome (PCR)",
        primers_supplier=oligo_vendor,
        homology_selector=TmSegmentSelector(
            min_size=18, max_size=22, min_tm=55, max_tm=65
        ),
        blast_database=ECOLI_DB_PATH,
        max_amplicon_length=10000,
        extra_time=3,
        extra_cost=1,
    )

    # CHUNKS TO MEGACHUNKS ASSEMBLY
    chunk_suppliers = DnaSuppliersComparator([
        genome_station,
        goldengate_blocks_station,
        gibson_blocks_station,
    ])
    chunks_station = DnaAssemblyStation(
        name="Chunks assembly (Gibson)",
        assembly_method=GibsonAssemblyMethod(
            overhang_selector=FixedSizeSegmentSelector(300),
            min_segment_length=7000,
            max_segment_length=25000,
            duration=8,
        ),
        supplier=chunk_suppliers,
        coarse_grain=1000,
        fine_grain=None,
        a_star_factor=astar,
        memoize=use_memo,
    )

    with open(SEQUENCE_PATH, "r") as handle:
        sequence = handle.read()

    # Time the network preparation together with the quote computation.
    started = time.time()
    chunks_station.prepare_network_on_sequence(sequence)
    quote = chunks_station.get_quote(
        sequence, max_lead_time=28, with_assembly_plan=True
    )
    finished = time.time()
    print("ELAPSED:", "%.02f" % (finished - started))

    print(quote)
    if quote.accepted:
        print(quote.assembly_step_summary())
    assert 5540 < quote.price < 5550