def test_get_repo_part_names_by_collection():
    """Part names are listed collection by collection in the "string" format."""
    sequence_repo = dc.SequenceRepository()
    # One record per collection, inserted in the order the listing must follow.
    for collection_name, record_id in (("parts", "id_1"), ("other", "id_2")):
        sequence_repo.add_records(
            [(record_id, "ATGC")], collection=collection_name
        )
    listing = sequence_repo.get_part_names_by_collection(format="string")
    expected_listing = "parts\n- id_1\nother\n- id_2"
    assert listing == expected_listing
def test_not_in_repository_error():
    """Requesting ids that were never added raises NotInRepositoryError."""
    sequence_repo = dc.SequenceRepository()
    sequence_repo.add_records([("id_1", "ATGC"), ("id_2", "AAAAAT")])
    absent_ids = ["id_3", "id_4"]
    with pytest.raises(dc.NotInRepositoryError):
        sequence_repo.get_records(absent_ids)
def test_repository_duplicate_error():
    """Adding the same records a second time raises RepositoryDuplicateError."""
    sequence_repo = dc.SequenceRepository()
    id_sequence_pairs = [("id_1", "ATGC"), ("id_2", "AAAAAT")]
    # The first insertion happens outside the `raises` context, so it must
    # succeed for the test to reach the duplicate insertion below.
    sequence_repo.add_records(id_sequence_pairs)
    with pytest.raises(dc.RepositoryDuplicateError):
        sequence_repo.add_records(id_sequence_pairs)
def test_autoselect_connectors():
    """A Type-2S assembly given a connectors collection reports its selection.

    The simulation must yield one construct and exactly one warning, whose
    ``data["selected_connectors"]`` lists the connectors that were picked.
    """
    sequence_repo = dc.SequenceRepository()
    parts_dir = os.path.join(this_directory, "parts")
    sequence_repo.import_records(
        collection="parts",
        folder=parts_dir,
        use_file_names_as_ids=True,
        topology="circular",
    )
    connectors_dir = os.path.join(this_directory, "emma_connectors")
    sequence_repo.import_records(
        collection="emma_connectors",
        folder=connectors_dir,
        use_file_names_as_ids=True,
    )

    part_names = list(sequence_repo.collections["parts"])
    assembly = dc.Type2sRestrictionAssembly(
        parts=part_names, connectors_collection="emma_connectors"
    )
    simulation = assembly.simulate(sequence_repository=sequence_repo)

    assert len(simulation.construct_records) == 1
    assert len(simulation.warnings) == 1

    # The single warning carries the names of the connectors that were chosen.
    connectors_warning = simulation.warnings[0]
    expected_connectors = [
        "conn_A_B",
        "conn_D-F",
        "conn_J-K",
        "conn_L-N",
        "conn_R-W",
        "conn_W-Z",
    ]
    selected = sorted(connectors_warning.data["selected_connectors"])
    assert selected == expected_connectors
def extract_records_from_quote(quote):
    """Build a sequence repository from the assembly plan of ``quote``.

    Calls ``quote.compute_full_assembly_plan()`` and then converts every value
    of ``quote.assembly_plan`` into a Biopython record keyed by that value's
    ``id``.

    Returns:
        A ``(record_names, repo)`` tuple: the list of record ids, and a
        ``dc.SequenceRepository`` whose "parts" collection holds the records.
    """
    quote.compute_full_assembly_plan()
    records_by_id = {}
    # Only the values of the plan are used; its keys are ignored.
    for _location, sub_quote in quote.assembly_plan.items():
        records_by_id[sub_quote.id] = dc.sequence_to_biopython_record(
            sub_quote.sequence, id=sub_quote.id
        )
    sequence_repo = dc.SequenceRepository(collections={"parts": records_by_id})
    return list(records_by_id), sequence_repo
def test_BASIC_assembly():
    """The spreadsheet plan simulates into ten valid assemblies, no errors."""
    sequence_repo = dc.SequenceRepository()
    sequence_repo.import_records(folder=parts_and_oligos_folder)
    # "from_spreadsheet" is passed through as the assembly class argument.
    assembly_plan = dc.AssemblyPlan.from_spreadsheet(
        path=assembly_plan_path,
        assembly_class="from_spreadsheet",
    )
    plan_stats = assembly_plan.simulate(sequence_repo).compute_stats()
    assert plan_stats["valid_assemblies"] == 10
    assert plan_stats["errored_assemblies"] == 0
def test_single_gibson():
    """Five fragments assemble into one construct matching the expected record."""
    sequence_repo = dc.SequenceRepository()
    sequence_repo.import_records(files=[sequences_fasta])

    fragment_names = ["Frag_%d" % number for number in range(1, 6)]
    reference_record = sequence_repo.get_record("expected_sequence")

    gibson = dc.GibsonAssembly(parts=fragment_names, homology_checker="default")
    simulation = gibson.simulate(sequence_repository=sequence_repo)

    constructs = simulation.construct_records
    assert len(constructs) == 1
    # Compared with the circular-equality helper rather than plain equality.
    assert sequences_are_circularly_equal([constructs[0], reference_record])
def test_type2s_hierarchical():
    """The two-level Type-2S plan has levels 1 and 2 and four valid assemblies."""
    sequence_repo = dc.SequenceRepository()
    sequence_repo.import_records(folder=parts_folder, use_file_names_as_ids=True)

    spreadsheet_path = os.path.join(this_directory, "type2s_two-level.csv")
    plan = dc.AssemblyPlan.from_spreadsheet(
        assembly_class=dc.Type2sRestrictionAssembly,
        path=spreadsheet_path,
    )
    assert sorted(plan.levels) == [1, 2]

    plan_stats = plan.simulate(sequence_repository=sequence_repo).compute_stats()
    assert plan_stats["valid_assemblies"] == 4
def test_lcr_assembly():
    """The LCR plan yields one valid and one errored assembly."""
    sequence_repo = dc.SequenceRepository()
    sequence_repo.import_records(files=[oligos_fasta, parts_fasta])

    assembly_plan = dc.AssemblyPlan.from_spreadsheet(path=assembly_plan_path)
    plan_simulation = assembly_plan.simulate(sequence_repo)
    plan_stats = plan_simulation.compute_stats()

    assert plan_stats["valid_assemblies"] == 1
    # The second assembly of the plan is flawed on purpose.
    assert plan_stats["errored_assemblies"] == 1

    # Also exercise in-memory report generation; its output is not inspected.
    plan_simulation.write_report("@memory")
def test_hierarchical_biobrick():
    """The hierarchical BioBrick plan gives three valid assemblies and a report."""
    sequence_repo = dc.SequenceRepository()
    sequence_repo.import_records(folder=parts_folder, use_file_names_as_ids=True)

    spreadsheet_path = os.path.join(this_directory, "hierarchical_biobrick.csv")
    plan = dc.AssemblyPlan.from_spreadsheet(
        assembly_class=dc.BioBrickStandardAssembly,
        path=spreadsheet_path,
    )
    simulated_plan = plan.simulate(sequence_repository=sequence_repo)
    assert simulated_plan.compute_stats()["valid_assemblies"] == 3

    # Write the report to memory with mix graphs switched on.
    writer = dc.AssemblyReportWriter(include_mix_graphs=True)
    simulated_plan.write_report("@memory", assembly_report_writer=writer)
def test_combinatorial_type2s():
    """Check construct counts, errors and warnings of a combinatorial assembly.

    The same parts list is simulated under four configurations: a fixed
    expected count, no expected count, a construct cap, and a randomized cap.
    """
    sequence_repo = dc.SequenceRepository()
    sequence_repo.import_records(folder=parts_folder, use_file_names_as_ids=True)
    part_names = list(sequence_repo.collections["parts"])

    def run_simulation(**assembly_options):
        # Build a Type-2S assembly over all the parts and simulate it.
        assembly = dc.Type2sRestrictionAssembly(part_names, **assembly_options)
        return assembly.simulate(sequence_repository=sequence_repo)

    # Expecting a single construct while five are produced gives one error.
    outcome = run_simulation(expected_constructs=1)
    assert len(outcome.errors) == 1
    assert len(outcome.construct_records) == 5

    # With no expected count, the same five constructs raise no error.
    outcome = run_simulation(expected_constructs="any_number")
    assert len(outcome.errors) == 0
    assert len(outcome.construct_records) == 5

    # Capping the number of constructs yields fewer constructs and a warning.
    outcome = run_simulation(
        max_constructs=3,
        expected_constructs="any_number",
        expect_no_unused_parts=False,
    )
    assert len(outcome.errors) == 0
    assert len(outcome.warnings) == 1  # Max constructs reached warning
    assert len(outcome.construct_records) == 3

    # Randomized picking of constructs under a cap of two.
    outcome = run_simulation(
        max_constructs=2,
        expected_constructs="any_number",
        expect_no_unused_parts=False,
        randomize_constructs=True,
    )
    assert len(outcome.errors) == 0
    assert len(outcome.warnings) == 1  # Max constructs reached warning
    assert len(outcome.construct_records) == 2
def test_gibson_assembly_plan():
    """The Gibson plan gives three valid and two errored assemblies."""
    sequence_repo = dc.SequenceRepository()
    sequence_repo.import_records(files=[sequences_fasta])

    spreadsheet_path = os.path.join(this_directory, "gibson_plan.csv")
    plan = dc.AssemblyPlan.from_spreadsheet(
        assembly_class=dc.GibsonAssembly,
        path=spreadsheet_path,
    )
    simulated_plan = plan.simulate(sequence_repository=sequence_repo)

    plan_stats = simulated_plan.compute_stats()
    assert plan_stats["valid_assemblies"] == 3
    assert plan_stats["errored_assemblies"] == 2

    # Write the report to memory with these plotting options switched on.
    writer_options = {
        "include_mix_graphs": True,
        "include_assembly_plots": True,
        "show_overhangs_in_graph": True,
        "annotate_parts_homologies": True,
    }
    writer = dc.AssemblyReportWriter(**writer_options)
    simulated_plan.write_report(target="@memory", assembly_report_writer=writer)
def test_single_assembly(tmpdir):
    """Write a report with plots disabled and the PDF report enabled.

    The test makes no assertion; it passes as long as simulation, statistics
    and report writing complete. ``tmpdir`` is accepted but not used here.
    """
    sequence_repo = dc.SequenceRepository()
    sequence_repo.import_records(folder=parts_folder, use_file_names_as_ids=True)

    spreadsheet_path = os.path.join(this_directory, "type2s_two-level.csv")
    plan = dc.AssemblyPlan.from_spreadsheet(
        assembly_class=dc.Type2sRestrictionAssembly,
        path=spreadsheet_path,
    )
    simulated_plan = plan.simulate(sequence_repository=sequence_repo)
    # The statistics are computed but their values are not inspected.
    simulated_plan.compute_stats()

    writer_options = {
        "include_fragment_plots": False,
        "include_part_plots": False,
        "include_mix_graphs": False,
        "include_assembly_plots": False,
        "show_overhangs_in_graph": False,
        "annotate_parts_homologies": False,
        "include_pdf_report": True,
    }
    writer = dc.AssemblyReportWriter(**writer_options)
    simulated_plan.write_report(target="@memory", assembly_report_writer=writer)
def test_type2s_hierarchical_flawed():
    """The flawed two-level plan gives valid, errored and cancelled assemblies."""
    sequence_repo = dc.SequenceRepository()
    sequence_repo.import_records(folder=parts_folder, use_file_names_as_ids=True)

    spreadsheet_path = os.path.join(this_directory, "type2s_two-level_flawed.xls")
    plan = dc.AssemblyPlan.from_spreadsheet(
        assembly_class=dc.Type2sRestrictionAssembly,
        path=spreadsheet_path,
    )
    assert sorted(plan.levels) == [1, 2]

    simulated_plan = plan.simulate(sequence_repository=sequence_repo)
    plan_stats = simulated_plan.compute_stats()
    assert plan_stats["valid_assemblies"] == 2  # Construct 1 and its dependant
    assert plan_stats["errored_assemblies"] == 1
    assert plan_stats["cancelled_assemblies"] == 1

    # Write the report to memory with every listed plot option switched on.
    writer_options = {
        "include_fragment_plots": True,
        "include_part_plots": True,
        "include_mix_graphs": True,
        "include_assembly_plots": True,
        "show_overhangs_in_graph": True,
        "annotate_parts_homologies": True,
    }
    writer = dc.AssemblyReportWriter(**writer_options)
    simulated_plan.write_report(target="@memory", assembly_report_writer=writer)
# Example script: simulate a randomized combinatorial Type-2S assembly (capped
# at two constructs) from the records in the local "parts" folder, then write
# the report to output/randomized_combinatorial.
import os

import dnacauldron as dc

# Load every record of the "parts" folder, using file names as record ids.
repository = dc.SequenceRepository()
repository.import_records(folder="parts", use_file_names_as_ids=True)
parts_list = list(repository.collections["parts"])

# Any number of constructs is accepted, but at most two are kept, and
# construct randomization is switched on.
assembly_options = {
    "name": "randomized_combinatorial_asm",
    "parts": parts_list,
    "expected_constructs": "any_number",
    "randomize_constructs": True,
    "max_constructs": 2,
}
assembly = dc.Type2sRestrictionAssembly(**assembly_options)
simulation = assembly.simulate(sequence_repository=repository)

output_path = os.path.join("output", "randomized_combinatorial")
simulation.write_report(target=output_path)
print("Done! see output/randomized_combinatorial folder for the results.")
def work(self):
    """Simulate the uploaded assembly plan and return the zipped report.

    Reads the job parameters from ``self.data``: parts, connectors, the
    assembly plan spreadsheet, topology, and the report options. Loads the
    sequences into a ``dc.SequenceRepository``, simulates the plan with the
    assembly classes taken from the spreadsheet, and writes the report to
    memory.

    Returns:
        dict with the keys ``file`` (zip data, name and mimetype),
        ``assembly_stats``, ``errors`` (stringified errors of all assembly
        simulations) and ``success``.
    """
    data, logger = self.data, self.logger
    logger(message="Reading the Data...")

    # Map "yes"/"no" to booleans; any other value is passed through unchanged.
    def yes_no(value):
        choices = {"yes": True, "no": False}
        return choices.get(value, value)

    report_writer = dc.AssemblyReportWriter(
        include_fragment_plots=yes_no(data.include_fragment_plots),
        include_mix_graphs=yes_no(data.include_graph_plots),
        include_assembly_plots=yes_no(data.include_assembly_plots),
    )

    # LOAD ALL PART RECORDS INTO A SEQUENCE REPOSITORY
    logger(message="Parsing the sequences...")
    records = records_from_data_files(
        data.parts, use_file_names_as_ids=data.use_file_names_as_ids
    )
    repository = dc.SequenceRepository()

    def has_usable_location(feature):
        location = feature.location
        return location is not None and location.start <= location.end

    # Location-less features can cause bugs when concatenating records.
    for part_record in records:
        part_record.features = list(
            filter(has_usable_location, part_record.features)
        )
    for part_record in records:
        set_record_topology(part_record, topology=data.topology)
        part_record.seq = part_record.seq.upper()
    repository.add_records(records, collection="parts")

    # CONNECTORS, WHEN PROVIDED, GO INTO A COLLECTION NAMED AFTER THEIR FILE
    if len(data.connectors) > 0:
        for connector in records_from_data_files(data.connectors):
            set_record_topology(connector, topology=data.topology)
            connector.seq = connector.seq.upper()
            # The zip archive name takes precedence over the file name; the
            # collection name is that name minus its last dot-separated part.
            if hasattr(connector, "zip_file_name"):
                source_name = connector.zip_file_name
            else:
                source_name = connector.file_name
            collection_name = ".".join(source_name.split(".")[:-1])
            repository.add_record(connector, collection=collection_name)

    logger(message="Simulating the assembly plan...")
    filelike = file_to_filelike_object(data.assembly_plan)
    plan_file_name = data.assembly_plan.name
    assembly_plan = dc.AssemblyPlan.from_spreadsheet(
        assembly_class="from_spreadsheet",
        path=filelike,
        name="_".join(plan_file_name.split(".")[:-1]),
        is_csv=plan_file_name.lower().endswith(".csv"),
        logger=logger,
    )
    simulation = assembly_plan.simulate(sequence_repository=repository)
    stats = simulation.compute_stats()
    n_errors = stats["errored_assemblies"]
    logger(submessage="%s error(s) found" % n_errors)

    report_zip_data = simulation.write_report(
        target="@memory",
        assembly_report_writer=report_writer,
        logger=self.logger,
    )

    # Flatten the errors of every individual assembly simulation.
    errors = []
    for assembly_simulation in simulation.assembly_simulations:
        errors.extend(assembly_simulation.errors)

    logger(message="All done!")
    zip_file = {
        "data": data_to_html_data(report_zip_data, "zip"),
        "name": "%s.zip" % assembly_plan.name,
        "mimetype": "application/zip",
    }
    return {
        "file": zip_file,
        "assembly_stats": stats,
        "errors": [str(e) for e in errors],
        "success": True,
    }
def work(self):
    """Simulate a Type-2S assembly, or a full assembly plan, and zip the report.

    Reads the job parameters from ``self.data``: parts, optional connectors,
    topology, enzyme, backbone options and report options. Loads the
    sequences into a ``dc.SequenceRepository`` and then follows one of two
    scenarios, chosen by ``data.use_assembly_plan``:

    - single assembly: all parts go into one ``dc.Type2sRestrictionAssembly``;
    - full plan: the uploaded spreadsheet is simulated as an assembly plan.

    Returns:
        dict with the keys ``file`` (zip data, name and mimetype), ``errors``
        and ``success``, plus ``n_constructs`` for a single assembly or
        ``assembly_stats`` for a full plan.
    """
    self.logger(message="Reading the Data...")
    data = self.data

    # Map "yes"/"no" to booleans; any other value is passed through unchanged.
    def yes_no(value):
        return {"yes": True, "no": False}.get(value, value)

    report_writer = dc.AssemblyReportWriter(
        include_mix_graphs=yes_no(data.include_graph_plots),
        include_assembly_plots=yes_no(data.include_assembly_plots),
        include_fragment_plots=yes_no(data.include_fragment_plots),
    )

    # INITIALIZE ALL RECORDS IN A SEQUENCE REPOSITORY
    records = records_from_data_files(
        data.parts, use_file_names_as_ids=data.use_file_names_as_ids
    )
    repository = dc.SequenceRepository()
    for record in records:
        # Location-less features can cause bugs when concatenating records.
        record.features = [
            f
            for f in record.features
            if f.location is not None and f.location.start <= f.location.end
        ]
    for record in records:
        if data.backbone_first and record.id == data.backbone_name:
            record.is_backbone = True
    repository.add_records(records, collection="parts")

    # LOAD THE CONNECTORS, IF ANY WERE PROVIDED
    has_connectors = len(data.connectors) > 0
    connector_records = (
        records_from_data_files(data.connectors) if has_connectors else []
    )

    # Topology and upper-casing are applied to the parts in every case.
    # Previously this only ran when connectors were provided, so
    # data.topology was ignored for connector-less jobs. As before, the parts
    # are updated after being added to the repository.
    # NOTE(review): this presumes the repository keeps references to the
    # records rather than copies - confirm.
    for record in records + connector_records:
        set_record_topology(record, topology=data.topology)
        record.seq = record.seq.upper()

    connectors_collection = None
    if has_connectors:
        repository.add_records(connector_records, collection="connectors")
        connectors_collection = "connectors"

    # SIMULATE!
    self.logger(message="Simulating the assembly...")

    if not data.use_assembly_plan:
        # SCENARIO: SINGLE ASSEMBLY
        assembly = dc.Type2sRestrictionAssembly(
            name="simulated_assembly",
            parts=[r.id for r in records],
            enzyme=data.enzyme,
            expected_constructs="any_number",
            connectors_collection=connectors_collection,
        )
        simulation = assembly.simulate(sequence_repository=repository)
        n_constructs = len(simulation.construct_records)
        self.logger(
            message="Done (%d constructs found), writing report..." % n_constructs
        )
        # The single-assembly report call takes the writer as `report_writer`;
        # the plan-level call below takes `assembly_report_writer`.
        report_zip_data = simulation.write_report(
            target="@memory", report_writer=report_writer
        )
        return {
            "file": {
                "data": data_to_html_data(report_zip_data, "zip"),
                "name": "assembly_simulation.zip",
                "mimetype": "application/zip",
            },
            "errors": [str(e) for e in simulation.errors],
            "n_constructs": n_constructs,
            "success": True,
        }

    # SCENARIO: FULL ASSEMBLY PLAN
    filelike = file_to_filelike_object(data.assembly_plan)
    plan_file_name = data.assembly_plan.name
    assembly_plan = dc.AssemblyPlan.from_spreadsheet(
        assembly_class=dc.Type2sRestrictionAssembly,
        path=filelike,
        connectors_collection=connectors_collection,
        expect_no_unused_parts=data.no_skipped_parts,
        expected_constructs=1 if data.single_assemblies else "any_number",
        name="_".join(plan_file_name.split(".")[:-1]),
        is_csv=plan_file_name.lower().endswith(".csv"),
        logger=self.logger,
    )
    simulation = assembly_plan.simulate(sequence_repository=repository)
    stats = simulation.compute_stats()
    n_errors = stats["errored_assemblies"]
    self.logger(message="Done (%d errors), writing report..." % n_errors)
    report_zip_data = simulation.write_report(
        target="@memory",
        assembly_report_writer=report_writer,
        logger=self.logger,
    )
    # Flatten the errors of every individual assembly simulation.
    errors = [
        error
        for assembly_simulation in simulation.assembly_simulations
        for error in assembly_simulation.errors
    ]
    return {
        "file": {
            "data": data_to_html_data(report_zip_data, "zip"),
            "name": "%s.zip" % assembly_plan.name,
            "mimetype": "application/zip",
        },
        "assembly_stats": stats,
        "errors": [str(e) for e in errors],
        "success": True,
    }