Ejemplo n.º 1
0
def test_get_repo_part_names_by_collection():
    """Part names are listed per collection when the string format is used."""
    repository = dc.SequenceRepository()
    for part_id, collection_name in (("id_1", "parts"), ("id_2", "other")):
        repository.add_records([(part_id, "ATGC")], collection=collection_name)

    listing = repository.get_part_names_by_collection(format="string")
    expected_listing = "parts\n- id_1\nother\n- id_2"
    assert listing == expected_listing
Ejemplo n.º 2
0
def test_not_in_repository_error():
    """Requesting ids that were never added raises NotInRepositoryError."""
    repository = dc.SequenceRepository()
    repository.add_records([("id_1", "ATGC"), ("id_2", "AAAAAT")])
    unknown_ids = ["id_3", "id_4"]
    with pytest.raises(dc.NotInRepositoryError):
        repository.get_records(unknown_ids)
Ejemplo n.º 3
0
def test_repository_duplicate_error():
    """Adding the same records a second time raises RepositoryDuplicateError."""
    repository = dc.SequenceRepository()
    duplicated_records = [("id_1", "ATGC"), ("id_2", "AAAAAT")]
    # The first insertion is legitimate; only the repeat must fail.
    repository.add_records(duplicated_records)
    with pytest.raises(dc.RepositoryDuplicateError):
        repository.add_records(duplicated_records)
Ejemplo n.º 4
0
def test_autoselect_connectors():
    """Simulate a Type-2S assembly drawing connectors from a collection.

    The simulation must yield exactly one construct and one warning, and the
    warning's data must list the connectors that were selected.
    """
    repository = dc.SequenceRepository()
    parts_dir = os.path.join(this_directory, "parts")
    repository.import_records(
        collection="parts",
        folder=parts_dir,
        use_file_names_as_ids=True,
        topology="circular",
    )
    connectors_dir = os.path.join(this_directory, "emma_connectors")
    repository.import_records(
        collection="emma_connectors",
        folder=connectors_dir,
        use_file_names_as_ids=True,
    )

    part_names = list(repository.collections["parts"].keys())
    assembly = dc.Type2sRestrictionAssembly(
        parts=part_names, connectors_collection="emma_connectors"
    )
    simulation = assembly.simulate(sequence_repository=repository)

    assert len(simulation.construct_records) == 1
    assert len(simulation.warnings) == 1

    expected_connectors = [
        "conn_A_B",
        "conn_D-F",
        "conn_J-K",
        "conn_L-N",
        "conn_R-W",
        "conn_W-Z",
    ]
    selection_warning = simulation.warnings[0]
    selected = sorted(selection_warning.data["selected_connectors"])
    assert selected == expected_connectors
Ejemplo n.º 5
0
def extract_records_from_quote(quote):
    """Build a sequence repository from the assembly plan of a quote.

    The quote's full assembly plan is computed first. Each entry of the plan
    is then converted with ``dc.sequence_to_biopython_record`` and stored
    under the entry's ``id``.

    Returns
    -------
    (record_names, repo)
      The list of record ids, and a ``dc.SequenceRepository`` holding all the
      records in a single "parts" collection.
    """
    quote.compute_full_assembly_plan()
    records = {}
    # Only the plan's values are needed; the keys are ignored.
    for _location, sub_quote in quote.assembly_plan.items():
        records[sub_quote.id] = dc.sequence_to_biopython_record(
            sub_quote.sequence, id=sub_quote.id
        )
    repo = dc.SequenceRepository(collections={"parts": records})
    return list(records.keys()), repo
Ejemplo n.º 6
0
def test_BASIC_assembly():
    """The spreadsheet plan simulates to 10 valid assemblies and no error."""
    repository = dc.SequenceRepository()
    repository.import_records(folder=parts_and_oligos_folder)
    # The assembly class of each assembly is read from the spreadsheet itself.
    assembly_plan = dc.AssemblyPlan.from_spreadsheet(
        path=assembly_plan_path, assembly_class="from_spreadsheet"
    )
    plan_simulation = assembly_plan.simulate(repository)
    stats = plan_simulation.compute_stats()
    assert stats["valid_assemblies"] == 10
    assert stats["errored_assemblies"] == 0
Ejemplo n.º 7
0
def test_single_gibson():
    """A 5-fragment Gibson assembly yields the expected circular sequence."""
    repository = dc.SequenceRepository()
    repository.import_records(files=[sequences_fasta])
    fragment_names = ["Frag_%d" % index for index in range(1, 6)]
    expected_record = repository.get_record("expected_sequence")

    assembly = dc.GibsonAssembly(
        parts=fragment_names, homology_checker="default"
    )
    simulation = assembly.simulate(sequence_repository=repository)

    constructs = simulation.construct_records
    assert len(constructs) == 1
    # Compare up to rotation, as done by sequences_are_circularly_equal.
    assert sequences_are_circularly_equal([constructs[0], expected_record])
Ejemplo n.º 8
0
def test_type2s_hierarchical():
    """The two-level Type-2S plan has levels 1 and 2 and 4 valid assemblies."""
    repository = dc.SequenceRepository()
    repository.import_records(folder=parts_folder, use_file_names_as_ids=True)

    plan_path = os.path.join(this_directory, "type2s_two-level.csv")
    assembly_plan = dc.AssemblyPlan.from_spreadsheet(
        assembly_class=dc.Type2sRestrictionAssembly, path=plan_path
    )
    assert sorted(assembly_plan.levels) == [1, 2]

    plan_simulation = assembly_plan.simulate(sequence_repository=repository)
    assert plan_simulation.compute_stats()["valid_assemblies"] == 4
Ejemplo n.º 9
0
def test_lcr_assembly():
    """Simulate the plan: one assembly is valid and one is errored."""
    repository = dc.SequenceRepository()
    repository.import_records(files=[oligos_fasta, parts_fasta])
    assembly_plan = dc.AssemblyPlan.from_spreadsheet(path=assembly_plan_path)
    plan_simulation = assembly_plan.simulate(repository)

    stats = plan_simulation.compute_stats()
    assert stats["valid_assemblies"] == 1
    # The second assembly of the plan is flawed on purpose.
    assert stats["errored_assemblies"] == 1

    # Also write an in-memory report, purely for test coverage.
    plan_simulation.write_report("@memory")
Ejemplo n.º 10
0
def test_hierarchical_biobrick():
    """The hierarchical BioBrick plan gives 3 valid assemblies and a report."""
    repository = dc.SequenceRepository()
    repository.import_records(folder=parts_folder, use_file_names_as_ids=True)

    plan_path = os.path.join(this_directory, "hierarchical_biobrick.csv")
    assembly_plan = dc.AssemblyPlan.from_spreadsheet(
        assembly_class=dc.BioBrickStandardAssembly, path=plan_path
    )
    plan_simulation = assembly_plan.simulate(sequence_repository=repository)
    assert plan_simulation.compute_stats()["valid_assemblies"] == 3

    # Write the report in memory, with mix graphs enabled.
    writer_with_graphs = dc.AssemblyReportWriter(include_mix_graphs=True)
    plan_simulation.write_report(
        "@memory", assembly_report_writer=writer_with_graphs
    )
Ejemplo n.º 11
0
def test_combinatorial_type2s():
    """Check construct counts, errors and warnings of a combinatorial assembly.

    The same parts list is simulated under four settings: a single expected
    construct, any number of constructs, a capped number of constructs, and a
    capped number of randomly picked constructs.
    """
    repository = dc.SequenceRepository()
    repository.import_records(folder=parts_folder, use_file_names_as_ids=True)
    parts_list = list(repository.collections["parts"])

    def simulate(**assembly_parameters):
        """Simulate a Type-2S assembly of all the parts with the given options."""
        assembly = dc.Type2sRestrictionAssembly(
            parts_list, **assembly_parameters
        )
        return assembly.simulate(sequence_repository=repository)

    # A single construct is expected, 5 are obtained: one error.
    simulation = simulate(expected_constructs=1)
    assert len(simulation.errors) == 1
    assert len(simulation.construct_records) == 5

    # No particular number of constructs is expected: no error.
    simulation = simulate(expected_constructs="any_number")
    assert len(simulation.errors) == 0
    assert len(simulation.construct_records) == 5

    # Capping the number of constructs gives fewer constructs and a warning.
    simulation = simulate(
        max_constructs=3,
        expected_constructs="any_number",
        expect_no_unused_parts=False,
    )
    assert len(simulation.errors) == 0
    assert len(simulation.warnings) == 1  # "max constructs reached" warning
    assert len(simulation.construct_records) == 3

    # Same cap mechanism, with randomized construct picking.
    simulation = simulate(
        max_constructs=2,
        expected_constructs="any_number",
        expect_no_unused_parts=False,
        randomize_constructs=True,
    )
    assert len(simulation.errors) == 0
    assert len(simulation.warnings) == 1  # "max constructs reached" warning
    assert len(simulation.construct_records) == 2
Ejemplo n.º 12
0
def test_gibson_assembly_plan():
    """The Gibson plan gives 3 valid and 2 errored assemblies, plus a report."""
    repository = dc.SequenceRepository()
    repository.import_records(files=[sequences_fasta])

    plan_path = os.path.join(this_directory, "gibson_plan.csv")
    assembly_plan = dc.AssemblyPlan.from_spreadsheet(
        assembly_class=dc.GibsonAssembly, path=plan_path
    )
    plan_simulation = assembly_plan.simulate(sequence_repository=repository)

    stats = plan_simulation.compute_stats()
    assert stats["valid_assemblies"] == 3
    assert stats["errored_assemblies"] == 2

    # Write the report in memory with graphs, plots and annotations enabled.
    writer_options = {
        "include_mix_graphs": True,
        "include_assembly_plots": True,
        "show_overhangs_in_graph": True,
        "annotate_parts_homologies": True,
    }
    plan_simulation.write_report(
        target="@memory",
        assembly_report_writer=dc.AssemblyReportWriter(**writer_options),
    )
Ejemplo n.º 13
0
def test_single_assembly(tmpdir):
    """Write an in-memory report with every plot disabled and the PDF enabled.

    This test makes no assertion: it only checks that simulating the plan,
    computing its stats and writing the report run without raising.
    """
    repository = dc.SequenceRepository()
    repository.import_records(folder=parts_folder, use_file_names_as_ids=True)

    plan_path = os.path.join(this_directory, "type2s_two-level.csv")
    assembly_plan = dc.AssemblyPlan.from_spreadsheet(
        assembly_class=dc.Type2sRestrictionAssembly, path=plan_path
    )
    plan_simulation = assembly_plan.simulate(sequence_repository=repository)
    # The stats are computed but their values are not checked here.
    plan_simulation.compute_stats()

    disabled_options = (
        "include_fragment_plots",
        "include_part_plots",
        "include_mix_graphs",
        "include_assembly_plots",
        "show_overhangs_in_graph",
        "annotate_parts_homologies",
    )
    writer_options = {option: False for option in disabled_options}
    writer_options["include_pdf_report"] = True
    report_writer = dc.AssemblyReportWriter(**writer_options)
    plan_simulation.write_report(
        target="@memory", assembly_report_writer=report_writer
    )
Ejemplo n.º 14
0
def test_type2s_hierarchical_flawed():
    """Simulate the flawed two-level plan and check each assembly status count.

    Two assemblies are expected to be valid, one errored and one cancelled.
    A report with every plot option enabled is then written in memory.
    """
    repository = dc.SequenceRepository()
    repository.import_records(folder=parts_folder, use_file_names_as_ids=True)

    plan_path = os.path.join(this_directory, "type2s_two-level_flawed.xls")
    assembly_plan = dc.AssemblyPlan.from_spreadsheet(
        assembly_class=dc.Type2sRestrictionAssembly, path=plan_path
    )
    assert sorted(assembly_plan.levels) == [1, 2]

    plan_simulation = assembly_plan.simulate(sequence_repository=repository)
    stats = plan_simulation.compute_stats()
    assert stats["valid_assemblies"] == 2  # Construct 1 and its dependant
    assert stats["errored_assemblies"] == 1
    assert stats["cancelled_assemblies"] == 1

    enabled_options = (
        "include_fragment_plots",
        "include_part_plots",
        "include_mix_graphs",
        "include_assembly_plots",
        "show_overhangs_in_graph",
        "annotate_parts_homologies",
    )
    report_writer = dc.AssemblyReportWriter(
        **{option: True for option in enabled_options}
    )
    plan_simulation.write_report(
        target="@memory", assembly_report_writer=report_writer
    )
Ejemplo n.º 15
0
import dnacauldron as dc
import os

# Load every record of the "parts" folder, using file names as record ids.
repository = dc.SequenceRepository()
repository.import_records(folder="parts", use_file_names_as_ids=True)
parts_list = list(repository.collections["parts"])

# Combinatorial Type-2S assembly, limited to 2 randomly picked constructs.
assembly = dc.Type2sRestrictionAssembly(
    name="randomized_combinatorial_asm",
    parts=parts_list,
    max_constructs=2,
    randomize_constructs=True,
    expected_constructs="any_number",
)

# Simulate, then write the report under output/randomized_combinatorial.
simulation = assembly.simulate(sequence_repository=repository)
output_path = os.path.join("output", "randomized_combinatorial")
simulation.write_report(target=output_path)
print("Done! see output/randomized_combinatorial folder for the results.")
    def work(self):
        """Simulate the uploaded assembly plan and return a zipped report.

        The parts (and the connectors, when some are provided) are read from
        ``self.data`` and stored in a sequence repository. The assembly plan
        spreadsheet is then simulated against that repository and the report
        is written in memory.

        Returns
        -------
        dict
          With keys ``file`` (data, name and mimetype of the zip report),
          ``assembly_stats``, ``errors`` (one string per simulation error)
          and ``success`` (always True).
        """
        data = self.data
        log = self.logger
        log(message="Reading the Data...")

        # CHANGE THE VALUE OF THE BOOLEANS

        def yes_no(value):
            """Map "yes"/"no" to True/False; any other value is returned as is."""
            return {"yes": True, "no": False}.get(value, value)

        def has_valid_location(feature):
            """Tell whether the feature has a location with start <= end."""
            location = feature.location
            return location is not None and location.start <= location.end

        def file_stem(file_name):
            """Return the file name without its last dot-separated component."""
            return ".".join(file_name.split(".")[:-1])

        fragment_plots = yes_no(data.include_fragment_plots)
        graph_plots = yes_no(data.include_graph_plots)
        assembly_plots = yes_no(data.include_assembly_plots)
        report_writer = dc.AssemblyReportWriter(
            include_mix_graphs=graph_plots,
            include_assembly_plots=assembly_plots,
            include_fragment_plots=fragment_plots,
        )

        # INITIALIZE ALL RECORDS IN A SEQUENCE REPOSITORY

        log(message="Parsing the sequences...")

        records = records_from_data_files(
            data.parts, use_file_names_as_ids=data.use_file_names_as_ids
        )
        repository = dc.SequenceRepository()
        for part in records:
            # location-less features can cause bug when concatenating records.
            part.features = list(filter(has_valid_location, part.features))
        for part in records:
            set_record_topology(part, topology=data.topology)
            part.seq = part.seq.upper()
        repository.add_records(records, collection="parts")

        # CREATE A CONNECTORS COLLECTION IF CONNECTORS ARE PROVIDED

        if len(data.connectors):
            for connector in records_from_data_files(data.connectors):
                set_record_topology(connector, topology=data.topology)
                connector.seq = connector.seq.upper()
                # The collection is named after the file the record came from:
                # the zip file name if the record has one, else the file name.
                if hasattr(connector, "zip_file_name"):
                    collection_name = file_stem(connector.zip_file_name)
                else:
                    collection_name = file_stem(connector.file_name)
                repository.add_record(connector, collection=collection_name)

        # SIMULATE THE PLAN

        log(message="Simulating the assembly plan...")
        filelike = file_to_filelike_object(data.assembly_plan)
        plan_file_name = data.assembly_plan.name
        assembly_plan = dc.AssemblyPlan.from_spreadsheet(
            assembly_class="from_spreadsheet",
            path=filelike,
            name="_".join(plan_file_name.split(".")[:-1]),
            is_csv=plan_file_name.lower().endswith(".csv"),
            logger=log,
        )
        simulation = assembly_plan.simulate(sequence_repository=repository)
        stats = simulation.compute_stats()
        log(submessage="%s error(s) found" % stats["errored_assemblies"])

        # WRITE THE REPORT AND COLLECT THE ERRORS OF ALL ASSEMBLIES

        report_zip_data = simulation.write_report(
            target="@memory",
            assembly_report_writer=report_writer,
            logger=self.logger,
        )
        errors = []
        for assembly_simulation in simulation.assembly_simulations:
            errors.extend(assembly_simulation.errors)
        log(message="All done!")

        report_file = {
            "data": data_to_html_data(report_zip_data, "zip"),
            "name": "%s.zip" % assembly_plan.name,
            "mimetype": "application/zip",
        }
        return {
            "file": report_file,
            "assembly_stats": stats,
            "errors": [str(error) for error in errors],
            "success": True,
        }
Ejemplo n.º 17
0
    def work(self):
        """Simulate a Type-2S restriction assembly, or a full assembly plan.

        The parts (and the connectors, when some are provided) are read from
        ``self.data`` and stored in a sequence repository. Depending on
        ``data.use_assembly_plan``, either a single assembly of all the parts
        or the uploaded assembly plan spreadsheet is simulated, and a zip
        report is written in memory.

        Returns
        -------
        dict
          Always contains ``file`` (data, name and mimetype of the zip
          report), ``errors`` (one string per simulation error) and
          ``success`` (True). A single assembly adds ``n_constructs``; an
          assembly plan adds ``assembly_stats``.
        """
        self.logger(message="Reading the Data...")
        data = self.data

        # CHANGE THE VALUE OF THE BOOLEANS

        def yes_no(value):
            """Map "yes"/"no" to True/False; any other value is returned as is."""
            return {"yes": True, "no": False}.get(value, value)

        def normalize(record):
            """Apply the requested topology and upper-case the sequence."""
            set_record_topology(record, topology=data.topology)
            record.seq = record.seq.upper()

        include_fragment_plots = yes_no(data.include_fragment_plots)
        include_graph_plots = yes_no(data.include_graph_plots)
        include_assembly_plots = yes_no(data.include_assembly_plots)
        report_writer = dc.AssemblyReportWriter(
            include_mix_graphs=include_graph_plots,
            include_assembly_plots=include_assembly_plots,
            include_fragment_plots=include_fragment_plots,
        )

        # INITIALIZE ALL RECORDS IN A SEQUENCE REPOSITORY

        records = records_from_data_files(
            data.parts, use_file_names_as_ids=data.use_file_names_as_ids
        )
        repository = dc.SequenceRepository()
        for record in records:
            # location-less features can cause bug when concatenating records.
            record.features = [
                f
                for f in record.features
                if f.location is not None
                and f.location.start <= f.location.end
            ]
            if data.backbone_first and record.id == data.backbone_name:
                record.is_backbone = True
            # Parts are normalized whether or not connectors are provided.
            # This used to happen only inside the connectors branch, so parts
            # were left untouched when no connector was uploaded.
            normalize(record)
        repository.add_records(records, collection="parts")

        # CREATE A CONNECTORS COLLECTION IF CONNECTORS ARE PROVIDED

        connectors_collection = None
        if data.connectors:
            connector_records = records_from_data_files(data.connectors)
            for record in connector_records:
                normalize(record)
            repository.add_records(connector_records, collection="connectors")
            connectors_collection = "connectors"

        # SIMULATE!

        self.logger(message="Simulating the assembly...")

        if not data.use_assembly_plan:

            # SCENARIO: SINGLE ASSEMBLY

            assembly = dc.Type2sRestrictionAssembly(
                name="simulated_assembly",
                parts=[record.id for record in records],
                enzyme=data.enzyme,
                expected_constructs="any_number",
                connectors_collection=connectors_collection,
            )
            simulation = assembly.simulate(sequence_repository=repository)
            n_constructs = len(simulation.construct_records)
            self.logger(
                message="Done (%d constructs found), writing report..."
                % n_constructs
            )
            # NOTE: the writer keyword is "report_writer" for a single
            # assembly simulation, "assembly_report_writer" for a plan.
            report_zip_data = simulation.write_report(
                target="@memory", report_writer=report_writer
            )
            return {
                "file": {
                    "data": data_to_html_data(report_zip_data, "zip"),
                    "name": "assembly_simulation.zip",
                    "mimetype": "application/zip",
                },
                "errors": [str(e) for e in simulation.errors],
                "n_constructs": n_constructs,
                "success": True,
            }

        # SCENARIO: FULL ASSEMBLY PLAN

        filelike = file_to_filelike_object(data.assembly_plan)
        plan_file_name = data.assembly_plan.name
        assembly_plan = dc.AssemblyPlan.from_spreadsheet(
            assembly_class=dc.Type2sRestrictionAssembly,
            path=filelike,
            connectors_collection=connectors_collection,
            expect_no_unused_parts=data.no_skipped_parts,
            expected_constructs=1 if data.single_assemblies else "any_number",
            name="_".join(plan_file_name.split(".")[:-1]),
            is_csv=plan_file_name.lower().endswith(".csv"),
            logger=self.logger,
        )

        simulation = assembly_plan.simulate(sequence_repository=repository)
        stats = simulation.compute_stats()
        n_errors = stats["errored_assemblies"]
        self.logger(message="Done (%d errors), writing report..." % n_errors)
        report_zip_data = simulation.write_report(
            target="@memory",
            assembly_report_writer=report_writer,
            logger=self.logger,
        )
        errors = [
            error
            for assembly_simulation in simulation.assembly_simulations
            for error in assembly_simulation.errors
        ]

        return {
            "file": {
                "data": data_to_html_data(report_zip_data, "zip"),
                "name": "%s.zip" % assembly_plan.name,
                "mimetype": "application/zip",
            },
            "assembly_stats": stats,
            "errors": [str(e) for e in errors],
            "success": True,
        }