def test_circular_sequence_basic():
    """Check that a circular problem's constraints can be resolved.

    NumPy's random generator is seeded once, then four problems are built.
    Each one must fail its constraints before ``resolve_constraints`` and
    pass them afterwards.
    """
    np.random.seed(123)
    # The feature is still young, so it is exercised on several sequences
    # instead of a single one.
    for _ in range(4):
        # The two random parts are drawn in this order; the fixed "CTC",
        # "CGTCTC" and "CGT" pieces surround them.
        first_part = dc.random_dna_sequence(100)
        second_part = dc.random_dna_sequence(100)
        dna_sequence = "".join(
            ["CTC", first_part, "CGTCTC", second_part, "CGT"]
        )

        avoid_bsmbi = dc.AvoidPattern("BsmBI_site")
        gc_content = dc.EnforceGCContent(
            mini=0.4, maxi=0.6, window=50, location=(150, 250)
        )
        unique_kmers = dc.UniquifyAllKmers(k=9, location=(10, 100))

        problem = dc.CircularDnaOptimizationProblem(
            sequence=dna_sequence,
            constraints=[avoid_bsmbi, gc_content, unique_kmers],
            logger=None,
        )

        assert not problem.all_constraints_pass()
        problem.resolve_constraints()
        assert problem.all_constraints_pass()
def test_circular_sequence_optimize_with_report(tmpdir):
    """Test ``optimize_with_report`` on a CircularDnaOptimizationProblem.

    The problem must fail its constraints before the optimization. After
    it, the call must report success, the (initially empty) target folder
    must contain the written report, all constraints must pass, and the
    exported record's sequence must differ from the input sequence.
    """
    np.random.seed(123)
    dna_sequence = (
        "CTC"
        + dc.random_dna_sequence(100)
        + "CGTCTC"
        + dc.random_dna_sequence(100)
        + "CGT"
    )
    problem = dc.CircularDnaOptimizationProblem(
        sequence=dna_sequence,
        constraints=[
            dc.AvoidPattern("BsmBI_site"),
            dc.EnforceGCContent(
                mini=0.4, maxi=0.6, location=(150, 250), window=50
            ),
            dc.UniquifyAllKmers(k=9, location=(10, 100)),
        ],
        logger=None,
    )

    target = os.path.join(str(tmpdir), "circular_with_solution")
    os.mkdir(target)
    assert os.listdir(target) == []
    assert not problem.all_constraints_pass()
    success, message, _data = problem.optimize_with_report(target)
    # Surface the optimizer's own message if the run is reported as failed.
    assert success, message
    # The folder was checked to be empty above, so any content is the report.
    assert os.listdir(target) != []
    assert problem.all_constraints_pass()
    record = problem.to_record()
    assert str(record.seq) != dna_sequence
Beispiel #3
0
def test_constraints_reports():
    """Run the constraints-breaches report on the test Genbank records.

    Loads every file of ``tests/data/10_emma_genbanks``, computes the
    breaches of a fixed list of constraints, and checks the total number of
    features of the resulting records and the size of the PDF data.
    """
    genbank_dir = os.path.join("tests", "data", "10_emma_genbanks")
    records = []
    for filename in os.listdir(genbank_dir):
        genbank_path = os.path.join(genbank_dir, filename)
        records.append(dc.load_record(genbank_path, name=filename))

    # Constraints checked on each record: patterns to avoid first, then
    # the hairpin and GC-content constraints.
    avoided_patterns = (
        "BsaI_site",
        "BsmBI_site",
        "BbsI_site",
        "8x1mer",
        "5x3mer",
        "9x2mer",
    )
    constraints = [dc.AvoidPattern(pattern) for pattern in avoided_patterns]
    constraints.append(dc.AvoidHairpins(stem_size=20, hairpin_window=200))
    constraints.append(dc.EnforceGCContent(mini=0.3, maxi=0.7, window=100))

    # Breaches table -> records derived from it -> PDF data.
    breaches = cr.constraints_breaches_dataframe(constraints, records)
    breaches_records = cr.records_from_breaches_dataframe(breaches, records)
    total_features = sum(len(rec.features) for rec in breaches_records)
    assert total_features == 157

    pdf_data = cr.breaches_records_to_pdf(breaches_records)
    pdf_size = len(pdf_data)
    assert 70000 < pdf_size < 80000
Beispiel #4
0
def experiment_2(seed=123):
    """Optimize the reverse translation of a random 1000-residue protein.

    NumPy's random generator is seeded with ``seed`` first. The problem
    enforces the translation and a 0.4-0.6 GC content over windows of 50,
    with an ``e_coli`` codon-optimization objective.

    Returns the problem's sequence after ``resolve_constraints`` and
    ``optimize`` have been run.
    """
    np.random.seed(seed)
    protein = dc.random_protein_sequence(1000)
    coding_sequence = dc.reverse_translate(protein)

    keep_translation = dc.EnforceTranslation()
    gc_content = dc.EnforceGCContent(mini=0.4, maxi=0.6, window=50)
    codon_objective = dc.CodonOptimize(species="e_coli")

    problem = dc.DnaOptimizationProblem(
        sequence=coding_sequence,
        constraints=[keep_translation, gc_content],
        objectives=[codon_objective],
        logger=None,
    )
    problem.resolve_constraints()
    problem.optimize()
    return problem.sequence
Beispiel #5
0
    def work(self):
        """Generate the manufacturability report files for ``self.data``.

        The records read from ``data.files`` are checked against a fixed
        list of constraints. The breaches are exported as an Excel
        spreadsheet, as records (written into the in-memory zip archive
        only when ``data.include_genbanks`` is set), and as a PDF.

        Returns a dict with the entries ``pdf_report``, ``records`` and
        ``spreadsheet``. Each entry holds ``data`` (the output of
        ``data_to_html_data``), a file ``name`` and a ``mimetype``.
        """
        data = self.data

        self.logger(message="Generating report...")
        records = records_from_data_files(data.files)
        constraints = [
            dc.AvoidPattern("BsaI_site"),
            dc.AvoidPattern("BsmBI_site"),
            dc.AvoidPattern("BbsI_site"),
            dc.AvoidPattern("SapI_site"),
            dc.AvoidPattern("8x1mer"),
            dc.AvoidPattern("5x3mer"),
            dc.AvoidPattern("9x2mer"),
            dc.AvoidHairpins(stem_size=20, hairpin_window=200),
            dc.EnforceGCContent(mini=0.3, maxi=0.7, window=100),
            dc.EnforceGCContent(mini=0.1, maxi=0.9, window=100),
            dc.UniquifyAllKmers(k=15),
        ]

        # Spreadsheet of the breaches, kept in memory.
        dataframe = cr.constraints_breaches_dataframe(constraints, records)
        spreadsheet_io = BytesIO()
        dataframe.to_excel(spreadsheet_io)

        # From here on ``records`` are the ones derived from the breaches.
        records = cr.records_from_breaches_dataframe(dataframe, records)
        zipped_records = flametree.file_tree("@memory")
        if data.include_genbanks:
            for record in records:
                target = zipped_records._file("%s.gb" % record.id)
                write_record(record, target)

        pdf_io = BytesIO()
        cr.breaches_records_to_pdf(records, pdf_io, logger=self.logger)

        # A media type needs its "application/" type prefix to be valid.
        xlsx_mimetype = (
            "application/"
            "vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        )

        return {
            "pdf_report": {
                "data": data_to_html_data(
                    pdf_io.getvalue(),
                    "pdf",
                    filename="manufacturability_report.pdf",
                ),
                "name": "manufacturability_report.pdf",
                "mimetype": "application/pdf",
            },
            "records": {
                # The archive is closed and returned even when no Genbank
                # file was written into it.
                "data": data_to_html_data(
                    zipped_records._close(),
                    "zip",
                    filename="manufacturability_annotated_records.zip",
                ),
                "name": "manufacturability_annotated_records.zip",
                "mimetype": "application/zip",
            },
            "spreadsheet": {
                "data": data_to_html_data(
                    spreadsheet_io.getvalue(),
                    "xlsx",
                    filename="manufacturability_report.xlsx",
                ),
                "name": "manufacturability_report.xlsx",
                "mimetype": xlsx_mimetype,
            },
        }
problem:

- The sequence is designed to have a cross-origin BsmBI site that will need
  to be removed, because the location-less specification ``AvoidPattern``
  is interpreted as applying to the full circle.

- The specification ``EnforceGCContent`` is cross-origin since its location is
  1500-2500, and the sequence is ~2000bp long.

"""

import dnachisel as dc

# Two random 1000bp stretches are inserted, in order, into the fixed
# "CTC...CGTCTC...CGT" frame. The "CGT" end and "CTC" start give the
# cross-origin BsmBI site described above.
dna_sequence = "CTC%sCGTCTC%sCGT" % tuple(
    dc.random_dna_sequence(1000) for _ in range(2)
)

# The GC-content location (1500-2500) extends past the end of the
# ~2000bp sequence, making that specification cross-origin.
constraints = [
    dc.AvoidPattern("BsmBI_site"),
    dc.EnforceGCContent(
        mini=0.4, maxi=0.6, window=50, location=(1500, 2500)
    ),
    dc.UniquifyAllKmers(location=(10, 1000), k=9),
]

problem = dc.CircularDnaOptimizationProblem(
    constraints=constraints,
    sequence=dna_sequence,
)

# Print the constraints summary before and after resolution.
print("BEFORE OPTIMIZATION:\n\n", problem.constraints_text_summary())
problem.resolve_constraints()
print("AFTER OPTIMIZATION:\n\n", problem.constraints_text_summary())
from copy import deepcopy
from collections import OrderedDict
import dnachisel as dc
import pandas
import seaborn as sns

sns.set()

# SPECIFICATIONS AVAILABLE TO THE OPTIMIZATION PROBLEMS, INDEXED BY LABEL
# (create_problem looks them up by these labels)

specifications = {
    "~keep": dc.AvoidChanges(),
    "~no(CG)": dc.AvoidPattern("CG"),
    "~codon_optimize": dc.CodonOptimize(species="e_coli"),
    "~unique_kmers": dc.UniquifyAllKmers(20),
    "~gc(39%)": dc.EnforceGCContent(target=0.39, window=200),
}

# Reverse lookup: specification class -> label.
class_to_label = dict(
    (specification.__class__, spec_label)
    for spec_label, specification in specifications.items()
)

# NOTE: despite its name, ``sequence`` holds what ``dc.load_record`` returns.
sequence = dc.load_record("record.gb")


def create_problem(boost_profile):
    location = dc.Location(1000, 9247)
    objectives = []
    for spec_name, boost in boost_profile.items():
        spec = specifications[spec_name]
        spec = spec.copy_with_changes(boost=boost, location=location)
        objectives.append(spec)
    return dc.DnaOptimizationProblem(
# Read the sequence from a plain-text file.
# NOTE(review): f.read() keeps any trailing newline/whitespace present in
# the file — confirm that downstream code tolerates or strips it.
with open("sequence_to_optimize.txt", "r") as f:
    sequence = f.read()

# First commercial offer: its only sequence constraint is a maximum length
# of 4000, with per-basepair pricing of 0.20 and lead_time=10 (currency and
# time units are not visible here).
deluxe_dna = CommercialDnaOffer(
    name="DeluxeDNA.com",
    sequence_constraints=[SequenceLengthConstraint(max_length=4000)],
    pricing=PerBasepairPricing(0.20),
    lead_time=10,
)

# Second commercial offer: half the per-basepair price (0.10) and a longer
# lead_time (15), but with constraints on the sequence content.
cheap_dna = CommercialDnaOffer(
    name="CheapDNA.com",
    sequence_constraints=[
        NoPatternConstraint(enzyme="BsaI"),
        # presumably 0.3 / 0.7 are the min / max GC content — confirm
        # against the positional signature of EnforceGCContent.
        dnachisel.EnforceGCContent(0.3, 0.7, window=60),
    ],
    pricing=PerBasepairPricing(0.10),
    lead_time=15,
)

# BLOCKS TO CHUNKS ASSEMBLY

gibson_blocks_assembly_station = DnaAssemblyStation(
    name="Gibson Blocks Assembly",
    assembly_method=GibsonAssemblyMethod(
        overhang_selector=TmSegmentSelector(),
        min_segment_length=1000,
        max_segment_length=6000,
        duration=8,
        cost=16,
Beispiel #9
0
import dnachisel.reports.constraints_reports as cr
import os

# IMPORT THE 10 RECORDS FROM THE genbanks/ FOLDER

records = [
    dc.load_record(os.path.join("genbanks", genbank_file), name=genbank_file)
    for genbank_file in os.listdir("genbanks")
]

# CONSTRAINTS CHECKED ON EACH RECORD: PATTERNS TO AVOID, THEN HAIRPINS AND GC

constraints = [
    dc.AvoidPattern(pattern)
    for pattern in (
        "BsaI_site",
        "BsmBI_site",
        "BbsI_site",
        "8x1mer",
        "5x3mer",
        "9x2mer",
    )
] + [
    dc.AvoidHairpins(hairpin_window=200, stem_size=20),
    dc.EnforceGCContent(window=100, mini=0.3, maxi=0.7),
]

# WRITE THE BREACHES SPREADSHEET, THEN THE PDF OF PLOTS

dataframe = cr.constraints_breaches_dataframe(constraints, records)
dataframe.to_excel("breaches.xlsx")
# ``records`` is rebound to the records derived from the breaches table.
records = cr.records_from_breaches_dataframe(dataframe, records)
cr.breaches_records_to_pdf(records, "breaches_plots.pdf")

print("Done! Check breaches.xlsx and breaches_plots.pdf for results.")