Exemple #1
0
def test_codon_optimize_as_hard_constraint():
    """CodonOptimize used as a constraint fails at first, then is resolved.

    The same 1000-1300 region carries both an EnforceTranslation and a
    CodonOptimize constraint on a seeded random 2000-nt sequence.
    """
    numpy.random.seed(123)
    start_sequence = random_dna_sequence(2000, seed=123)
    hard_constraints = [
        EnforceTranslation(location=Location(1000, 1300)),
        CodonOptimize(location=Location(1000, 1300), species='e_coli'),
    ]
    problem = DnaOptimizationProblem(
        sequence=start_sequence,
        constraints=hard_constraints,
    )

    # The random sequence is not expected to satisfy the constraints yet.
    assert not problem.all_constraints_pass()

    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_EnforceTranslation_bacterial_valine_antisense():
    """Check start-codon preservation for a CDS placed on the -1 strand.

    The coding sequence is reverse-complemented before being handed to the
    problem, and the constraint location carries strand -1. After optimizing
    with an EnforceChanges objective, both the translation and the first
    codon (read on the coding strand) must be unchanged.
    """
    table_name = "Bacterial"
    protein = "LLTMMVTTTTVMVL"
    protein_sequence = reverse_translate(protein, table=table_name)

    # (start codon before optimization, start codon expected afterwards)
    start_codon_cases = [
        ("ATG", "ATG"),  # the methionine start codon must stay as is
        ("GTG", "GTG"),  # the valine start codon must stay as is
    ]
    for codon_before, codon_after in start_codon_cases:
        coding_strand = codon_before + protein_sequence
        antisense_location = Location(0, len(coding_strand), -1)
        cds_constraint = EnforceTranslation(
            genetic_table="Bacterial",
            start_codon="keep",
            location=antisense_location,
        )
        problem = DnaOptimizationProblem(
            sequence=reverse_complement(coding_strand),
            constraints=[cds_constraint],
            objectives=[EnforceChanges()],
            logger=None,
        )
        assert problem.constraints[0].translation == "MLLTMMVTTTTVMVL"

        problem.optimize()

        # Read the result back on the coding strand before translating it.
        optimized_coding_strand = reverse_complement(problem.sequence)
        protein_after = translate(
            optimized_coding_strand, table_name, assume_start_codon=True
        )
        assert protein_after == "M" + protein
        assert optimized_coding_strand[:3] == codon_after
Exemple #3
0
def test_enforce_pattern_basics():
    """EnforcePatternOccurence gets resolved alongside two translated regions.

    Notes carried over from the original test on what each seed covers:
      - 2 and 3: no occurrence of the pattern where 1 is wanted
      - 123456: the pattern is over-represented (4 times instead of 1)
    """
    numpy.random.seed(123)
    for seed in (2, 3, 123456):
        sequence = random_dna_sequence(5000, seed=seed)

        first_cds = EnforceTranslation(location=Location(1000, 2500))
        second_cds = EnforceTranslation(location=Location(3000, 4500))
        pattern_spec = EnforcePatternOccurence(
            "ANANANANTT", location=Location(1100, 2150)
        )

        problem = DnaOptimizationProblem(
            sequence=sequence,
            constraints=[first_cds, second_cds, pattern_spec],
            logger=None,
        )
        assert not problem.all_constraints_pass()
        problem.resolve_constraints()
        assert problem.all_constraints_pass()
Exemple #4
0
 def evaluate(self, problem):
     """Return Gen9's ninemer score for the problem's sequence.

     The score is ``-9 * N / L`` where ``L`` is the sequence length and
     ``N`` is the summed count of every 9-mer that appears more than once
     (each repeated 9-mer contributes its full number of occurrences).

     Parameters
     ----------
     problem
       Object exposing the sequence to score as ``problem.sequence``.

     Returns
     -------
     SpecEvaluation
       Evaluation carrying the score, a single location spanning the whole
       sequence, and a human-readable message.
     """
     sequence = problem.sequence
     # A sequence of length L has L - 8 windows of length 9; the previous
     # bound (L - 9) silently dropped the last window.
     all_9mers = [sequence[i:i + 9] for i in range(len(sequence) - 8)]
     number_of_non_unique_9mers = sum(
         count for count in Counter(all_9mers).values() if count > 1
     )
     score = -(9.0 * number_of_non_unique_9mers) / len(sequence)
     return SpecEvaluation(self,
                           problem,
                           score=score,
                           locations=[Location(0, len(sequence))],
                           message="Score: %.02f (%d non-unique ninemers)" %
                           (score, number_of_non_unique_9mers))
 def __init__(
         self,
         left_overhang,
         right_overhang,
         left_addition="",
         right_addition="",
         enzyme="BsmBI",
         extra_avoided_sites=(),
         description="Golden Gate domesticator",
         name="unnamed_domesticator",
         cds_by_default=False,
         constraints=(),
         objectives=(),
 ):
     """Build the flanks and site-avoidance constraints of the domesticator.

     Parameters
     ----------
     left_overhang, right_overhang
       Overhang sequences. They are stored as given on the instance and
       converted to annotated records to build the flanks.

     left_addition, right_addition
       Extra sequence placed between each overhang and the part.

     enzyme
       Name of the enzyme, looked up in the ``Restriction`` module
       (presumably Biopython's ``Bio.Restriction`` -- confirm against the
       file's imports). Its site is added to both flanks and avoided
       inside the part.

     extra_avoided_sites
       Names of additional enzymes whose sites must be avoided inside the
       part.

     description, name, cds_by_default, constraints, objectives
       Forwarded to ``PartDomesticator.__init__``; ``constraints`` is
       extended with one site-avoidance constraint factory per enzyme.
     """
     self.enzyme = enzyme
     self.left_overhang = left_overhang
     left_overhang = sequence_to_biopython_record(left_overhang)
     self.right_overhang = right_overhang
     right_overhang = sequence_to_biopython_record(right_overhang)
     for seq in [left_overhang, right_overhang]:
         annotate_record(seq, label=str(seq.seq))
     enzyme_seq = Restriction.__dict__[enzyme].site
     enzyme_seq = sequence_to_biopython_record(enzyme_seq)
     annotate_record(enzyme_seq, label=enzyme)
     self.enzyme_seq = enzyme_seq
     left_flank = self.enzyme_seq + "A" + left_overhang + left_addition
     right_flank = (right_addition + right_overhang +
                    (self.enzyme_seq + "A").reverse_complement())
     self.extra_avoided_sites = extra_avoided_sites

     def _site_avoider(enzyme_name):
         """Return a (sequence => AvoidPattern) factory for one enzyme."""

         def avoid_site(seq):
             # The constraint only covers the part, located right after
             # the left flank in the extended sequence.
             return AvoidPattern(
                 EnzymeSitePattern(enzyme_name),
                 location=Location(len(left_flank),
                                   len(left_flank) + len(seq)),
             )

         return avoid_site

     # Each factory is bound to its own enzyme name. Previously every
     # lambda referenced ``enzyme``, so the extra sites were never avoided.
     constraints = list(constraints) + [
         _site_avoider(enz)
         for enz in ([enzyme] + list(extra_avoided_sites))
     ]
     PartDomesticator.__init__(
         self,
         left_flank=left_flank,
         right_flank=right_flank,
         constraints=constraints,
         objectives=objectives,
         description=description,
         name=name,
         cds_by_default=cds_by_default,
     )
Exemple #6
0
def test_EnforceSequence():
    """EnforceSequence restricts a region to the nucleotides of one symbol.

    For the symbols W and S, a 15-nt window starting at position 50 must
    end up containing only nucleotides from "AT" and "GC" respectively,
    while three AvoidPattern constraints apply to the whole sequence.
    A second scenario checks EnforcePatternOccurence on the -1 strand.
    """
    numpy.random.seed(1234)
    n_nucleotides = 15
    start = 50
    end = start + n_nucleotides
    for symbol, nucleotides in [("W", "AT"), ("S", "GC")]:
        specs = [
            AvoidPattern("ATGC"),
            AvoidPattern("AAA"),
            AvoidPattern("GGG"),
            EnforceSequence(n_nucleotides * symbol, location=(start, end)),
        ]
        problem = DnaOptimizationProblem(
            sequence=25 * "ATGC",
            constraints=specs,
        )
        problem.max_random_iters = 10000
        problem.resolve_constraints()
        enforced_region = problem.sequence[start:end]
        assert all(n in nucleotides for n in enforced_region)

    # Test -1 strand:
    minus_strand_spec = EnforcePatternOccurence(
        pattern="CTG",  # CAG on strand +1
        occurences=2,
        strand=-1,
        location=Location(start=0, end=50),
    )
    problem = DnaOptimizationProblem(
        sequence="ATG" + "CAG" + "AGCAAGGTGCTGCT",
        constraints=[minus_strand_spec],
    )
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
Exemple #7
0
# Download a GenBank record from NCBI, then constrain every single-part gene
# whose length is a multiple of 3 and above 100 nt: keep its translation,
# avoid BsmBI sites, and keep GC content within 40-60% over 150-nt windows.
print("DOWNLOADING AND PARSING THE GENBANK DATA...")

url = (
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?"
    "db=nucleotide&id=48994873&rettype=gb&retmode=txt"
)
# Use a context manager so the HTTP response is closed once it has been read
# (assumes `request` is `urllib.request` -- confirm against the imports).
with request.urlopen(url) as response:
    genbank_data = response.read().decode("utf-8")
genbank_record = load_record(StringIO(genbank_data), file_format="genbank")

print("INITIALIZING THE PROBLEM WITH CONSTRAINTS FOR EACH GENE...")

constraints = []
for feature in genbank_record.features:
    # Only genes made of one contiguous part are considered.
    if feature.type != "gene" or len(feature.location.parts) != 1:
        continue
    location = Location.from_biopython_location(feature.location)
    # Skip genes that are not a whole number of codons, or are too short.
    if len(location) % 3 != 0 or len(location) <= 100:
        continue
    constraints.extend([
        EnforceTranslation(location=location),
        AvoidPattern("BsmBI_site", location),
        EnforceGCContent(
            mini=0.40, maxi=0.60, window=150, location=location
        ),
    ])
problem = DnaOptimizationProblem(genbank_record, constraints)

print("RESOLVING THE CONSTRAINTS...")

problem.logger.ignore_bars_under = 50
problem.resolve_constraints()
Exemple #8
0
from dnachisel import (EnforceTranslation, DnaOptimizationProblem,
                       random_dna_sequence, Location, EnforcePattern)

# Example: enforce two patterns next to two translated regions.
# Alternative seeds that were tried for the starting sequence: 2, 3, 123456.
sequence = random_dna_sequence(5000, seed=123)

constraints = [
    EnforceTranslation(Location(start, end))
    for (start, end) in [(1000, 2500), (3000, 4500)]
]
constraints += [
    EnforcePattern(motif, location=Location(start, end))
    for (motif, start, end) in [
        ("ANANANANTT", 1100, 2150),
        ("ATGATGCCTK", 2700, 2800),
    ]
]

problem = DnaOptimizationProblem(sequence=sequence, constraints=constraints)

# Summary before resolution: the constraints must not all pass yet.
print(problem.constraints_text_summary())
assert not problem.all_constraints_pass()

problem.resolve_constraints()

# Summary after resolution: every constraint must now pass.
assert problem.all_constraints_pass()
print(problem.constraints_text_summary())
Exemple #9
0
"""Example of use of the CodonOptimize specification as a hard constraint."""

from dnachisel import (DnaOptimizationProblem, random_dna_sequence,
                       CodonOptimize, Location, EnforceTranslation)

# Codon-optimize the 1000-1300 region for e_coli while keeping its
# translation, and print the constraints summary before and after resolution.
start_sequence = random_dna_sequence(2000, seed=123)
hard_constraints = [
    EnforceTranslation(location=Location(1000, 1300)),
    CodonOptimize(location=Location(1000, 1300), species='e_coli'),
]
problem = DnaOptimizationProblem(
    sequence=start_sequence,
    constraints=hard_constraints,
)

print("\nBefore resolution:\n")
summary_before = problem.constraints_text_summary()
print(summary_before)

problem.resolve_constraints()

print("\nAfter resolution:\n")
summary_after = problem.constraints_text_summary()
print(summary_after)
Exemple #10
0
			base, ext = os.path.splitext(os.path.basename(args.vector))
			output_filename = base + "_" + destination + ext
			naive_construct, objectives, constraints = load_template(args.vector, placeholder, destination)
		else:
			output_filename = destination + ".gb"
			objectives = []
			constraints = []
			naive_construct = placeholder
			whole_seq_feat = SeqFeature()
			whole_seq_feat.type = "misc_feature"
			whole_seq_feat.qualifiers['label'] = [destination]
			whole_seq_feat.location = FeatureLocation(0,len(placeholder),strand=1)
			naive_construct.features.append(whole_seq_feat)

		dest_feat = find_annotation(naive_construct, placeholder.name)
		dest_loc = Location.from_biopython_location(dest_feat.location)


		user_objectives, user_constraints = load_user_options(args, dest_loc)
	

		objectives += user_objectives
		constraints += user_constraints


		problem = DnaOptimizationProblem(str(naive_construct.seq), constraints=constraints, objectives=objectives)

		domesticator_record = problem.to_record()

		mature_construct = naive_construct
		mature_construct.features.extend(domesticator_record.features)
Exemple #11
0
    def domesticate(
        self,
        dna_sequence=None,
        protein_sequence=None,
        is_cds="default",
        codon_optimization=None,
        extra_constraints=(),
        extra_objectives=(),
        final_record_target=None,
        edit=False,
        barcode="",
        barcode_spacer="AA",
        report_target=None,
    ):
        """Domesticate a sequence.

        Parameters
        ----------

        dna_sequence
          The DNA sequence to domesticate, either as a string or as a
          ``SeqRecord``. For a record, the constraints and objectives read
          from it by ``DnaOptimizationProblem.from_record`` are shifted by
          the length of the left flank and added to the extra constraints
          and extra objectives.

        protein_sequence
          Amino-acid sequence of the protein, which will be converted into
          a DNA sequence string. When provided it replaces ``dna_sequence``
          and forces ``is_cds`` to True.

        is_cds
          If True, sequence edits are restricted to synonymous mutations.
          The value "default" means "use ``self.cds_by_default``".

        codon_optimization
          Either None for no codon optimization or the name of an organism
          supported by DnaChisel.

        extra_constraints
          List of extra constraints to apply to the domesticated sequences.
          Each constraint is either a DnaChisel constraint or a function
          (dna_sequence => DnaChisel constraint).

        extra_objectives
          List of extra optimization objectives to apply to the domesticated
          sequences. Each objective is either a DnaChisel objective or a
          function (dna_sequence => DnaChisel objective).

        final_record_target
          Path to the file where to write the final genbank.

        edit
          Turn to True to allow sequence edits. If it is False and the
          sequence is not a CDS, an ``AvoidChanges`` constraint is added, so
          a sequence that does not already satisfy all constraints yields a
          failed domestication result (i.e. with attribute ``success`` set
          to False).

        report_target
          Target for the sequence optimization report (a folder path, or a zip
          path).

        barcode
          A sequence of DNA that will be added to the left of the sequence once
          the domestication is done. NOTE(review): this parameter is not read
          anywhere in this method -- confirm where it is applied.

        barcode_spacer
          Nucleotides to be added between the barcode and the enzyme (optional,
          the idea here is that they will make sure to avoid the creation of
          unwanted cutting sites). NOTE(review): not read in this method
          either.

        Returns
        -------

        result
          A ``DomesticationResult`` built from the sequence before
          optimization, the final record, the edits record, the report data
          (None unless ``report_target`` is given), the success flag and the
          error message ("" when nothing failed).
        """
        if is_cds == "default":
            is_cds = self.cds_by_default
        if isinstance(dna_sequence, SeqRecord):
            problem = DnaOptimizationProblem.from_record(dna_sequence)
            # The record's specifications are expressed in part coordinates;
            # shift them to the coordinates of the flanked sequence.
            for spec in problem.constraints + problem.objectives:
                spec.location += len(self.left_flank)
            extra_constraints = list(extra_constraints) + problem.constraints
            # Start from the caller's extra *objectives*: using the extra
            # constraints here would drop user objectives and turn every
            # constraint into an objective.
            extra_objectives = list(extra_objectives) + problem.objectives

        if protein_sequence is not None:
            is_cds = True
            dna_sequence = reverse_translate(protein_sequence)
        # Callable specifications are factories taking the part's sequence.
        constraints = [
            c(dna_sequence) if hasattr(c, "__call__") else c
            for c in list(extra_constraints) + self.constraints
        ]
        location = Location(len(self.left_flank),
                            len(self.left_flank) + len(dna_sequence))
        if is_cds:
            constraints.append(EnforceTranslation(location=location))
        objectives = [
            o(dna_sequence) if hasattr(o, "__call__") else o
            for o in list(extra_objectives) + self.objectives
        ]
        if codon_optimization:
            objectives.append(
                CodonOptimize(species=codon_optimization, location=location))
        if self.minimize_edits:
            objectives.append(AvoidChanges())

        extended_sequence = self.left_flank + dna_sequence + self.right_flank

        if (not is_cds) and (not edit):
            constraints.append(AvoidChanges())
        problem = DnaOptimizationProblem(
            extended_sequence,
            constraints=constraints,
            objectives=objectives,
            logger=self.logger,
        )
        all_constraints_pass = problem.all_constraints_pass()
        # The AvoidChanges objective added for ``minimize_edits`` is not
        # counted as an objective that requires an optimization run.
        no_objectives = (len(problem.objectives) - self.minimize_edits) == 0
        report_data = None
        optimization_successful = True
        message = ""
        if not (all_constraints_pass and no_objectives):
            problem.n_mutations = self.simultaneous_mutations

            if report_target is not None:
                (success, message, report_data) = problem.optimize_with_report(
                    target=report_target, project_name=self.name)
                optimization_successful = success
            else:
                # Best effort: a failure is reported through the result's
                # success flag and message instead of being raised.
                try:
                    problem.resolve_constraints()
                    problem.optimize()
                except Exception as err:
                    message = str(err)
                    optimization_successful = False
        final_record = problem.to_record(
            with_original_features=True,
            with_original_spec_features=False,
            with_constraints=False,
            with_objectives=False,
        )
        edits_record = problem.to_record(
            with_original_features=True,
            with_original_spec_features=False,
            with_constraints=False,
            with_objectives=False,
            with_sequence_edits=True,
        )
        if final_record_target is not None:
            SeqIO.write(final_record, final_record_target, "genbank")

        return DomesticationResult(
            problem.sequence_before,
            final_record,
            edits_record,
            report_data,
            optimization_successful,
            message,
        )
Exemple #12
0
def test_enforce_pattern_options():
    """Regression test for Github issue #53 (EnforcePatternOccurence).

    Six cases: with and without a location, for the three kinds of
    ``strand`` option ("from_location", "both", explicit +1/-1).
    """
    sequence = "A" * 10
    pattern = "C" * 4

    def make_problem(**spec_options):
        """Build a problem with one single-occurrence pattern constraint."""
        spec = dc.EnforcePatternOccurence(
            pattern, occurences=1, **spec_options
        )
        return dc.DnaOptimizationProblem(
            sequence=sequence, constraints=[spec], logger=None
        )

    def resolved_sequence(problem):
        """Resolve the problem, check it passes, return the new sequence."""
        problem.resolve_constraints()
        assert problem.all_constraints_pass()
        return problem.sequence

    # --- No location given ---
    assert pattern in resolved_sequence(make_problem(strand="from_location"))

    assert pattern in resolved_sequence(make_problem(strand="both"))

    minus_strand_problem = make_problem(strand=-1)
    initial_evaluation = minus_strand_problem.constraints[0].evaluate(
        minus_strand_problem
    )
    assert initial_evaluation.score == -1
    # The other strand is used:
    assert dc.reverse_complement(pattern) in resolved_sequence(
        minus_strand_problem
    )

    # --- Location specified ---
    # Use the -1 strand from the location:
    problem = make_problem(
        strand="from_location", location=Location(1, 6, strand=-1)
    )
    assert dc.reverse_complement(pattern) in resolved_sequence(problem)

    # Overwrite the -1 strand to "both" (the +1 strand is used by default):
    problem = make_problem(strand="both", location=Location(1, 6, strand=-1))
    assert pattern in resolved_sequence(problem)

    # Overwrite the -1 strand to +1:
    problem = make_problem(strand=1, location=Location(1, 6, strand=-1))
    assert pattern in resolved_sequence(problem)
def test_avoid_pattern_options():
    """Regression test for Github issue #53 (AvoidPattern strand options).

    The sequence ends with the pattern. Depending on the strand actually
    used by the constraint, resolution either removes it or leaves it.
    """
    pattern = "C" * 4
    sequence = "A" * 6 + pattern

    def resolved_sequence(**avoid_options):
        """Resolve a single-AvoidPattern problem and return its sequence."""
        problem = DnaOptimizationProblem(
            sequence=sequence,
            constraints=[AvoidPattern(pattern, **avoid_options)],
            logger=None,
        )
        problem.resolve_constraints()
        assert problem.all_constraints_pass()
        return problem.sequence

    # --- No location given ---
    assert pattern not in resolved_sequence(strand="from_location")

    assert pattern not in resolved_sequence(strand="both")

    assert pattern in resolved_sequence(strand=-1)

    # --- Location specified ---
    # Sequence not changed because the location strand is -1:
    assert pattern in resolved_sequence(
        location=Location(0, 10, -1), strand="from_location"
    )

    # Sequence changed because the strand option overwrites the location:
    assert pattern not in resolved_sequence(
        location=Location(0, 10, -1), strand="both"
    )

    # Sequence not changed because the strand option overwrites the
    # location strand:
    assert pattern in resolved_sequence(
        location=Location(0, 10, 1), strand=-1
    )
def test_location_strand_gets_conserved():
    """Localizing a -1 strand AvoidPattern keeps the -1 strand."""
    constraint = AvoidPattern("AACAAAT", Location(4, 1624, -1))
    localized_constraint = constraint.localized(Location(9, 10))
    localized_bounds = localized_constraint.location.to_tuple()
    assert localized_bounds == (4, 16, -1)