Ejemplo n.º 1
0
def test_no_solution_error_exhaustive_search():
    """Check the error raised for a short, unsolvable problem.

    The all-T sequence has its positions (0, 4) protected by ``AvoidChanges``
    while ``EnforceGCContent`` demands at least 80% GC. Resolving the
    constraints is expected to raise ``NoSolutionError`` with a message
    mentioning the failed exhaustive search.
    """
    frozen_prefix = AvoidChanges((0, 4))
    high_gc = EnforceGCContent(mini=0.8)
    problem = DnaOptimizationProblem(
        sequence="TTTTTTT",
        constraints=[frozen_prefix, high_gc],
    )
    with pytest.raises(NoSolutionError) as err:
        problem.resolve_constraints()
    error_message = str(err.value)
    assert 'Exhaustive search failed' in error_message
Ejemplo n.º 2
0
def test_no_solution_error_frozen_region():
    """Check the error raised when a breach lies in a frozen region.

    A location-less ``AvoidChanges`` is combined with
    ``AvoidPattern('BsmBI_site')``; resolving the constraints is expected to
    raise ``NoSolutionError`` with a message about a region that cannot be
    mutated.
    """
    freeze_everything = AvoidChanges()
    no_bsmbi = AvoidPattern('BsmBI_site')
    problem = DnaOptimizationProblem(
        sequence="AAAAATCGTCTCTTTT",
        constraints=[freeze_everything, no_bsmbi],
    )
    with pytest.raises(NoSolutionError) as err:
        problem.resolve_constraints()
    error_message = str(err.value)
    assert 'region that cannot be mutated' in error_message
Ejemplo n.º 3
0
def test_no_solution_error_frozen_region():
    """Check the error raised when a breach lies in a frozen region.

    A location-less ``AvoidChanges`` is combined with
    ``AvoidPattern(enzyme='BsmBI')``; resolving the constraints is expected
    to raise ``NoSolutionError`` with a message reporting a constraint breach
    in a frozen region.
    """
    freeze_everything = AvoidChanges()
    no_bsmbi = AvoidPattern(enzyme='BsmBI')
    problem = DnaOptimizationProblem(
        sequence="AAAAATCGTCTCTTTT",
        constraints=[freeze_everything, no_bsmbi],
    )
    with pytest.raises(NoSolutionError) as err:
        problem.resolve_constraints()
    error_message = str(err.value)
    assert 'Constraint breach in frozen region' in error_message
Ejemplo n.º 4
0
def test_no_solution_error_random_search():
    """Check the error raised for a longer, unsolvable problem.

    Same setup as the exhaustive-search case but with a 28-nucleotide all-T
    sequence and positions (0, 10) protected. The expected
    ``NoSolutionError`` message mentions the random search instead.
    """
    frozen_prefix = AvoidChanges((0, 10))
    high_gc = EnforceGCContent(mini=0.8)
    problem = DnaOptimizationProblem(
        sequence="TTTTTTTTTTTTTTTTTTTTTTTTTTTT",
        constraints=[frozen_prefix, high_gc],
    )
    with pytest.raises(NoSolutionError) as err:
        problem.resolve_constraints()
    error_message = str(err.value)
    assert 'Random search did not' in error_message
Ejemplo n.º 5
0
def test_pattern_and_reverse():
    """Remove BsmBI sites on both strands with a bounded number of edits.

    The sequence is a concatenation of 65 repeated 6-mers (forward and
    reverse-complement forms of the site). After resolving the constraint
    and optimizing the ``AvoidChanges`` objective, fewer than 70 positions
    must have been edited.
    """
    forward_site = "CGTCTC"
    reverse_site = "GAGACG"
    repeats = (
        (10, forward_site),
        (25, reverse_site),
        (15, forward_site),
        (15, reverse_site),
    )
    sequence = "".join(count * site for count, site in repeats)
    problem = DnaOptimizationProblem(
        sequence,
        constraints=[AvoidPattern('BsmBI_site')],
        objectives=[AvoidChanges()],
    )
    problem.resolve_constraints()
    problem.optimize()
    edited_positions = sum(problem.sequence_edits_as_array())
    assert edited_positions < 70
def test_optimization_with_report_no_solution(tmpdir):
    """A failed optimization must still write a report.

    The problem forbids both BsmBI sites and any change on a seeded random
    10 kb sequence. ``optimization_with_report`` is expected to return a
    falsy success flag and to leave at least one file in the (initially
    empty) target folder.
    """
    constraints = [AvoidPattern(enzyme='BsmBI'), AvoidChanges()]
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(10000, seed=123),
        constraints=constraints,
    )
    target = os.path.join(str(tmpdir), 'no_solution')
    os.mkdir(target)
    # The folder starts empty, so anything found later is report output.
    assert os.listdir(target) == []
    outcome = optimization_with_report(target, problem)
    success, _message, _data = outcome
    assert not success
    assert os.listdir(target) != []
Ejemplo n.º 7
0
def test_avoid_changes_with_indices_as_objectives():
    """Balance ``EnforceChanges`` against an index-based ``AvoidChanges``.

    With a boost of 0.5 on ``EnforceChanges`` the test expects every position
    except the 15 protected indices to be edited; with a boost of 1.5 it
    expects all 50 positions to be edited.
    """
    numpy.random.seed(123)

    protected = [10, 20]
    protected.extend(range(30, 40))
    protected.extend([44, 45, 46])
    sequence = random_dna_sequence(50)

    weak_problem = DnaOptimizationProblem(
        sequence=sequence,
        objectives=[
            EnforceChanges(boost=0.5),
            AvoidChanges(indices=protected),
        ],
        logger=None,
    )
    weak_problem.optimize()
    # 15 protected indices are expected to stay untouched.
    assert weak_problem.number_of_edits() == 50 - 15

    strong_problem = DnaOptimizationProblem(
        sequence=sequence,
        objectives=[
            EnforceChanges(boost=1.5),
            AvoidChanges(indices=protected),
        ],
        logger=None,
    )
    strong_problem.optimize()
    assert strong_problem.number_of_edits() == 50
Ejemplo n.º 8
0
def test_optimize_with_report_no_solution(tmpdir):
    """A failed ``optimize_with_report`` must still write a report.

    The problem forbids both BsmBI sites and any change on a seeded random
    10 kb sequence. The call is expected to return a falsy success flag and
    to leave at least one file in the (initially empty) target folder.
    """
    constraints = [AvoidPattern("BsmBI_site"), AvoidChanges()]
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(10000, seed=123),
        constraints=constraints,
        logger=None,
    )
    target = os.path.join(str(tmpdir), "no_solution")
    os.mkdir(target)
    # The folder starts empty, so anything found later is report output.
    assert os.listdir(target) == []
    outcome = problem.optimize_with_report(target)
    success, _message, _data = outcome
    assert not success
    assert os.listdir(target) != []
Ejemplo n.º 9
0
def test_AvoidChanges_with_max_edits():
    """Resolve constraints under an ``AvoidChanges(max_edits=2)`` budget.

    The initial sequence is expected to violate at least one constraint;
    after ``resolve_constraints`` every constraint must pass.
    """
    numpy.random.seed(1)
    whole_sequence = (0, 11, 1)
    constraints = [
        AvoidChanges(max_edits=2),
        AvoidPattern("ATATA"),
        EnforcePatternOccurence("A", occurences=6, location=whole_sequence),
        EnforcePatternOccurence("T", occurences=4, location=whole_sequence),
    ]
    problem = DnaOptimizationProblem(
        sequence="ATATATATATA",
        constraints=constraints,
        logger=None,
    )
    passes_before = problem.all_constraints_pass()
    assert not passes_before
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_avoid_change_as_objectives_basics():
    """Higher ``AvoidChanges`` boosts must yield strictly fewer edits.

    The same seeded 1000-nucleotide sequence is optimized for windowed GC
    content with boosts 0, 0.1, 0.2 and 1. The test expects more than 40
    edits without boost, a strictly decreasing edit count as the boost
    grows, and no edit at all for the largest boost.
    """
    numpy.random.seed(123)
    edit_counts = []
    for boost in (0, 0.1, 0.2, 1):
        sequence = random_dna_sequence(1000, seed=123)
        gc_objective = EnforceGCContent(mini=0.45, maxi=0.55, window=80)
        edit_penalty = AvoidChanges(boost=boost)
        problem = DnaOptimizationProblem(
            sequence=sequence,
            objectives=[
                gc_objective.copy_with_changes(locations_span=300),
                edit_penalty.as_passive_objective(),
            ],
        )

        problem.optimize()
        edit_counts.append(
            sequences_differences(problem.sequence, problem.sequence_before)
        )
    assert edit_counts[0] > 40
    # Each boost must produce strictly fewer edits than the previous one.
    assert all(
        previous > following
        for previous, following in zip(edit_counts, edit_counts[1:])
    )
    assert edit_counts[-1] == 0
Ejemplo n.º 11
0
		EnforceTranslation(),
		EnforceGCContent(mini=0.35, maxi=0.65, window=50), #TWIST: 25% and 65% GC
	],
	objectives=[CodonOptimize(codon_usage_table=codon_table_11)],
	)


# Build the optimization problem from a taxonomic ID when no input path was
# given, then print a summary and resolve the hard constraints.
# NOTE(review): within this visible branch `problem` is only assigned when
# `protein_flag` is falsy; the reporting code below assumes it was defined
# (presumably by an earlier branch outside this excerpt) - confirm.
if taxid and not input_path:
	print("\nOptimizing codons for taxonomic ID: " + taxid)
	# Read gene fasta sequence and initiate optimizer
	if not protein_flag:
		problem = DnaOptimizationProblem(
			sequence=gene,
			constraints=[
				#EnforceSequence(sequence = "ATG", location=(0, 2)),
				# NOTE(review): presumably meant to protect the ATG start
				# codon (see the disabled line above). If locations are
				# end-exclusive, (0, 2) only covers two nucleotides -
				# confirm whether (0, 3) was intended.
				AvoidChanges(location=(0, 2)),
				# NOTE(review): "BamHI" is passed as the second positional
				# argument; verify against the AvoidPattern signature that
				# it is treated as a second pattern and not as a location.
				AvoidPattern("BsmBI_site", "BamHI"),
				EnforceTranslation(),
				# NOTE(review): the trailing comment mentions 25% while the
				# code enforces a 35% minimum - confirm which is intended.
				EnforceGCContent(mini=0.35, maxi=0.65, window=50), #TWIST: 25% and 65% GC

			],
			objectives=[CodonOptimize(species=taxid)],
		)


# Output and reporting: print the constraint and objective summaries of the
# problem before optimization, then resolve the constraints.
print("\nBefore optimization:")
print(problem.constraints_text_summary())
print(problem.objectives_text_summary())

problem.resolve_constraints(final_check=True)
Ejemplo n.º 12
0
    def domesticate(
        self,
        dna_sequence=None,
        protein_sequence=None,
        is_cds="default",
        codon_optimization=None,
        extra_constraints=(),
        extra_objectives=(),
        final_record_target=None,
        edit=False,
        barcode="",
        barcode_spacer="AA",
        report_target=None,
    ):
        """Domesticate a sequence.

        The sequence is surrounded by ``self.left_flank`` and
        ``self.right_flank``, the domesticator's constraints and objectives
        (plus any extra ones) are applied, and the resulting problem is
        resolved and optimized when needed.

        Parameters
        ----------

        dna_sequence
          The DNA sequence to domesticate: either a string or a ``SeqRecord``.
          For a ``SeqRecord``, the specifications read from the record are
          shifted by the length of the left flank and added to the extra
          constraints and objectives.

        protein_sequence
          Amino-acid sequence of the protein, which will be converted into
          a DNA sequence string. When provided, it replaces ``dna_sequence``
          and ``is_cds`` is forced to True.

        is_cds
          If True, an ``EnforceTranslation`` constraint is added over the
          sequence (sequence edits are restricted to synonymous mutations).
          The value "default" means ``self.cds_by_default`` is used.

        codon_optimization
          Either None for no codon optimization or the name of an organism
          supported by DnaChisel.

        extra_constraints
          List of extra constraints to apply to the domesticated sequences.
          Each constraint is either a DnaChisel constraint or a function
          (dna_sequence => DnaChisel constraint).

        extra_objectives
          List of extra optimization objectives to apply to the domesticated
          sequences. Each objective is either a DnaChisel objective or a
          function (dna_sequence => DnaChisel objective).

        final_record_target
          Path to the file where to write the final genbank.

        edit
          Turn to True to allow sequence edits. If it is False and the
          sequence is not a CDS, an ``AvoidChanges`` constraint is added, so
          that when not all constraints are originally satisfied, a failed
          domestication result (i.e. with attribute ``success`` set to False)
          will be returned.

        report_target
          Target for the sequence optimization report (a folder path, or a zip
          path).

        barcode
          A sequence of DNA that will be added to the left of the sequence once
          the domestication is done. (Not used by this method's body.)

        barcode_spacer
          Nucleotides to be added between the barcode and the enzyme (optional,
          the idea here is that they will make sure to avoid the creation of
          unwanted cutting sites). (Not used by this method's body.)

        Returns
        -------

        result
          A ``DomesticationResult`` built from, in order: the sequence before
          optimization, the final record, the edits record, the report data
          (None unless a report was generated), the success flag and the
          error message ("" when nothing failed).
        """
        if is_cds == "default":
            is_cds = self.cds_by_default
        if isinstance(dna_sequence, SeqRecord):
            # Specifications annotated in the record are expressed in the
            # record's coordinates: shift them to account for the left flank.
            problem = DnaOptimizationProblem.from_record(dna_sequence)
            flank_offset = len(self.left_flank)
            for spec in problem.constraints + problem.objectives:
                spec.location += flank_offset
            extra_constraints = list(extra_constraints) + problem.constraints
            # Extend the caller's objectives (not the constraints list, which
            # would turn every constraint into an objective as well).
            extra_objectives = list(extra_objectives) + problem.objectives

        if protein_sequence is not None:
            is_cds = True
            dna_sequence = reverse_translate(protein_sequence)

        # Location of the (unflanked) sequence inside the extended sequence.
        sequence_start = len(self.left_flank)
        location = Location(sequence_start, sequence_start + len(dna_sequence))

        # Callables are specification factories taking the sequence.
        constraints = [
            c(dna_sequence) if callable(c) else c
            for c in list(extra_constraints) + self.constraints
        ]
        if is_cds:
            constraints.append(EnforceTranslation(location=location))
        objectives = [
            o(dna_sequence) if callable(o) else o
            for o in list(extra_objectives) + self.objectives
        ]
        if codon_optimization:
            objectives.append(
                CodonOptimize(species=codon_optimization, location=location))
        if self.minimize_edits:
            objectives.append(AvoidChanges())

        extended_sequence = self.left_flank + dna_sequence + self.right_flank

        if (not is_cds) and (not edit):
            constraints.append(AvoidChanges())
        problem = DnaOptimizationProblem(
            extended_sequence,
            constraints=constraints,
            objectives=objectives,
            logger=self.logger,
        )
        all_constraints_pass = problem.all_constraints_pass()
        # The AvoidChanges objective added for ``minimize_edits`` is not
        # counted (assumes ``minimize_edits`` is a boolean or 0/1).
        no_objectives = (len(problem.objectives) - self.minimize_edits) == 0
        report_data = None
        optimization_successful = True
        message = ""
        # Nothing to do when all constraints pass and there is no objective.
        if not (all_constraints_pass and no_objectives):
            problem.n_mutations = self.simultaneous_mutations

            if report_target is not None:
                (success, message, report_data) = problem.optimize_with_report(
                    target=report_target, project_name=self.name)
                optimization_successful = success
            else:
                # Failures are reported through the result rather than raised.
                try:
                    problem.resolve_constraints()
                    problem.optimize()
                except Exception as err:
                    message = str(err)
                    optimization_successful = False

        record_options = dict(
            with_original_features=True,
            with_original_spec_features=False,
            with_constraints=False,
            with_objectives=False,
        )
        final_record = problem.to_record(**record_options)
        edits_record = problem.to_record(
            with_sequence_edits=True, **record_options)
        if final_record_target is not None:
            SeqIO.write(final_record, final_record_target, "genbank")

        return DomesticationResult(
            problem.sequence_before,
            final_record,
            edits_record,
            report_data,
            optimization_successful,
            message,
        )
Ejemplo n.º 13
0
    AvoidPattern("9xA"),
    AvoidPattern("9xT"),
    AvoidPattern(HomopolymerPattern("6xG")),
    AvoidPattern(HomopolymerPattern("6xC")),
    EnforceGCContent(0.4, 0.65),
    EnforceGCContent(0.25, 0.80, window=50),
]

# For each CDS, freeze a 30-nucleotide window next to it (before the start
# on strand 1, after the end otherwise) and preserve its translation.
CDS_constraints = []
for (start, end, strand) in CDS_list:
    promoter_region = (
        (start - 30, start - 1) if strand == 1 else (end + 1, end + 30)
    )
    CDS_constraints.extend([
        AvoidChanges(promoter_region),
        EnforceTranslation((start, end, strand)),
    ])

# DEFINE OBJECTIVES

# One heavily boosted GC content objective, then one codon optimization
# objective per CDS.
objectives = [EnforceGCContent(0.51, boost=10000)]
objectives += [
    CodonOptimize("e_coli", location=(start, end, strand))
    for (start, end, strand) in CDS_list
]

# DEFINE AND SOLVE THE PROBLEM

problem = DnaOptimizationProblem(
    sequence=record,
    constraints=dna_provider_constraints + CDS_constraints,
Ejemplo n.º 14
0
"""Example of use of the AvoidChanges as an objective to minimize modifications
of a sequence."""

from dnachisel import (DnaOptimizationProblem, random_dna_sequence,
                       AvoidPattern, AvoidChanges, sequences_differences,
                       EnforceGCContent)

# Note: we are not providing a location for AvoidChanges: it applies globally

# Optimize the same seeded sequence with increasing AvoidChanges boosts and
# print how many nucleotides were modified each time.
for boost in (0, 0.1, 1, 10.0):
    sequence = random_dna_sequence(1000, seed=123)
    gc_objective = EnforceGCContent(mini=0.45, maxi=0.55, window=80)
    edit_penalty = AvoidChanges(boost=boost).as_passive_objective()
    problem = DnaOptimizationProblem(
        sequence=sequence,
        objectives=[gc_objective, edit_penalty],
    )

    problem.optimize()
    differences = sequences_differences(
        problem.sequence, problem.sequence_before
    )

    report_line = "%d nucleotides modified for boost=%.1f" % (
        differences,
        boost,
    )
    print(report_line)