def test_EnforceTranslation_bacterial_valine():
    """Check that ``start_codon="keep"`` leaves the first codon untouched.

    The same reverse-translated protein is prefixed with either ATG or GTG.
    In both cases the constraint's translation must start with "M", the
    optimized sequence must still translate to the same protein, and the
    first codon must be identical before and after optimization.
    """
    table_name = "Bacterial"
    protein = "LLTMMVTTTTVMVL"
    expected_protein = "M" + protein
    protein_sequence = reverse_translate(protein, table=table_name)

    # (first codon before optimization, first codon expected afterwards)
    start_codon_cases = [
        ("ATG", "ATG"),  # methionine stays the only methionine codon
        ("GTG", "GTG"),  # valine-start-codon stays the only valine-start-codon
    ]
    for codon_before, codon_after in start_codon_cases:
        problem = DnaOptimizationProblem(
            sequence=codon_before + protein_sequence,
            constraints=[
                EnforceTranslation(
                    genetic_table=table_name, start_codon="keep"
                )
            ],
            objectives=[EnforceChanges()],
            logger=None,
        )
        assert problem.constraints[0].translation == expected_protein

        problem.optimize()

        optimized_protein = translate(
            problem.sequence, table_name, assume_start_codon=True
        )
        assert optimized_protein == expected_protein
        assert problem.sequence[:3] == codon_after
Esempio n. 2
0
    def verify_constraints(self, sequence):
        """Tell whether `sequence` satisfies every sequence constraint.

        Entries of `self.sequence_constraints` that are DNA Chisel
        `Specification` instances are collected once and cached on
        `self.dnachisel_constraints`; they are checked together through a
        single `DnaOptimizationProblem`. All other entries are called
        directly as `constraint(sequence)`.

        Raises `ImportError` if DNA Chisel constraints are present while
        `DNACHISEL_AVAILABLE` is falsy.
        """
        checks = self.sequence_constraints

        # Cache the DNA Chisel specifications on first use.
        if not hasattr(self, "dnachisel_constraints"):
            self.dnachisel_constraints = [
                candidate
                for candidate in self.sequence_constraints
                if isinstance(candidate, Specification)
            ]

        # No DNA Chisel constraint: every entry is a plain callable.
        if self.dnachisel_constraints == []:
            return all(check(sequence) for check in checks)

        if not DNACHISEL_AVAILABLE:
            raise ImportError(
                "Spotted DNA Chisel constraints, while "
                "DNA Chisel is not installed."
            )

        # An empty mutation space is passed so that it is not recomputed,
        # which would cost time and serves no purpose for a pure check.
        problem = DnaOptimizationProblem(
            sequence, self.dnachisel_constraints, mutation_space=[]
        )
        plain_checks = [
            check for check in checks if not isinstance(check, Specification)
        ]
        # The DNA Chisel verdict is evaluated last, as one extra callable.
        plain_checks.append(lambda seq: problem.all_constraints_pass())
        return all(check(sequence) for check in plain_checks)
Esempio n. 3
0
def test_no_solution_error_frozen_region():
    """A pattern to avoid inside a fully frozen sequence cannot be resolved.

    The sequence contains CGTCTC while `AvoidChanges()` forbids any edit, so
    resolving must raise `NoSolutionError` with the frozen-region message.
    """
    freeze_everything = AvoidChanges()
    no_bsmbi = AvoidPattern(enzyme="BsmBI")
    problem = DnaOptimizationProblem(
        sequence="AAAAATCGTCTCTTTT",
        constraints=[freeze_everything, no_bsmbi],
    )

    with pytest.raises(NoSolutionError) as excinfo:
        problem.resolve_constraints()

    message = str(excinfo.value)
    assert "Constraint breach in frozen region" in message
Esempio n. 4
0
def test_no_solution_error_random_search():
    """An unreachable GC target must fail with the random-search message.

    The sequence is 28 T's with positions 0-10 frozen and a minimum GC
    content of 0.8 required.
    """
    frozen_start = AvoidChanges((0, 10))
    high_gc = EnforceGCContent(mini=0.8)
    problem = DnaOptimizationProblem(
        sequence="TTTTTTTTTTTTTTTTTTTTTTTTTTTT",
        constraints=[frozen_start, high_gc],
    )

    with pytest.raises(NoSolutionError) as excinfo:
        problem.resolve_constraints()

    message = str(excinfo.value)
    assert "Random search did not" in message
def test_AvoidNonUniqueSegments_as_objective():
    """Optimizing with `AvoidNonUniqueSegments(8)` as sole objective passes.

    After `optimize()`, the objective's evaluation of the problem must
    report `passes`.
    """
    numpy.random.seed(123)
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(1000, seed=123),
        objectives=[AvoidNonUniqueSegments(8)],
    )

    problem.optimize()

    evaluation = problem.objectives[0].evaluate(problem)
    assert evaluation.passes
Esempio n. 6
0
def test_no_solution_error_exhaustive_search():
    """An unreachable GC target must fail with the exhaustive-search message.

    The sequence is 7 T's with positions 0-4 frozen and a minimum GC content
    of 0.8 required.
    """
    frozen_start = AvoidChanges((0, 4))
    high_gc = EnforceGCContent(mini=0.8)
    problem = DnaOptimizationProblem(
        sequence="TTTTTTT",
        constraints=[frozen_start, high_gc],
    )

    with pytest.raises(NoSolutionError) as excinfo:
        problem.resolve_constraints()

    message = str(excinfo.value)
    assert "Exhaustive search failed" in message
Esempio n. 7
0
def test_no_solution_error_frozen_region():
    """A BsmBI site inside a fully frozen sequence cannot be removed.

    `AvoidChanges()` forbids any edit, so resolving the `BsmBI_site` pattern
    constraint must raise `NoSolutionError` mentioning the unmutable region.
    """
    freeze_everything = AvoidChanges()
    no_bsmbi_site = AvoidPattern("BsmBI_site")
    problem = DnaOptimizationProblem(
        sequence="AAAAATCGTCTCTTTT",
        constraints=[freeze_everything, no_bsmbi_site],
    )

    with pytest.raises(NoSolutionError) as excinfo:
        problem.resolve_constraints()

    message = str(excinfo.value)
    assert "region that cannot be mutated" in message
Esempio n. 8
0
def test_constraints_text_summary():
    """The text summary must report exactly one failing evaluation.

    The problem has a GC-content constraint and an `AvoidPattern('ATT')`
    constraint on a sequence that starts with ATT.
    """
    gc_window = EnforceGCContent(mini=0.4, maxi=0.6)
    no_att = AvoidPattern("ATT")
    problem = DnaOptimizationProblem(
        sequence="ATTGCCATATGCGC",
        constraints=[gc_window, no_att],
    )

    summary = problem.constraints_text_summary()

    assert "FAILURE: 1 constraints evaluations failed" in summary
Esempio n. 9
0
def test_UniquifyAllKmers_as_objective():
    """Optimizing with `UniquifyAllKmers(8)` as sole objective passes.

    After `optimize()`, the objective's evaluation of the problem must
    report `passes`.
    """
    numpy.random.seed(123)
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(1000, seed=123),
        objectives=[UniquifyAllKmers(8)],
        logger=None,
    )

    problem.optimize()

    evaluation = problem.objectives[0].evaluate(problem)
    assert evaluation.passes
Esempio n. 10
0
def test_random_compatible_dna_sequence():
    """A sequence from `random_compatible_dna_sequence` meets its constraints.

    The generated 1000-nucleotide sequence must not contain "ATC", and a
    problem built from it with the same constraints must pass them all.
    """
    gc_window = EnforceGCContent(mini=0.4, maxi=0.6, window=50)
    no_atc = AvoidPattern("ATC")
    constraints = [gc_window, no_atc]

    generated = random_compatible_dna_sequence(1000, constraints=constraints)
    problem = DnaOptimizationProblem(
        sequence=generated, constraints=constraints
    )

    assert "ATC" not in generated
    assert problem.all_constraints_pass()
def test_optimize_with_report(tmpdir):
    """`optimize_with_report` succeeds and writes into the target folder.

    The target directory is created empty under `tmpdir`; after the call it
    must contain at least one entry and `success` must be truthy.
    """
    sequence = random_dna_sequence(10000, seed=123)
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[AvoidPattern("BsmBI_site")],
    )

    report_dir = os.path.join(str(tmpdir), "with_solution")
    os.mkdir(report_dir)
    assert os.listdir(report_dir) == []

    success, message, data = problem.optimize_with_report(report_dir)

    assert success
    assert os.listdir(report_dir) != []
Esempio n. 12
0
def test_optimize_with_report_no_solution(tmpdir):
    """`optimize_with_report` reports failure but still writes a report.

    `AvoidChanges()` freezes the whole sequence while `BsmBI_site` must be
    avoided; the call must return a falsy `success` and leave at least one
    entry in the initially empty target directory.
    """
    no_bsmbi_site = AvoidPattern("BsmBI_site")
    freeze_everything = AvoidChanges()
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(10000, seed=123),
        constraints=[no_bsmbi_site, freeze_everything],
        logger=None,
    )

    report_dir = os.path.join(str(tmpdir), "no_solution")
    os.mkdir(report_dir)
    assert os.listdir(report_dir) == []

    success, message, data = problem.optimize_with_report(report_dir)

    assert not success
    assert os.listdir(report_dir) != []
Esempio n. 13
0
def test_AvoidRareCodons_as_constraint():
    """`AvoidRareCodons` used as a constraint is resolved with 4 edits.

    The coding sequence is constrained to keep its translation while
    avoiding codons below the 0.11 threshold for "e_coli". After resolving
    the constraints, all of them must pass, exactly 4 nucleotides must have
    changed, and the translation must be unchanged.
    """
    numpy.random.seed(123)

    sequence = "ATG" "TTT" "ATA" "CCA" "CTT" "TAG"
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[EnforceTranslation(),
                     AvoidRareCodons(0.11, "e_coli")],
    )
    # The sequence is only edited by an explicit resolution step; without it
    # the edit count asserted below could never be non-zero.
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
    assert problem.sequence_edits_as_array().sum() == 4
    assert translate(problem.sequence) == translate(sequence)
Esempio n. 14
0
def test_AllowPrimer():
    """An `AllowPrimer` constraint on a poly-A sequence can be resolved.

    The constraint targets location (10, 30) with a melting-temperature
    window of 50-60, a maximal homology length of 5, and three other primers
    to avoid heterodimerization with.
    """
    primers = ["ATTGCGCCAAACT", "TAATCCACCCTAAT", "ATTCACACTTCAA"]
    primer_constraint = AllowPrimer(
        tmin=50,
        tmax=60,
        max_homology_length=5,
        location=(10, 30),
        avoid_heterodim_with=primers,
    )
    problem = DnaOptimizationProblem(
        sequence=40 * "A",
        constraints=[primer_constraint],
    )

    problem.resolve_constraints()

    assert problem.all_constraints_pass()
Esempio n. 15
0
def test_avoid_changes_with_indices_as_constraint():
    """`AvoidChanges(indices=...)` protects exactly the listed positions.

    With `EnforceChanges()` as objective on a 50-nucleotide sequence and 15
    protected indices, the optimized sequence must have 35 edits.
    """
    numpy.random.seed(123)

    protected = [10, 20]
    protected.extend(range(30, 40))
    protected.extend([44, 45, 46])
    sequence = random_dna_sequence(50)

    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[AvoidChanges(indices=protected)],
        objectives=[EnforceChanges()],
        logger=None,
    )
    problem.optimize()

    expected_edits = 50 - 15  # sequence length minus protected positions
    assert problem.number_of_edits() == expected_edits
Esempio n. 16
0
    def work(self):
        """Build a DNA Chisel problem from `self.data` and run it with report.

        When `data.editFeatures` is truthy, the record is created from
        `data.sequence` (upper-cased) and annotated with one "misc_feature"
        per entry of `data.editedFeatures`, in (start, end) order. Otherwise
        the first record read from `data.file` is used.

        Returns a dict with the zipped report ('zip_file'), the 'success'
        flag and the 'summary' returned by `optimization_with_report`.
        """
        data = self.data
        self.logger(message='Initializing...')

        if not data.editFeatures:
            records, _fmt = records_from_data_file(data.file)
            record = records[0]
        else:
            record = sequence_to_biopython_record(data.sequence.upper())
            ordered_features = sorted(
                data.editedFeatures.values(),
                key=lambda f: (f.start, f.end),
            )
            for feature in ordered_features:
                annotate_record(
                    record,
                    feature_type="misc_feature",
                    location=(feature.start, feature.end),
                    label=feature.label,
                )

        problem = DnaOptimizationProblem.from_record(record)
        problem.max_random_iters = 1000
        problem.logger = self.logger

        # "@memory" is passed as target; the third returned value is then
        # forwarded to data_to_html_data as zip data.
        success, summary, zip_data = optimization_with_report(
            target="@memory", problem=problem, project_name=record.id)

        zip_file = {
            'data': data_to_html_data(zip_data, 'zip'),
            'name': 'optimization_report.zip',
            'mimetype': 'application/zip',
        }
        return {'zip_file': zip_file, 'success': success, 'summary': summary}
Esempio n. 17
0
    def work(self):
        """Build a DNA Chisel problem from `self.data` and run it with report.

        When `data.editFeatures` is truthy, the record is created from
        `data.sequence` (upper-cased) and annotated with one "misc_feature"
        per entry of `data.editedFeatures`, in (start, end) order. Otherwise
        the first record read from `data.file` is used.

        Returns a dict with the zipped report ("zip_file"), the "success"
        flag and the "summary" returned by `optimize_with_report`.
        """
        data = self.data
        self.logger(message="Initializing...")

        if not data.editFeatures:
            record = records_from_data_files([data.file])[0]
        else:
            record = sequence_to_biopython_record(data.sequence.upper())
            ordered_features = sorted(
                data.editedFeatures.values(),
                key=lambda f: (f.start, f.end),
            )
            for feature in ordered_features:
                annotate_record(
                    record,
                    feature_type="misc_feature",
                    location=(feature.start, feature.end),
                    label=feature.label,
                )

        problem = DnaOptimizationProblem.from_record(
            record, logger=self.logger
        )
        problem.optimization_stagnation_tolerance = 30

        # "@memory" is passed as target; the third returned value is then
        # forwarded to data_to_html_data as zip data.
        success, summary, zip_data = problem.optimize_with_report(
            target="@memory", project_name=record.id)

        zip_file = {
            "data": data_to_html_data(zip_data, "zip"),
            "name": "optimization_report.zip",
            "mimetype": "application/zip",
        }
        return {"zip_file": zip_file, "success": success, "summary": summary}
Esempio n. 18
0
def load_template(filename, insert, destination):
    """Load a GenBank vector, insert a sequence, and collect its specs.

    The file `filename` is read as a single GenBank record, `insert` is
    placed into it at `destination` through `insert_into_vector`, and a
    `DnaOptimizationProblem` is built from the resulting record.

    Returns a tuple `(vector, objectives, constraints)`: the record returned
    by `insert_into_vector` and fresh lists copied from the problem's
    objectives and constraints.
    """
    template = SeqIO.read(filename, "genbank")
    vector, insert_location = insert_into_vector(template, destination, insert)

    problem = DnaOptimizationProblem.from_record(vector)
    constraints = list(problem.constraints)
    objectives = list(problem.objectives)

    # NOTE: an earlier draft (kept here only as commented-out code) meant to
    # add EnforceTranslation on the insert location and AvoidChanges on the
    # vector regions upstream and downstream of it. It was left disabled with
    # the remark that dnachisel had not implemented MultiLocation yet.

    # The returned vector is expected to be a SeqRecord object.
    return vector, objectives, constraints
Esempio n. 19
0
def test_rca_example():
    """Load a Genbank with a ~harmonize_rca feature and optimize it.

    The problem built from the file must expose a single-objective string
    representation for HarmonizeRCA from e_coli to h_sapiens.
    """
    genbank_path = os.path.join(
        "tests", "tests_from_genbanks", "genbanks", "rca_example.gb"
    )
    problem = DnaOptimizationProblem.from_record(genbank_path)

    expected_repr = "[HarmonizeRCA[0-105(+)](e_coli -> h_sapiens)]"
    assert str(problem.objectives) == expected_repr

    harmonize = problem.objectives[0]
    assert harmonize.original_species == "e_coli"
    assert problem.objectives[0].species == "h_sapiens"

    problem.optimize()
Esempio n. 20
0
def test_AvoidRareCodons_as_constraint_reversed():
    """`AvoidRareCodons` works as a constraint on the reverse strand.

    The coding sequence is reverse-complemented and both constraints are
    given a location on strand -1. After resolving the constraints, all of
    them must pass, exactly 4 nucleotides must have changed, and the
    reverse complement of the result must translate like the original.
    """
    numpy.random.seed(123)

    sequence = "ATG" "TTT" "ATA" "CCA" "CTT" "TAG"
    rev_sequence = reverse_complement(sequence)
    location = (0, len(sequence), -1)
    problem = DnaOptimizationProblem(
        sequence=rev_sequence,
        constraints=[
            EnforceTranslation(location=location),
            AvoidRareCodons(0.11, "e_coli", location=location),
        ],
    )
    # The sequence is only edited by an explicit resolution step; without it
    # the edit count asserted below could never be non-zero.
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
    assert problem.sequence_edits_as_array().sum() == 4
    new_sequence = reverse_complement(problem.sequence)
    assert translate(new_sequence) == translate(sequence)
Esempio n. 21
0
def test_avoid_matches_with_list():
    """`AvoidMatches` built from a list of sequences finds and fixes matches.

    The tested sequence embeds two 6-mers that also occur in the avoided
    sequences, so the initial evaluation must report 2 locations. After
    resolution, all constraints must pass.
    """
    pattern_1 = "CGTCTC"
    pattern_2 = "TGCACA"
    sequence = 10 * "A" + pattern_1 + 20 * "A" + pattern_2 + 10 * "A"
    avoided_seqs = [
        10 * "G" + pattern_1 + 10 * "G",
        10 * "G" + pattern_2 + 10 * "G",
    ]
    constraint = AvoidMatches(sequences=avoided_seqs, match_length=6)
    try:
        problem = DnaOptimizationProblem(sequence=sequence,
                                         constraints=[constraint],
                                         logger=None)
        cst_eval = constraint.evaluate(problem)
        assert len(cst_eval.locations) == 2
        problem.resolve_constraints()
        assert problem.all_constraints_pass()
    finally:
        # Clean up even when an assertion above fails, so that the
        # constraint's temporary directory is not left behind.
        constraint.remove_temp_directory()
def test_EnforceTranlationError():
    """Providing a location that is not multiple of 3 raises an error.

    Building a problem with `EnforceTranslation(location=(0, 16))` must raise
    a `ValueError` whose message mentions the location and its length.
    """
    numpy.random.seed(1234)
    sequence = reverse_translate(random_protein_sequence(50, seed=123))
    with pytest.raises(ValueError) as err:
        # Construction alone is expected to raise; the instance is not used,
        # so it is not bound to a name.
        DnaOptimizationProblem(
            sequence=sequence,
            constraints=[EnforceTranslation(location=(0, 16))],
        )
    assert "Location 0-16(+) has length 16" in str(err.value)
Esempio n. 23
0
 def compute_forbidden_patterns_locations(self, record):
     """Return an array flagging positions covered by a forbidden pattern.

     ``arr[i] == 1`` means that i is surrounded by a user-forbidden pattern.
     The patterns are homopolymers of length 5 for each of A, T, G, C, and
     the repeated k-mers (k, n) = (4, 2), (3, 3), (2, 4). Breached
     locations are merged with ``group_overlapping_segments`` and converted
     with ``segments_to_array`` over ``len(record)`` positions.
     """
     homopolymer_constraints = [
         AvoidPattern(homopolymer_pattern(nucleotide, 5))
         for nucleotide in 'ATGC'
     ]
     repeat_constraints = [
         AvoidPattern(repeated_kmers(kmer_size, repeats))
         for kmer_size, repeats in [(4, 2), (3, 3), (2, 4)]
     ]
     problem = DnaOptimizationProblem(
         sequence=record,
         constraints=homopolymer_constraints + repeat_constraints,
     )

     # Collect the (start, end) span of every location reported by a
     # failing constraint evaluation.
     breach_spans = []
     for evaluation in problem.constraints_evaluations():
         for feature in evaluation.locations_to_biopython_features():
             if not evaluation.passes:
                 breach_spans.append(
                     (feature.location.start, feature.location.end)
                 )

     merged_spans = group_overlapping_segments(breach_spans)
     return segments_to_array(merged_spans, len(record))
Esempio n. 24
0
def test_record_with_multispec_feature():
    """One feature label holding three specs yields three constraints.

    The label combines a GC-content spec, a BsaI-site avoidance spec and a
    keep spec joined with "&". The first constraint must have `mini == 0.4`
    and the second must carry a pattern named "BsaI".
    """
    record = sequence_to_biopython_record(random_dna_sequence(100))
    annotate_record(
        record, label="@gc(40-60%/20bp) & @no(BsaI_site) & @keep"
    )

    problem = DnaOptimizationProblem.from_record(record)

    assert len(problem.constraints) == 3
    gc_constraint, pattern_constraint, _keep_constraint = problem.constraints
    assert gc_constraint.mini == 0.4
    assert pattern_constraint.pattern.name == "BsaI"
def test_optimization_with_report_no_solution(tmpdir):
    """`optimization_with_report` reports failure but still writes a report.

    `AvoidChanges()` freezes the whole sequence while the BsmBI enzyme site
    must be avoided; the call must return a falsy `success` and leave at
    least one entry in the initially empty target directory.
    """
    no_bsmbi = AvoidPattern(enzyme="BsmBI")
    freeze_everything = AvoidChanges()
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(10000, seed=123),
        constraints=[no_bsmbi, freeze_everything],
    )

    report_dir = os.path.join(str(tmpdir), "no_solution")
    os.mkdir(report_dir)
    assert os.listdir(report_dir) == []

    success, message, data = optimization_with_report(report_dir, problem)

    assert not success
    assert os.listdir(report_dir) != []
Esempio n. 26
0
def test_cuba_example_1():
    """Resolve then optimize the problem defined by cuba_example_1.gbk.

    The problem is built directly from the file path. Constraints must fail
    initially and pass after resolution; the objective score sum must be
    below -100 before optimization and above -0.1 after it.
    """
    genbank_path = os.path.join(
        "tests", "tests_from_genbanks", "genbanks", "cuba_example_1.gbk"
    )
    problem = DnaOptimizationProblem.from_record(genbank_path)

    # Constraints: failing at first, satisfied once resolved.
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()

    # Objectives: poor score before optimization, near zero afterwards.
    score_before = problem.objective_scores_sum()
    assert score_before < -100
    problem.optimize()
    score_after = problem.objective_scores_sum()
    assert score_after > -0.1
Esempio n. 27
0
def test_cuba_example_1():
    """Resolve then optimize the problem defined by cuba_example_1.gbk.

    The record is loaded with `load_record` before building the problem.
    Constraints must fail initially and pass after resolution; the objective
    score sum must be below -100 before optimization and above -0.1 after.
    """
    genbank_path = os.path.join(
        "tests", "tests_from_genbanks", "genbanks", "cuba_example_1.gbk"
    )
    problem = DnaOptimizationProblem.from_record(load_record(genbank_path))

    # Constraints: failing at first, satisfied once resolved.
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()

    # Objectives: poor score before optimization, near zero afterwards.
    score_before = problem.objective_scores_sum()
    assert score_before < -100
    problem.optimize()
    score_after = problem.objective_scores_sum()
    assert score_after > -0.1
def test_avoid_change_as_objectives_basics():
    """A higher `AvoidChanges` boost must lead to fewer sequence edits.

    The same seeded 1000-nucleotide sequence is optimized for GC content
    with a passive `AvoidChanges` objective at boosts 0, 0.1, 0.2 and 1.
    The number of differences with the initial sequence must exceed 40 at
    boost 0, strictly decrease as the boost grows, and be 0 at boost 1.
    """
    numpy.random.seed(123)
    boosts = (0, 0.1, 0.2, 1)
    edit_counts = []
    for boost in boosts:
        sequence = random_dna_sequence(1000, seed=123)
        gc_objective = EnforceGCContent(
            mini=0.45, maxi=0.55, window=80
        ).copy_with_changes(locations_span=300)
        keep_objective = AvoidChanges(boost=boost).as_passive_objective()
        problem = DnaOptimizationProblem(
            sequence=sequence,
            objectives=[gc_objective, keep_objective],
        )

        problem.optimize()

        edit_counts.append(
            sequences_differences(problem.sequence, problem.sequence_before)
        )

    assert edit_counts[0] > 40
    assert (
        edit_counts[0] > edit_counts[1] > edit_counts[2] > edit_counts[3]
    )
    assert edit_counts[-1] == 0
Esempio n. 29
0
def test_pattern_and_reverse():
    """BsmBI sites in both orientations are removed with under 70 edits.

    The sequence concatenates 65 sites in total (CGTCTC and its reverse
    complement GAGACG). Constraints are resolved, then the `AvoidChanges`
    objective is optimized, and the total edit count is checked.
    """
    forward_site = "CGTCTC"
    reverse_site = "GAGACG"
    sequence = (
        10 * forward_site
        + 25 * reverse_site
        + 15 * forward_site
        + 15 * reverse_site
    )
    problem = DnaOptimizationProblem(
        sequence,
        constraints=[AvoidPattern("BsmBI_site")],
        objectives=[AvoidChanges()],
    )

    problem.resolve_constraints()
    problem.optimize()

    total_edits = sum(problem.sequence_edits_as_array())
    assert total_edits < 70
Esempio n. 30
0
def test_EnforceRegionsCompatibility():
    """Four 4-nucleotide regions must pairwise differ by at least 2bp.

    The compatibility constraint is combined with a windowed GC-content
    constraint. Both must fail on the initial random sequence, pass after
    resolution, and every pair of regions in the final sequence must then
    differ in at least 2 positions.
    """
    numpy.random.seed(123)

    def compatibility_condition(location1, location2, problem):
        """Return True if the two regions differ in at least 2 positions."""
        seq1 = location1.extract_sequence(problem.sequence)
        seq2 = location2.extract_sequence(problem.sequence)
        return sequences_differences(seq1, seq2) >= 2

    locations = [(0, 4), (50, 54), (100, 104), (150, 154)]
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(200, seed=123),
        constraints=[
            EnforceRegionsCompatibility(
                locations=locations,
                compatibility_condition=compatibility_condition,
                condition_label="2bp difference",
            ),
            EnforceGCContent(mini=0.4, maxi=0.6, window=40),
        ],
        logger=None,
    )
    assert not any([e.passes for e in problem.constraints_evaluations()])
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
    seq = problem.sequence
    # all() is required here: asserting the bare list would always succeed,
    # because a non-empty list is truthy whatever booleans it contains.
    assert all(
        sequences_differences(seq[s1:e1], seq[s2:e2]) >= 2
        for (s1, e1), (s2, e2) in itertools.combinations(locations, 2)
    )