def experiment_1(seed=123):
    """Run the two-stage GC-content optimization behind test_determinism.py.

    A random protein sequence (length argument 50) is reverse-translated,
    then optimized twice in a row: first towards a GC content target of 1,
    then, starting from that result, towards a target of 0.5. Both stages
    enforce the translation as a constraint and run without a logger.

    Parameters
    ----------

    seed
      Value passed to ``np.random.seed`` before any sequence is generated.

    Returns
    -------

    sequence
      The ``sequence`` attribute of the second problem after ``optimize()``.
    """
    np.random.seed(seed)

    random_protein = dc.random_protein_sequence(50)
    starting_sequence = dc.reverse_translate(random_protein)

    # Stage 1: push the GC content as high as the translation allows.
    gc_max_problem = dc.DnaOptimizationProblem(
        sequence=starting_sequence,
        constraints=[dc.EnforceTranslation()],
        objectives=[dc.EnforceGCContent(target=1)],
        logger=None,
    )
    gc_max_problem.optimize()

    # Stage 2: start from the GC-rich result and aim for 50% GC.
    gc_half_problem = dc.DnaOptimizationProblem(
        sequence=gc_max_problem.sequence,
        constraints=[dc.EnforceTranslation()],
        objectives=[dc.EnforceGCContent(target=0.5)],
        logger=None,
    )
    gc_half_problem.optimize()

    return gc_half_problem.sequence
Exemple #2
0
def test_whole_sequence_change_objective_100():
    """A default EnforceChanges objective should lead to 50 edits on a
    random sequence generated with length argument 50."""
    np.random.seed(123)
    starting_sequence = dc.random_dna_sequence(50)
    change_everything = dc.EnforceChanges()
    problem = dc.DnaOptimizationProblem(
        sequence=starting_sequence,
        objectives=[change_everything],
    )
    problem.optimize()
    edit_count = problem.number_of_edits()
    assert edit_count == 50
Exemple #3
0
def test_parameterization():
    """Check how EnforceChanges resolves its parameters.

    The same three constraints and three objectives are built twice on a
    200-nucleotide poly-A sequence: once by direct instantiation and once
    from annotation labels on a Biopython record. Both problems must expose
    the same ``minimum`` / ``minimum_percent`` / ``amount`` /
    ``amount_percent`` values.
    """

    def all_none(variables):
        return all(value is None for value in variables)

    direct_constraints = [
        dc.EnforceChanges(),
        dc.EnforceChanges(minimum=20),
        dc.EnforceChanges(minimum_percent=5),
    ]
    direct_objectives = [
        dc.EnforceChanges(),
        dc.EnforceChanges(amount=20),
        dc.EnforceChanges(amount_percent=5),
    ]
    direct_problem = dc.DnaOptimizationProblem(
        sequence=200 * "A",
        constraints=direct_constraints,
        objectives=direct_objectives,
    )

    # Same specifications, expressed as record annotations ("@" labels are
    # read back as constraints and "~" labels as objectives, as the checks
    # below rely on).
    record = dc.sequence_to_biopython_record(200 * "A")
    annotation_labels = (
        "@change",
        "@change(minimum=20)",
        "@change(minimum=5%)",
        "~change",
        "~change(amount=20)",
        "~change(5%)",
    )
    for annotation_label in annotation_labels:
        dc.annotate_record(record, label=annotation_label)
    record_problem = dc.DnaOptimizationProblem.from_record(record)

    for problem in (direct_problem, record_problem):

        # --- constraints -------------------------------------------------

        whole_constraint = problem.constraints[0]
        assert whole_constraint.minimum == 200
        assert whole_constraint.minimum_percent == 100
        assert all_none(
            [whole_constraint.amount, whole_constraint.amount_percent]
        )

        absolute_constraint = problem.constraints[1]
        assert absolute_constraint.minimum == 20
        assert all_none(
            [
                absolute_constraint.minimum_percent,
                absolute_constraint.amount,
                absolute_constraint.amount_percent,
            ]
        )

        percent_constraint = problem.constraints[2]
        assert percent_constraint.minimum == 10  # 5% of 200 nucleotides
        assert percent_constraint.minimum_percent == 5
        assert all_none(
            [percent_constraint.amount, percent_constraint.amount_percent]
        )

        # --- objectives --------------------------------------------------

        whole_objective = problem.objectives[0]
        assert whole_objective.amount == 200
        assert whole_objective.amount_percent == 100
        assert all_none(
            [whole_objective.minimum, whole_objective.minimum_percent]
        )

        absolute_objective = problem.objectives[1]
        assert absolute_objective.amount == 20
        assert all_none(
            [
                absolute_objective.minimum_percent,
                absolute_objective.minimum,
                absolute_objective.amount_percent,
            ]
        )

        percent_objective = problem.objectives[2]
        assert percent_objective.amount == 10  # 5% of 200 nucleotides
        assert percent_objective.amount_percent == 5
        assert all_none(
            [percent_objective.minimum, percent_objective.minimum_percent]
        )
Exemple #4
0
def make_restriction_part(part_length, left_overhang, right_overhang,
                          enzyme, forbidden_enzymes, assembly_enzyme='BsmBI'):
    """Build an annotated record for a random part flanked by assembly sites.

    The part is ``left_overhang + random core + right_overhang``. The
    sequence is then solved so that both overhangs are left unchanged, the
    site of ``enzyme`` is enforced inside the core, and the sites of all
    ``forbidden_enzymes`` and of ``assembly_enzyme`` are avoided. The solved
    record is annotated (overhangs and enzyme site) and flanked on both
    sides by the assembly enzyme's site followed by an "A" (reverse-
    complemented on the right-hand side).

    Parameters
    ----------

    part_length
      Length of the random core placed between the two overhangs.

    left_overhang, right_overhang
      Sequences placed on each side of the core and protected from changes.

    enzyme
      Name of the enzyme whose site is enforced in the core region.

    forbidden_enzymes
      Iterable (list, tuple, ...) of names of enzymes whose sites must be
      avoided. It is not modified.

    assembly_enzyme
      Name of the enzyme used for the flanks; its site is also avoided
      inside the part. Looked up by name on the ``Restriction`` module, so
      an unknown name raises ``AttributeError``.

    Returns
    -------

    record
      ``flank + annotated part + reverse-complemented flank``.
    """
    l_left = len(left_overhang)
    l_right = len(right_overhang)
    core_end = l_left + part_length
    left_overhang_location = (0, l_left)
    right_overhang_location = (core_end, core_end + l_right)
    center_location = (l_left, core_end)

    core_sequence = (left_overhang + dc.random_dna_sequence(part_length)
                     + right_overhang)
    enforce_enzyme = dc.EnforcePatternOccurence(
        enzyme=enzyme, location=center_location)

    # list(...) lets callers pass any iterable (e.g. a tuple) instead of
    # failing on `tuple + list`.
    avoided_enzymes = list(forbidden_enzymes) + [assembly_enzyme]
    constraints = [
        dc.AvoidChanges(left_overhang_location),
        dc.AvoidChanges(right_overhang_location),
        enforce_enzyme,
    ] + [
        dc.AvoidPattern(enzyme=enzyme_name)
        for enzyme_name in avoided_enzymes
    ]
    problem = dc.DnaOptimizationProblem(
        sequence=core_sequence,
        constraints=constraints,
    )
    problem.resolve_constraints()

    core_record = dc.sequence_to_biopython_record(problem.sequence)
    for loc in (left_overhang_location, right_overhang_location):
        dc.annotate_record(core_record, loc, 'overhang')

    # Annotate the first match reported by the enforced-site specification.
    site_location = enforce_enzyme.evaluate(problem).data['matches'][0]
    dc.annotate_record(core_record, site_location.to_tuple(), enzyme)

    assembly_site = getattr(Restriction, assembly_enzyme).site
    flank = dc.sequence_to_biopython_record(assembly_site + 'A')
    dc.annotate_record(flank, label='flank')
    return flank + core_record + flank.reverse_complement()
Exemple #5
0
def test_whole_sequence_change_constraint_100():
    """A default EnforceChanges constraint should already pass, with 50
    edits, right after the problem is created (no explicit resolution)."""
    np.random.seed(123)
    starting_sequence = dc.random_dna_sequence(50)
    problem = dc.DnaOptimizationProblem(
        sequence=starting_sequence,
        constraints=[dc.EnforceChanges()],
    )
    # Passing without resolve_constraints() is attributed to the
    # "initial sequence constraining" done when the problem is built.
    constraints_ok = problem.all_constraints_pass()
    assert constraints_ok
    assert problem.number_of_edits() == 50
Exemple #6
0
def test_SequenceLengthBounds():
    """SequenceLengthBounds(500, 800) should pass for a 750-long sequence
    and fail for 400-long and 1200-long ones."""
    expected_by_length = {750: True, 400: False, 1200: False}
    for sequence_length, should_pass in expected_by_length.items():
        random_sequence = dc.random_dna_sequence(sequence_length)
        problem = dc.DnaOptimizationProblem(
            sequence=random_sequence,
            constraints=[dc.SequenceLengthBounds(500, 800)],
            logger=None,
        )
        assert problem.all_constraints_pass() == should_pass
Exemple #7
0
def test_enforce_changes_with_indices_as_constraint():
    """An EnforceChanges constraint restricted to 15 indices should yield
    15 edits as soon as the problem is created."""
    np.random.seed(123)
    # 2 isolated positions + 10 contiguous positions + 3 more = 15 indices.
    indices = [10, 20, *range(30, 40), 44, 45, 46]
    starting_sequence = dc.random_dna_sequence(50)
    enforce_at_indices = dc.EnforceChanges(indices=indices)
    problem = dc.DnaOptimizationProblem(
        sequence=starting_sequence,
        constraints=[enforce_at_indices],
    )
    assert problem.number_of_edits() == 15
def test_insert_and_erase_pattern():
    """Drive the occurrence count of "ATGC" from 0 to 5, then from 5 to 2.

    Every stage keeps the protein translation enforced. Before each
    resolution the occurrence constraint's score is checked against the
    expected deficit/excess (-5, then -3).
    """
    numpy.random.seed(123)
    protein = dc.random_protein_sequence(100)
    pattern = "ATGC"

    def occurrence_problem(dna, wanted_occurences):
        # Problem requiring exactly `wanted_occurences` pattern matches while
        # preserving the translation of `dna`.
        return dc.DnaOptimizationProblem(
            sequence=dna,
            constraints=[
                dc.EnforcePatternOccurence(
                    pattern, occurences=wanted_occurences
                ),
                dc.EnforceTranslation(),
            ],
            logger=None,
        )

    # Stage 0: a sequence coding for the protein with no pattern occurrence.
    pattern_free_sequence = dc.random_compatible_dna_sequence(
        sequence_length=300,
        constraints=[
            dc.EnforceTranslation(translation=protein),
            dc.AvoidPattern(pattern),
        ],
        logger=None,
    )

    # Stage 1: go from 0 to 5 occurrences.
    insertion_problem = occurrence_problem(pattern_free_sequence, 5)
    insertion_score = insertion_problem.constraints[0].evaluate(
        insertion_problem
    ).score
    assert insertion_score == -5
    insertion_problem.resolve_constraints()
    assert insertion_problem.all_constraints_pass()
    five_sites_sequence = insertion_problem.sequence

    # Stage 2: go from 5 down to 2 occurrences.
    erasure_problem = occurrence_problem(five_sites_sequence, 2)
    erasure_score = erasure_problem.constraints[0].evaluate(
        erasure_problem
    ).score
    assert erasure_score == -3
    erasure_problem.resolve_constraints()
    assert erasure_problem.all_constraints_pass()
Exemple #9
0
def test_whole_sequence_change_constraint_4():
    """EnforceChanges(minimum=4) should fail on the untouched sequence and,
    once resolved, result in between 4 and 6 edits."""
    np.random.seed(123)
    problem = dc.DnaOptimizationProblem(
        sequence=dc.random_dna_sequence(50),
        constraints=[dc.EnforceChanges(minimum=4)],
    )
    # Unlike the default (100%) case, the constraint is not satisfied upfront.
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    # The edit count is reported through the assertion message rather than
    # an unconditional debug print.
    edit_count = problem.number_of_edits()
    assert 6 >= edit_count >= 4, edit_count
Exemple #10
0
def random_compatible_dna_sequence(sequence_length, constraints, probas=None,
                                   seed=None, max_random_iters=5000,
                                   logger='bar', **kwargs):
    """Return a random DNA sequence after resolving the given constraints.

    Parameters
    ----------

    sequence_length
      Length argument forwarded to ``dc.random_dna_sequence``.

    constraints
      Constraints given to the ``DnaOptimizationProblem`` built on the
      random sequence.

    probas, seed
      Forwarded unchanged to ``dc.random_dna_sequence``.

    max_random_iters
      Value assigned to the problem's ``max_random_iters`` attribute before
      the constraints are resolved.

    logger
      Forwarded to the ``DnaOptimizationProblem`` constructor.

    **kwargs
      Extra keyword arguments forwarded to ``resolve_constraints``.
    """
    initial_sequence = dc.random_dna_sequence(
        sequence_length,
        probas=probas,
        seed=seed,
    )
    solver = dc.DnaOptimizationProblem(
        initial_sequence,
        constraints=constraints,
        logger=logger,
    )
    solver.max_random_iters = max_random_iters
    solver.resolve_constraints(**kwargs)
    return solver.sequence
Exemple #11
0
def test_whole_sequence_change_objective_20_going_down():
    """Resolving AvoidPattern("ATA") on an "AT" repeat should cause at
    least 24 edits; optimizing EnforceChanges(amount=20) should then bring
    the edit count down to exactly 20."""
    np.random.seed(123)
    at_repeat = 20*"AT"
    problem = dc.DnaOptimizationProblem(
        sequence=at_repeat,
        constraints=[dc.AvoidPattern("ATA")],
        objectives=[dc.EnforceChanges(amount=20)],
    )
    problem.mutations_per_iteration = 2

    # Constraint resolution alone overshoots the 20-edit target...
    problem.resolve_constraints()
    edits_after_resolution = problem.number_of_edits()
    assert edits_after_resolution >= 24

    # ...and the objective is expected to pull the count back down.
    problem.optimize()
    edits_after_optimization = problem.number_of_edits()
    assert edits_after_optimization == 20
def create_problem(boost_profile):
    """Build an optimization problem whose objectives follow a boost profile.

    Parameters
    ----------

    boost_profile
      Mapping ``{specification name: boost}``. Each name is looked up in the
      module-level ``specifications`` and the matching specification is
      copied with the given boost, relocated to ``Location(1000, 9247)``.

    Returns
    -------

    problem
      A ``DnaOptimizationProblem`` on the module-level ``sequence``, with
      translation enforced on the same location and the copied
      specifications as objectives.
    """
    target_location = dc.Location(1000, 9247)
    boosted_objectives = [
        specifications[name].copy_with_changes(
            boost=boost_value, location=target_location
        )
        for name, boost_value in boost_profile.items()
    ]
    translation_constraint = dc.EnforceTranslation(location=target_location)
    return dc.DnaOptimizationProblem(
        sequence,
        constraints=[translation_constraint],
        objectives=boosted_objectives,
    )
Exemple #13
0
def test_maximal_protein_sequence_change():
    """Maximizing changes under an enforced translation should produce 238
    edits for this seed while still translating to the original protein."""
    np.random.seed(123)
    protein = dc.random_protein_sequence(200)
    coding_sequence = dc.reverse_translate(protein)
    problem = dc.DnaOptimizationProblem(
        sequence=coding_sequence,
        constraints=[dc.EnforceTranslation()],
        objectives=[dc.EnforceChanges()],
    )
    problem.resolve_constraints()
    problem.optimize()

    edit_count = problem.number_of_edits()
    assert edit_count == 238
    retranslated = dc.translate(problem.sequence)
    assert retranslated == protein
Exemple #14
0
def test_enforce_changes_with_indices_vs_avoid_changes():
    """Pit EnforceChanges (on 15 indices) against AvoidChanges as objectives.

    With AvoidChanges boosted at 0.5 all 15 indexed positions are expected
    to change; with a boost of 1.5 no edit at all is expected.
    """
    np.random.seed(123)
    indices = [10, 20, *range(30, 40), 44, 45, 46]
    sequence = dc.random_dna_sequence(50)

    # (boost given to AvoidChanges, expected number of edits), run in order.
    scenarios = ((0.5, 15), (1.5, 0))
    for avoid_boost, expected_edits in scenarios:
        problem = dc.DnaOptimizationProblem(
            sequence=sequence,
            objectives=[
                dc.EnforceChanges(indices=indices),
                dc.AvoidChanges(boost=avoid_boost),
            ],
        )
        problem.optimize()
        assert problem.number_of_edits() == expected_edits
def experiment_2(seed=123):
    """Codon-optimize a random coding sequence under a GC-content constraint.

    A random protein sequence (length argument 1000) is reverse-translated.
    The problem enforces the translation and a GC content between 0.4 and
    0.6 over a window of 50, with codon optimization for "e_coli" as the
    objective. Constraints are resolved first, then the objective is
    optimized.

    Parameters
    ----------

    seed
      Value passed to ``np.random.seed`` before any sequence is generated.

    Returns
    -------

    sequence
      The problem's ``sequence`` attribute after optimization.
    """
    np.random.seed(seed)
    random_protein = dc.random_protein_sequence(1000)
    coding_sequence = dc.reverse_translate(random_protein)

    hard_constraints = [
        dc.EnforceTranslation(),
        dc.EnforceGCContent(mini=0.4, maxi=0.6, window=50),
    ]
    codon_objective = dc.CodonOptimize(species="e_coli")
    problem = dc.DnaOptimizationProblem(
        sequence=coding_sequence,
        constraints=hard_constraints,
        objectives=[codon_objective],
        logger=None,
    )

    problem.resolve_constraints()
    problem.optimize()
    return problem.sequence
Exemple #16
0
def random_compatible_dna_sequence(sequence_length,
                                   constraints,
                                   probas=None,
                                   seed=None,
                                   max_random_iters=5000,
                                   logger="bar",
                                   **kwargs):
    """Generate a random sequence that complies with the given constraints.

    A random sequence is drawn first, then a constraints-only optimization
    problem is resolved on it and the resulting sequence is returned.

    Parameters
    ----------

    sequence_length
      Length of the random sequence to generate.

    constraints
      List of all DnaChisel specifications that will be applied as
      constraints.

    probas
      Either None for a fully random initial sequence, or a dict of the form
      {"A": 0.5, "T": 0.2, ...} to tune the initial nucleotide
      representation.

    seed
      Optional seed for the random number generator, for reproducibility.

    max_random_iters
      Maximum number of random tries per location for the solver.

    logger
      Either 'bar' or None (no logger) or any proglog logger.

    **kwargs
      Extra keyword arguments forwarded to ``resolve_constraints``.

    """
    random_start = dc.random_dna_sequence(
        sequence_length, probas=probas, seed=seed
    )
    solver = dc.DnaOptimizationProblem(
        random_start, constraints=constraints, logger=logger
    )
    solver.max_random_iters = max_random_iters
    solver.resolve_constraints(**kwargs)
    return solver.sequence
    supplier=[deluxe_dna, cheap_dna],
    coarse_grain=30,
    fine_grain=False,
    memoize=True,
    a_star_factor="auto",
)

quote_before = gibson_blocks_assembly_station.get_quote(
    sequence, with_assembly_plan=True)

print("LOCATING PRICE-DRIVING REGIONS AND OPTIMIZING... PLEASE WAIT")

objective = OptimizeManufacturability(gibson_blocks_assembly_station)

problem = dnachisel.DnaOptimizationProblem(
    sequence=sequence,
    constraints=[dnachisel.EnforceTranslation(location=(0, 9999))],
    objectives=[objective])

problem.randomization_threshold = 0  # Forces "random search" mode
problem.max_random_iters = 5
problem.optimize()

print("OPTIMIZATION DONE, GENERATING REPORT")

quote_after = gibson_blocks_assembly_station.get_quote(problem.sequence,
                                                       with_assembly_plan=True)

fig, axes = plt.subplots(2, figsize=(6, 4))
for title, quote, ax in zip(
    ["Before, optimization", "After optimization"],
    [quote_before, quote_after],
Exemple #18
0
def test_enforce_pattern_options():
    """Regression checks for Github issue #53.

    Six cases are covered: with and without an explicit location, each with
    the three strand options. Every case must end with all constraints
    passing and the pattern present on the expected strand.
    """
    sequence = "A" * 10
    pattern = "C" * 4

    def single_occurrence_problem(**spec_options):
        # Problem on the poly-A sequence requiring exactly one occurrence of
        # the pattern, with the given strand/location options.
        return dc.DnaOptimizationProblem(
            sequence=sequence,
            constraints=[
                dc.EnforcePatternOccurence(
                    pattern, occurences=1, **spec_options
                ),
            ],
            logger=None,
        )

    # ---- No location given -------------------------------------------------

    # strand="from_location"
    problem = single_occurrence_problem(strand="from_location")
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
    assert pattern in problem.sequence

    # strand="both"
    problem = single_occurrence_problem(strand="both")
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
    assert pattern in problem.sequence

    # strand=-1: one occurrence is missing before resolution, and the
    # pattern ends up on the other strand.
    problem = single_occurrence_problem(strand=-1)
    assert problem.constraints[0].evaluate(problem).score == -1
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
    assert dc.reverse_complement(pattern) in problem.sequence

    # ---- Location specified (on the -1 strand) -----------------------------

    # strand="from_location": the -1 strand of the location is used.
    problem = single_occurrence_problem(
        strand="from_location",
        location=Location(1, 6, strand=-1),
    )
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
    assert dc.reverse_complement(pattern) in problem.sequence

    # strand="both" overrides the location's -1 strand; the +1 strand is
    # used by default.
    problem = single_occurrence_problem(
        strand="both",
        location=Location(1, 6, strand=-1),
    )
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
    assert pattern in problem.sequence

    # strand=1 overrides the location's -1 strand; the +1 strand is used.
    problem = single_occurrence_problem(
        strand=1,
        location=Location(1, 6, strand=-1),
    )
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
    assert pattern in problem.sequence
# Script section: locate an i-motif in a random sequence, first with
# Biopython / `re`, then through DNA Chisel's AvoidPattern constraint.
# NOTE(review): `i_motif` is not assigned anywhere in this excerpt --
# presumably it is defined earlier in the script; confirm.
regex = "(CCCTTT){3}C{3}"  # optimal pattern for i-motif formation

# Embed the motif between two random flanks (each requested with length=50).
query_seq = (
    dnachisel.random_dna_sequence(length=50)
    + i_motif
    + dnachisel.random_dna_sequence(length=50)
)
print(query_seq)
seq = Bio.Seq.Seq(query_seq)

# Find first occurrence:
print(seq.find(i_motif))

# Find all:
matches = [
    (m.start(), m.end()) for m in re.finditer(i_motif, str(seq))
]  # list of (start, end) tuples
# Print the subsequence covered by the first match.
print(seq[matches[0][0] : matches[0][1]])

# Find regex with DNA Chisel:
# The regex is given as an AvoidPattern constraint and the problem's
# constraints summary is printed.
problem = dnachisel.DnaOptimizationProblem(
    sequence=query_seq, constraints=[dnachisel.AvoidPattern(pattern=regex)]
)
print(problem.constraints_text_summary())


# Same check with a more compact spelling of the pattern.
compact_regex = "(C{3}T{3}){3}C{3}"  # variant of the same regex
problem = dnachisel.DnaOptimizationProblem(
    sequence=query_seq, constraints=[dnachisel.AvoidPattern(pattern=compact_regex)]
)
print(problem.constraints_text_summary())