예제 #1
0
def test_EnforceTranslation_bacterial_valine():
    """Check that ``start_codon="keep"`` preserves the initial codon.

    A protein is reverse-translated with the "Bacterial" table and prefixed
    with either ATG or GTG. After optimizing under ``EnforceChanges``, the
    sequence must still translate to "M" + protein (with
    ``assume_start_codon=True``) and its first three nucleotides must be
    unchanged.
    """
    table_name = "Bacterial"
    protein = "LLTMMVTTTTVMVL"
    coding_part = reverse_translate(protein, table=table_name)
    expected_protein = "M" + protein

    # Each start codon is expected to be the same before and after.
    for start_codon in ("ATG", "GTG"):
        problem = DnaOptimizationProblem(
            sequence=start_codon + coding_part,
            constraints=[
                EnforceTranslation(
                    genetic_table=table_name, start_codon="keep"
                )
            ],
            objectives=[EnforceChanges()],
            logger=None,
        )
        assert problem.constraints[0].translation == "MLLTMMVTTTTVMVL"

        problem.optimize()
        translated = translate(
            problem.sequence, table_name, assume_start_codon=True
        )
        assert translated == expected_protein
        assert problem.sequence[:3] == start_codon
예제 #2
0
 def optimize(self, codon_table):
     """Optimize the stored vaccine codons and write them back as codons.

     Runs ``optimize_frequent`` first, then joins the current vaccine
     codons into one strand with ``get_strand`` and optimizes it in a
     ``DnaOptimizationProblem`` (translation and GC-content constraints,
     "use_best_codon" objective). The optimized sequence is split back
     into 3-character codons stored on the instance; trailing characters
     that do not form a full codon are dropped.

     ``codon_table`` is passed to ``optimize_frequent`` and to
     ``pct.get_codons_table``. Returns None.
     """
     self.optimize_frequent(codon_table)

     # Work on a copy, then empty the stored list in place.
     previous_codons = self.__vaccine_codons_gen.copy()
     self.__vaccine_codons_gen.clear()
     strand = self.get_strand(previous_codons)
     usage_table = pct.get_codons_table(codon_table)

     translation_constraint = EnforceTranslation(genetic_table='Standard',
                                                 start_codon='ATG')
     gc_constraint = EnforceGCContent(mini=0.54, maxi=0.9, window=120)
     codon_objective = CodonOptimize(method="use_best_codon",
                                     codon_usage_table=usage_table)
     problem = DnaOptimizationProblem(
         sequence=strand,
         constraints=[translation_constraint, gc_constraint],
         objectives=[codon_objective],
     )
     problem.resolve_constraints()
     problem.optimize()

     # Keep complete triplets only, as the original counter-based loop did.
     optimized = problem.sequence
     whole_length = len(optimized) - len(optimized) % 3
     self.__vaccine_codons_gen = [
         optimized[start:start + 3]
         for start in range(0, whole_length, 3)
     ]
예제 #3
0
def test_optimization_2():
    """Optimizing for manufacturability should lower the assembly quote.

    The first 5500 nucleotides of a test sequence are quoted by a Gibson
    assembly station with two suppliers. The price must exceed 850 before
    optimization and fall below 580 after a random-search optimization
    constrained by ``EnforceTranslation`` on location (0, 4998).
    """
    fasta_path = os.path.join(
        "tests", "data", "test_optimization_sequence_2.fa"
    )
    sequence = str(load_record(fasta_path).seq)[:5500]

    deluxe_offer = CommercialDnaOffer(
        name="DeluxeDNA.com",
        sequence_constraints=[SequenceLengthConstraint(max_length=4000)],
        pricing=PerBasepairPricing(0.20),
        lead_time=10,
    )
    cheap_offer = CommercialDnaOffer(
        name="CheapDNA.com",
        sequence_constraints=[
            NoPatternConstraint(enzyme="BsaI"),
            EnforceGCContent(0.3, 0.7, window=60),
        ],
        pricing=PerBasepairPricing(0.10),
        lead_time=15,
    )

    # Blocks-to-chunks assembly station (a_star_factor left at its default).
    gibson_method = GibsonAssemblyMethod(
        overhang_selector=FixedSizeSegmentSelector(10),
        min_segment_length=1000,
        max_segment_length=6000,
        duration=8,
        cost=16,
    )
    station = DnaAssemblyStation(
        name="Gibson Blocks Assembly",
        assembly_method=gibson_method,
        supplier=[deluxe_offer, cheap_offer],
        coarse_grain=30,
        fine_grain=False,
        memoize=True,
    )

    price_before = station.get_quote(sequence).price
    assert price_before > 850

    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[EnforceTranslation(location=(0, 4998))],
        objectives=[OptimizeManufacturability(station)],
    )
    problem.randomization_threshold = 0  # Forces "random search" mode
    problem.max_random_iters = 5
    problem.optimize()

    print("OPTIMIZATION DONE, GENERATING REPORT")

    price_after = station.get_quote(problem.sequence).price
    assert price_after < 580
def test_AvoidNonUniqueSegments_as_objective():
    """``AvoidNonUniqueSegments(8)`` as sole objective passes once optimized."""
    numpy.random.seed(123)
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(1000, seed=123),
        objectives=[AvoidNonUniqueSegments(8)],
    )
    problem.optimize()
    evaluation = problem.objectives[0].evaluate(problem)
    assert evaluation.passes
예제 #5
0
def test_UniquifyAllKmers_as_objective():
    """``UniquifyAllKmers(8)`` as sole objective passes once optimized."""
    numpy.random.seed(123)
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(1000, seed=123),
        objectives=[UniquifyAllKmers(8)],
        logger=None,
    )
    problem.optimize()
    evaluation = problem.objectives[0].evaluate(problem)
    assert evaluation.passes
예제 #6
0
def test_codon_optimize_with_custom_table():
    """Codon-optimize against the 'b_subtilis' table from ``biotools``.

    The objective score must start below -10 and be exactly 0 once the
    problem has been optimized.
    """
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(1200, seed=123),
        constraints=[EnforceTranslation()],
        objectives=[
            CodonOptimize(
                codon_usage_table=biotools.CODON_USAGE_TABLES['b_subtilis']
            )
        ],
    )
    score_before = problem.objective_scores_sum()
    assert score_before < -10

    problem.optimize()
    score_after = problem.objective_scores_sum()
    assert score_after == 0
예제 #7
0
def test_pattern_and_reverse():
    """Remove forward and reverse BsmBI sites with fewer than 70 edits.

    The sequence is made of runs of "CGTCTC" and "GAGACG"; the
    'BsmBI_site' pattern is avoided while ``AvoidChanges`` is the
    objective.
    """
    forward_site = "CGTCTC"
    reverse_site = "GAGACG"
    runs = (
        (10, forward_site),
        (25, reverse_site),
        (15, forward_site),
        (15, reverse_site),
    )
    sequence = "".join(count * site for count, site in runs)

    problem = DnaOptimizationProblem(
        sequence,
        constraints=[AvoidPattern('BsmBI_site')],
        objectives=[AvoidChanges()],
    )
    problem.resolve_constraints()
    problem.optimize()

    edit_count = sum(problem.sequence_edits_as_array())
    assert edit_count < 70
예제 #8
0
def test_codon_optimize_with_custom_table():
    """Codon-optimize against a table from ``get_codons_table``.

    The objective score must start below -10 and be exactly 0 once the
    problem has been optimized.
    """
    usage_table = get_codons_table("b_subtilis")
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(1200, seed=123),
        constraints=[EnforceTranslation()],
        objectives=[CodonOptimize(codon_usage_table=usage_table)],
        logger=None,
    )
    score_before = problem.objective_scores_sum()
    assert score_before < -10

    problem.optimize()
    score_after = problem.objective_scores_sum()
    assert score_after == 0
예제 #9
0
def test_AvoidRareCodons_as_objective():
    """``AvoidRareCodons(0.11, "e_coli")`` scores about -0.09, then 0.

    The six-codon sequence is optimized under ``EnforceTranslation``.
    """
    numpy.random.seed(123)

    codons = ["ATG", "TTT", "ATA", "CCA", "CTT", "TAG"]
    problem = DnaOptimizationProblem(
        sequence="".join(codons),
        constraints=[EnforceTranslation()],
        objectives=[AvoidRareCodons(0.11, "e_coli")],
    )
    initial_score = problem.objective_scores_sum()
    assert abs(initial_score + 0.09) < 0.001

    problem.optimize()
    final_score = problem.objective_scores_sum()
    assert final_score == 0
예제 #10
0
def test_codon_optimize_bestcodon():
    """``CodonOptimize(species='e_coli')`` goes from a negative score to 0.

    The input is the reverse translation of a seeded random protein
    (argument 3000), optimized under ``EnforceTranslation``.
    """
    numpy.random.seed(123)
    dna = reverse_translate(random_protein_sequence(3000, seed=123))
    problem = DnaOptimizationProblem(
        sequence=dna,
        constraints=[EnforceTranslation()],
        objectives=[CodonOptimize(species='e_coli')],
    )
    score_before = problem.objective_scores_sum()
    assert score_before < 0

    problem.optimize()
    score_after = problem.objective_scores_sum()
    assert score_after == 0
예제 #11
0
def test_codon_optimize_harmonized():
    """Harmonized e_coli codon optimization should raise the score.

    The score must lie strictly between -700 and -600 before optimization
    and be above -350 afterwards.
    """
    numpy.random.seed(123)
    dna = reverse_translate(random_protein_sequence(500, seed=123))
    problem = DnaOptimizationProblem(
        sequence=dna,
        constraints=[EnforceTranslation()],
        objectives=[CodonOptimize(species='e_coli', mode='harmonized')],
    )
    score_before = problem.objective_scores_sum()
    assert -700 < score_before < -600

    problem.optimize()
    score_after = problem.objective_scores_sum()
    assert -350 < score_after
예제 #12
0
def test_codon_optimize_harmonized_short_sequence():
    """Harmonized b_subtilis optimization on the short protein "DDDKKKKKK".

    The score must be below -7 before optimization and above -1 after.
    """
    dna = reverse_translate("DDDKKKKKK")
    objective = CodonOptimize(species='b_subtilis', mode='harmonized')
    problem = DnaOptimizationProblem(
        sequence=dna,
        constraints=[EnforceTranslation()],
        objectives=[objective],
    )
    score_before = problem.objective_scores_sum()
    assert score_before < -7

    problem.optimize()
    score_after = problem.objective_scores_sum()
    assert -1 < score_after
예제 #13
0
def test_AvoidRareCodons_as_objective_reversed():
    """Same check as the forward test, on the reverse-complemented sequence.

    Both specifications receive the location (0, len(sequence), -1). The
    score must start at about -0.09 and be exactly 0 after optimization.
    """
    numpy.random.seed(123)

    forward = "".join(["ATG", "TTT", "ATA", "CCA", "CTT", "TAG"])
    reversed_dna = reverse_complement(forward)
    minus_strand = (0, len(forward), -1)

    problem = DnaOptimizationProblem(
        sequence=reversed_dna,
        constraints=[EnforceTranslation(location=minus_strand)],
        objectives=[AvoidRareCodons(0.11, "e_coli", location=minus_strand)],
    )
    initial_score = problem.objective_scores_sum()
    assert abs(initial_score + 0.09) < 0.001

    problem.optimize()
    final_score = problem.objective_scores_sum()
    assert final_score == 0
예제 #14
0
def test_avoid_changes_with_indices_as_constraint():
    """``AvoidChanges(indices=...)`` must protect exactly the listed positions.

    With ``EnforceChanges`` as the objective on a 50-nucleotide sequence and
    15 protected indices, the expected number of edits is 35.
    """
    numpy.random.seed(123)

    protected = [10, 20, *range(30, 40), 44, 45, 46]
    sequence = random_dna_sequence(50)

    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[AvoidChanges(indices=protected)],
        objectives=[EnforceChanges()],
        logger=None,
    )
    problem.optimize()

    expected_edits = 50 - 15
    assert problem.number_of_edits() == expected_edits
예제 #15
0
def test_codon_optimize_match_usage():
    """"match_codon_usage" for e_coli should raise the objective score.

    The score must lie strictly between -600 and -550 before optimization
    and be above -350 afterwards.
    """
    numpy.random.seed(123)
    dna = reverse_translate(random_protein_sequence(500, seed=123))
    usage_objective = CodonOptimize(
        species="e_coli", method="match_codon_usage"
    )
    problem = DnaOptimizationProblem(
        sequence=dna,
        constraints=[EnforceTranslation()],
        objectives=[usage_objective],
        logger=None,
    )
    score_before = problem.objective_scores_sum()
    assert -600 < score_before < -550

    problem.optimize()
    score_after = problem.objective_scores_sum()
    assert -350 < score_after
예제 #16
0
 def create_new_primer(existing_primers):
     """Return a new primer sequence built against ``existing_primers``.

     Starts from a random sequence (``length=20``), resolves the
     heterodimerization and pattern-avoidance constraints, then optimizes
     towards a GC-content target of 0.6.
     """
     primer_constraints = [
         AvoidHeterodimerization(existing_primers, tmax=3),
         AvoidPattern("3x3mer"),
         AvoidPattern("4xG"),
     ]
     starting_sequence = random_dna_sequence(length=20)
     problem = DnaOptimizationProblem(
         sequence=starting_sequence,
         constraints=primer_constraints,
         objectives=[EnforceGCContent(target=0.6)],
         logger=None,
     )
     problem.resolve_constraints()
     problem.optimize()
     return problem.sequence
예제 #17
0
def test_codon_optimize_harmonize_rca_short_sequence():
    """"harmonize_rca" from e_coli to h_sapiens should raise the score.

    The score must be below -123 before optimization and above -74 after.
    """
    dna = reverse_translate(random_protein_sequence(500, seed=123))
    rca_objective = CodonOptimize(
        species="h_sapiens",
        original_species="e_coli",
        method="harmonize_rca",
    )
    problem = DnaOptimizationProblem(
        sequence=dna,
        constraints=[EnforceTranslation()],
        objectives=[rca_objective],
        logger=None,
    )
    score_before = problem.objective_scores_sum()
    assert score_before < -123

    problem.optimize()
    score_after = problem.objective_scores_sum()
    assert -74 < score_after
예제 #18
0
def test_EnforceSequence_as_objective():
    """``EnforceSequence`` as an objective reaches a score of 0.

    A 15-character "W" pattern is enforced at location (50, 65) of a
    sequence made of 25 "ATGC" repeats, while "ATGC" itself is an avoided
    pattern. The score must be negative before optimization and exactly 0
    after constraints are resolved and the problem is optimized.
    """
    numpy.random.seed(1234)
    window_start, window_size = 50, 15
    window = (window_start, window_start + window_size)
    enforced = EnforceSequence("W" * window_size, location=window)

    problem = DnaOptimizationProblem(
        sequence=25 * "ATGC",
        constraints=[AvoidPattern("ATGC")],
        objectives=[enforced],
    )
    score_before = problem.objective_scores_sum()
    assert score_before < 0

    problem.resolve_constraints()
    problem.optimize()
    score_after = problem.objective_scores_sum()
    assert score_after == 0
예제 #19
0
def test_codon_optimize_match_usage_short_sequence():
    """"match_codon_usage" for b_subtilis on the protein "DDDKKKKKK".

    The score must be below -5.5 before optimization and above -0.6 after.
    The final score is printed and the optimized sequence is checked
    against a fixed expected string.
    """
    numpy.random.seed(123)
    dna = reverse_translate("DDDKKKKKK")
    usage_objective = CodonOptimize(
        species="b_subtilis", method="match_codon_usage"
    )
    problem = DnaOptimizationProblem(
        sequence=dna,
        constraints=[EnforceTranslation()],
        objectives=[usage_objective],
        logger=None,
    )
    score_before = problem.objective_scores_sum()
    assert score_before < -5.5

    problem.optimize()
    score_after = problem.objective_scores_sum()
    assert -0.6 < score_after
    print(problem.objective_scores_sum())
    assert problem.sequence == "GATGATGACAAGAAAAAGAAAAAAAAA"
예제 #20
0
def test_codon_optimize_match_usage_gfp_sequence():
    """"match_codon_usage" for s_cerevisiae on a fixed 144-nt sequence.

    The score must be below -61 before optimization and above -16 after.
    ``compare_frequencies`` is then run in text mode and its result printed.
    """
    fragments = [
        "ATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTG",
        "GTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGCGCGGC",
        "GAGGGCGAGGGCGATGCCACCAACGGCAAGCTGACCCTGAAGTTCATC",
    ]
    usage_objective = CodonOptimize(
        species="s_cerevisiae", method="match_codon_usage"
    )
    problem = DnaOptimizationProblem(
        sequence="".join(fragments),
        constraints=[EnforceTranslation()],
        objectives=[usage_objective],
        logger=None,
    )
    score_before = problem.objective_scores_sum()
    assert score_before < -61

    problem.optimize()
    score_after = problem.objective_scores_sum()
    assert score_after > -16

    # Just for coverage: run compare_frequencies in text mode on the
    # objective as stored by the problem.
    stored_objective = problem.objectives[0]
    codon_list = stored_objective.get_codons(problem)
    report = stored_objective.compare_frequencies(codon_list, text_mode=True)
    print(report)
def test_avoid_change_as_objectives_basics():
    """A higher ``AvoidChanges`` boost must yield strictly fewer edits.

    For boosts 0, 0.1, 0.2 and 1 the same seeded sequence is optimized for
    GC content, with ``AvoidChanges`` as a passive objective. The number of
    differences must exceed 40 without boost, decrease strictly as the
    boost grows, and be 0 for the largest boost.
    """
    numpy.random.seed(123)
    edit_counts = []
    for boost in (0, 0.1, 0.2, 1):
        sequence = random_dna_sequence(1000, seed=123)
        gc_objective = EnforceGCContent(
            mini=0.45, maxi=0.55, window=80
        ).copy_with_changes(locations_span=300)
        keep_objective = AvoidChanges(boost=boost).as_passive_objective()
        problem = DnaOptimizationProblem(
            sequence=sequence,
            objectives=[gc_objective, keep_objective],
        )

        problem.optimize()
        edit_counts.append(
            sequences_differences(problem.sequence, problem.sequence_before)
        )

    unboosted, low_boost, mid_boost, full_boost = edit_counts
    assert unboosted > 40
    assert unboosted > low_boost > mid_boost > full_boost
    assert full_boost == 0
예제 #22
0
파일: function.py 프로젝트: rmcl/supergsl
    def create_new_sequence(
        self,
        naive_target_sequence : str,
        codon_usage_table : Optional[str],
        existing_sequences : List[str]
    ) -> str:
        """Run DNAChisel to create a new codon-optimized DNA sequence.

        The sequence is optimized under ``EnforceTranslation`` plus one
        ``AvoidPattern`` per entry of ``existing_sequences``, with
        ``MatchTargetCodonUsage(species="s_cerevisiae")`` as the objective.

        NOTE(review): ``codon_usage_table`` is accepted but never read by
        this method; the species is fixed to "s_cerevisiae".

        Returns the optimized sequence held by the problem.
        """
        specs = [EnforceTranslation()]
        # Each existing sequence becomes a pattern to avoid.
        specs.extend(
            AvoidPattern(existing) for existing in existing_sequences
        )

        usage_objective = MatchTargetCodonUsage(species="s_cerevisiae")
        problem = DnaOptimizationProblem(
            sequence=naive_target_sequence,
            constraints=specs,
            objectives=[usage_objective],
        )

        problem.resolve_constraints(final_check=True)
        problem.optimize()
        return problem.sequence
예제 #23
0
def test_optimization_1():
    """Manufacturability optimization should improve the objective score.

    The score for a Gibson station with two suppliers must lie strictly
    between -367 and -366 before optimization and between -244 and -243
    after a random-search optimization.
    """
    suppliers = [
        CommercialDnaOffer(
            name="Company InGen",
            pricing=PerBasepairPricing(0.08),
            sequence_constraints=[NoPatternConstraint(enzyme="AarI")],
        ),
        CommercialDnaOffer(
            name="Company Delux",
            pricing=PerBasepairPricing(0.66),
            sequence_constraints=[],
        ),
    ]
    gibson_method = GibsonAssemblyMethod(
        overhang_selector=FixedSizeSegmentSelector(20),
        min_segment_length=200,
        max_segment_length=1200,
    )
    # a_star_factor is left at its default.
    station = DnaAssemblyStation(
        name="Gibson Assembly Station",
        assembly_method=gibson_method,
        supplier=suppliers,
        coarse_grain=20,
    )

    record_path = os.path.join(
        "tests", "data", "test_optimization_sequence_1.fa"
    )
    record = load_record(record_path)
    objective = OptimizeManufacturability(station)
    problem = DnaOptimizationProblem(sequence=record, objectives=[objective])

    # The quote is requested as in the original test; its value is not used.
    objective.get_quote(problem)
    initial_score = problem.objective_scores_sum()
    assert -367 < initial_score < -366

    problem.randomization_threshold = 0
    problem.max_random_iters = 5
    problem.optimize()
    final_score = problem.objective_scores_sum()
    assert -244 < final_score < -243
예제 #24
0
    def domesticate(
        self,
        dna_sequence=None,
        protein_sequence=None,
        is_cds="default",
        codon_optimization=None,
        extra_constraints=(),
        extra_objectives=(),
        final_record_target=None,
        edit=False,
        barcode="",
        barcode_spacer="AA",
        report_target=None,
    ):
        """Domesticate a sequence.

        The sequence is placed between ``self.left_flank`` and
        ``self.right_flank`` and optimized with DnaChisel under the
        domesticator's constraints and objectives plus the extra ones.

        Parameters
        ----------

        dna_sequence
          The DNA sequence to domesticate. If it is a ``SeqRecord``, the
          specifications read from it with
          ``DnaOptimizationProblem.from_record`` are shifted by the length
          of the left flank and added to the extra constraints and
          objectives.

        protein_sequence
          Amino-acid sequence of the protein, which will be converted into
          a DNA sequence string. When provided, it replaces
          ``dna_sequence`` and ``is_cds`` is forced to True.

        is_cds
          If True, an ``EnforceTranslation`` constraint is added over the
          sequence location. The value "default" means
          ``self.cds_by_default``.

        codon_optimization
          Either None for no codon optimization or the name of an organism
          supported by DnaChisel.

        extra_constraints
          List of extra constraints to apply to the domesticated sequences.
          Each constraint is either a DnaChisel constraint or a function
          (dna_sequence => DnaChisel constraint).

        extra_objectives
          List of extra optimization objectives to apply to the domesticated
          sequences. Each objective is either a DnaChisel objective or a
          function (dna_sequence => DnaChisel objective).

        final_record_target
          Path to the file where to write the final genbank.

        edit
          Turn to True to allow sequence edits. When it is False and the
          sequence is not a CDS, an ``AvoidChanges`` constraint is added,
          so a sequence that does not already satisfy all constraints
          yields a failed result (``success`` set to False).

        report_target
          Target for the sequence optimization report (a folder path, or a
          zip path).

        barcode
          A sequence of DNA that will be added to the left of the sequence
          once the domestication is done.
          NOTE(review): not read by this method.

        barcode_spacer
          Nucleotides to be added between the barcode and the enzyme
          (optional, the idea here is that they will make sure to avoid the
          creation of unwanted cutting sites).
          NOTE(review): not read by this method.

        Returns
        -------

        A ``DomesticationResult`` built from the sequence before
        optimization, the final record, the edits record, the report data,
        the success flag and the message.
        """
        if is_cds == "default":
            is_cds = self.cds_by_default
        if isinstance(dna_sequence, SeqRecord):
            problem = DnaOptimizationProblem.from_record(dna_sequence)
            # Record specifications are relative to the bare sequence; shift
            # them to account for the left flank prepended below.
            for spec in problem.constraints + problem.objectives:
                spec.location += len(self.left_flank)
            extra_constraints = list(extra_constraints) + problem.constraints
            # Fix: this used to start from extra_constraints, which dropped
            # the caller's objectives and turned constraints into objectives.
            extra_objectives = list(extra_objectives) + problem.objectives

        if protein_sequence is not None:
            is_cds = True
            dna_sequence = reverse_translate(protein_sequence)

        # Specifications may be given as factories taking the DNA sequence.
        constraints = [
            c(dna_sequence) if callable(c) else c
            for c in list(extra_constraints) + self.constraints
        ]
        location = Location(len(self.left_flank),
                            len(self.left_flank) + len(dna_sequence))
        if is_cds:
            constraints.append(EnforceTranslation(location=location))
        objectives = [
            o(dna_sequence) if callable(o) else o
            for o in list(extra_objectives) + self.objectives
        ]
        if codon_optimization:
            objectives.append(
                CodonOptimize(species=codon_optimization, location=location))
        if self.minimize_edits:
            objectives.append(AvoidChanges())

        extended_sequence = self.left_flank + dna_sequence + self.right_flank

        if (not is_cds) and (not edit):
            constraints.append(AvoidChanges())
        problem = DnaOptimizationProblem(
            extended_sequence,
            constraints=constraints,
            objectives=objectives,
            logger=self.logger,
        )
        all_constraints_pass = problem.all_constraints_pass()
        # The AvoidChanges objective added for minimize_edits is not counted.
        no_objectives = (len(problem.objectives) - self.minimize_edits) == 0
        report_data = None
        optimization_successful = True
        message = ""
        if not (all_constraints_pass and no_objectives):
            problem.n_mutations = self.simultaneous_mutations

            if report_target is not None:
                (success, message, report_data) = problem.optimize_with_report(
                    target=report_target, project_name=self.name)
                optimization_successful = success
            else:
                # Best effort: a failure is reported through the result
                # rather than raised.
                try:
                    problem.resolve_constraints()
                    problem.optimize()
                except Exception as err:
                    message = str(err)
                    optimization_successful = False
        final_record = problem.to_record(
            with_original_features=True,
            with_original_spec_features=False,
            with_constraints=False,
            with_objectives=False,
        )
        edits_record = problem.to_record(
            with_original_features=True,
            with_original_spec_features=False,
            with_constraints=False,
            with_objectives=False,
            with_sequence_edits=True,
        )
        if final_record_target is not None:
            SeqIO.write(final_record, final_record_target, "genbank")

        return DomesticationResult(
            problem.sequence_before,
            final_record,
            edits_record,
            report_data,
            optimization_successful,
            message,
        )
예제 #25
0
    random_protein_sequence,
    reverse_translate,
    CodonOptimize,
    EnforceTranslation,
    AvoidPattern,
    EnforceGCContent,
)

# Build a seeded random protein and a DNA sequence encoding it.
protein = random_protein_sequence(1000, seed=123)
sequence = reverse_translate(protein)

# Define the problem: keep the translation, avoid the "BsmBI_site" pattern,
# bound the GC content between 0.4 and 0.6 (window=60), and codon-optimize
# for s_cerevisiae.
problem = DnaOptimizationProblem(
    sequence=sequence,
    constraints=[
        EnforceTranslation(),
        AvoidPattern("BsmBI_site"),
        EnforceGCContent(mini=0.4, maxi=0.6, window=60),
    ],
    objectives=[CodonOptimize(species="s_cerevisiae")],
)

# Report the state of constraints and objectives before solving.
print("\nBefore optimization:\n")
print(problem.constraints_text_summary())
print(problem.objectives_text_summary())

# Resolve the constraints first, then optimize the objectives.
problem.resolve_constraints(final_check=True)
problem.optimize()

# Report the state of constraints and objectives after solving.
print("\nAfter optimization:\n")
print(problem.constraints_text_summary())
print(problem.objectives_text_summary())