コード例 #1
0
def experiment_1(seed=123):
    """Run a seeded, two-stage GC-content optimization and return the result.

    A random 50-residue protein is reverse-translated, then optimized twice
    under an EnforceTranslation constraint: first towards a GC content
    target of 1, then back towards a target of 0.5. According to the
    original note, the results of this optimization produced the file
    test_determinism.py.
    """
    np.random.seed(seed)
    current_sequence = dc.reverse_translate(dc.random_protein_sequence(50))

    # Stage 1 maximizes the GC content, stage 2 brings it back to 50%.
    for gc_target in (1, 0.5):
        stage = dc.DnaOptimizationProblem(
            sequence=current_sequence,
            constraints=[dc.EnforceTranslation()],
            objectives=[dc.EnforceGCContent(target=gc_target)],
            logger=None,
        )
        stage.optimize()
        current_sequence = stage.sequence

    return current_sequence
コード例 #2
0
def test_EnforceTranslation_bacterial_valine():
    """Check that ``start_codon="keep"`` leaves ATG and GTG start codons alone.

    For each start codon, the codon is prepended to a reverse-translated
    protein, the problem is optimized with an EnforceChanges objective, and
    the test asserts that both the translation and the first codon are
    unchanged afterwards.
    """
    table_name = "Bacterial"
    protein = "LLTMMVTTTTVMVL"
    coding_body = reverse_translate(protein, table=table_name)

    # (start codon before optimization, start codon expected afterwards)
    start_codon_cases = (
        ("ATG", "ATG"),  # methionine stays the only methionine codon
        ("GTG", "GTG"),  # valine-start-codon stays the only valine-start-codon
    )
    for codon_before, codon_after in start_codon_cases:
        full_sequence = codon_before + coding_body
        keep_start = EnforceTranslation(
            genetic_table="Bacterial", start_codon="keep"
        )
        problem = DnaOptimizationProblem(
            sequence=full_sequence,
            constraints=[keep_start],
            objectives=[EnforceChanges()],
            logger=None,
        )
        assert problem.constraints[0].translation == "MLLTMMVTTTTVMVL"

        problem.optimize()

        translated = translate(
            problem.sequence, table_name, assume_start_codon=True
        )
        assert translated == "M" + protein
        assert problem.sequence[:3] == codon_after
コード例 #3
0
def test_EnforceTranlation():
    """Resolve an AvoidPattern("AAA") constraint under EnforceTranslation.

    The constraints must fail on the initial sequence and all pass once
    ``resolve_constraints`` has run.
    """
    numpy.random.seed(1234)
    protein = random_protein_sequence(50, seed=123)
    dna = reverse_translate(protein)
    specs = [AvoidPattern("AAA"), EnforceTranslation()]
    problem = DnaOptimizationProblem(sequence=dna, constraints=specs)

    passes_before = problem.all_constraints_pass()
    assert not passes_before

    problem.resolve_constraints()

    passes_after = problem.all_constraints_pass()
    assert passes_after
コード例 #4
0
def test_EnforceTranlationError():
    """An EnforceTranslation location whose length is not a multiple of 3
    must raise a ValueError mentioning the offending location."""
    numpy.random.seed(1234)
    protein = random_protein_sequence(50, seed=123)
    dna = reverse_translate(protein)

    # The constraint is built inside the context manager so that an error
    # raised at either construction step is captured.
    with pytest.raises(ValueError) as excinfo:
        DnaOptimizationProblem(
            sequence=dna,
            constraints=[EnforceTranslation(location=(0, 16))],
        )

    error_text = str(excinfo.value)
    assert "Location 0-16(+) has length 16" in error_text
コード例 #5
0
def test_codon_optimize_harmonized_short_sequence():
    """Harmonized b_subtilis codon optimization on a 9-residue protein.

    The summed objective score must be below -7 before optimization and
    above -1 afterwards.
    """
    dna = reverse_translate("DDDKKKKKK")
    harmonize = CodonOptimize(species='b_subtilis', mode='harmonized')
    problem = DnaOptimizationProblem(
        sequence=dna,
        constraints=[EnforceTranslation()],
        objectives=[harmonize],
    )

    score_before = problem.objective_scores_sum()
    assert score_before < -7

    problem.optimize()

    score_after = problem.objective_scores_sum()
    assert score_after > -1
コード例 #6
0
def test_codon_optimize_harmonized():
    """Harmonized e_coli codon optimization on a 500-residue random protein.

    The summed objective score must start between -700 and -600 and end
    above -350 after optimization.
    """
    numpy.random.seed(123)
    dna = reverse_translate(random_protein_sequence(500, seed=123))
    problem = DnaOptimizationProblem(
        sequence=dna,
        constraints=[EnforceTranslation()],
        objectives=[CodonOptimize(species='e_coli', mode='harmonized')],
    )

    score_before = problem.objective_scores_sum()
    assert -700 < score_before < -600

    problem.optimize()

    score_after = problem.objective_scores_sum()
    assert score_after > -350
コード例 #7
0
def test_codon_optimize_bestcodon():
    """Default e_coli CodonOptimize on a 3000-residue random protein.

    The summed objective score must be negative before optimization and
    exactly 0 afterwards.
    """
    numpy.random.seed(123)
    dna = reverse_translate(random_protein_sequence(3000, seed=123))
    problem = DnaOptimizationProblem(
        sequence=dna,
        constraints=[EnforceTranslation()],
        objectives=[CodonOptimize(species='e_coli')],
    )

    score_before = problem.objective_scores_sum()
    assert score_before < 0

    problem.optimize()

    score_after = problem.objective_scores_sum()
    assert score_after == 0
コード例 #8
0
ファイル: test_dnachisel.py プロジェクト: rmcl/supergsl
    def test_create_new_sequence(self):
        """The optimizer's new DNA sequence must translate to the protein."""
        expected_protein = 'MAAATCAGAGAAAAC'
        starting_dna = reverse_translate(expected_protein)

        # Arguments are passed positionally, exactly as in the original call.
        optimized_dna = self.optimize.create_new_sequence(starting_dna, None, [])

        translated = Seq(optimized_dna).translate()
        self.assertEqual(translated, expected_protein)
コード例 #9
0
def test_maximal_protein_sequence_change():
    """EnforceChanges under EnforceTranslation on a 200-residue protein.

    After resolving constraints and optimizing, the problem must report
    238 edits and the sequence must still translate to the same protein.
    """
    np.random.seed(123)
    protein = dc.random_protein_sequence(200)
    dna = dc.reverse_translate(protein)
    problem = dc.DnaOptimizationProblem(
        sequence=dna,
        constraints=[dc.EnforceTranslation()],
        objectives=[dc.EnforceChanges()],
    )

    problem.resolve_constraints()
    problem.optimize()

    edit_count = problem.number_of_edits()
    assert edit_count == 238
    retranslated = dc.translate(problem.sequence)
    assert retranslated == protein
コード例 #10
0
def test_EnforceTranslation_error_location_smaller_than_translation():
    """A translation longer than the sequence can encode raises an error.

    The sequence is reverse-translated from a 15-residue protein while the
    EnforceTranslation constraint is given a 30-residue translation; a
    ValueError whose message starts with "Window size" is expected.
    """
    numpy.random.seed(1234)
    short_protein = random_protein_sequence(15, seed=123)
    dna = reverse_translate(short_protein)

    # Everything that may raise stays inside the context manager.
    with pytest.raises(ValueError) as excinfo:
        DnaOptimizationProblem(
            sequence=dna,
            constraints=[
                EnforceTranslation(
                    translation=random_protein_sequence(30, seed=111))
            ],
            logger=None,
        )

    error_text = str(excinfo.value)
    assert error_text.startswith("Window size")
コード例 #11
0
def test_EnforceTranlationReversed():
    """Resolve AvoidPattern("AGC") with a reverse-strand EnforceTranslation.

    The problem is built on the reverse complement of a coding sequence,
    with the translation enforced over the whole sequence on strand -1.
    Constraints must fail initially and pass after resolution.
    """
    numpy.random.seed(1234)
    forward_dna = reverse_translate(random_protein_sequence(50, seed=123))
    reverse_dna = reverse_complement(forward_dna)
    specs = [
        AvoidPattern("AGC"),
        EnforceTranslation(location=(0, len(forward_dna), -1)),
    ]
    problem = DnaOptimizationProblem(sequence=reverse_dna, constraints=specs)

    passes_before = problem.all_constraints_pass()
    assert not passes_before

    problem.resolve_constraints()

    passes_after = problem.all_constraints_pass()
    assert passes_after
コード例 #12
0
def test_codon_optimize_harmonize_rca_short_sequence():
    """CodonOptimize with method "harmonize_rca" (e_coli to h_sapiens).

    Runs on a 500-residue random protein; the summed objective score must
    be below -123 before optimization and above -74 afterwards.
    """
    dna = reverse_translate(random_protein_sequence(500, seed=123))
    rca_objective = CodonOptimize(
        species="h_sapiens",
        original_species="e_coli",
        method="harmonize_rca",
    )
    problem = DnaOptimizationProblem(
        sequence=dna,
        constraints=[EnforceTranslation()],
        objectives=[rca_objective],
        logger=None,
    )

    score_before = problem.objective_scores_sum()
    assert score_before < -123

    problem.optimize()

    score_after = problem.objective_scores_sum()
    assert score_after > -74
コード例 #13
0
def experiment_2(seed=123):
    """Run a seeded e_coli codon optimization and return the final sequence.

    A random 1000-residue protein is reverse-translated, then optimized
    under EnforceTranslation and a windowed GC-content constraint
    (0.4 to 0.6 over windows of 50).
    """
    np.random.seed(seed)
    protein = dc.random_protein_sequence(1000)
    dna = dc.reverse_translate(protein)

    hard_constraints = [
        dc.EnforceTranslation(),
        dc.EnforceGCContent(mini=0.4, maxi=0.6, window=50),
    ]
    soft_objectives = [dc.CodonOptimize(species="e_coli")]
    problem = dc.DnaOptimizationProblem(
        sequence=dna,
        constraints=hard_constraints,
        objectives=soft_objectives,
        logger=None,
    )

    problem.resolve_constraints()
    problem.optimize()
    return problem.sequence
コード例 #14
0
def test_codon_optimize_match_usage():
    """CodonOptimize with method "match_codon_usage" for e_coli.

    Runs on a 500-residue random protein; the summed objective score must
    start between -600 and -550 and end above -350 after optimization.
    """
    numpy.random.seed(123)
    dna = reverse_translate(random_protein_sequence(500, seed=123))
    problem = DnaOptimizationProblem(
        sequence=dna,
        constraints=[EnforceTranslation()],
        objectives=[
            CodonOptimize(species="e_coli", method="match_codon_usage")
        ],
        logger=None,
    )

    score_before = problem.objective_scores_sum()
    assert -600 < score_before < -550

    problem.optimize()

    score_after = problem.objective_scores_sum()
    assert score_after > -350
コード例 #15
0
def test_codon_optimize_match_usage_short_sequence():
    """CodonOptimize "match_codon_usage" for b_subtilis on a short protein.

    The summed objective score must be below -5.5 before optimization and
    above -0.6 afterwards, and the optimized sequence must equal a fixed
    expected DNA string.
    """
    numpy.random.seed(123)
    dna = reverse_translate("DDDKKKKKK")
    usage_objective = CodonOptimize(
        species="b_subtilis", method="match_codon_usage"
    )
    problem = DnaOptimizationProblem(
        sequence=dna,
        constraints=[EnforceTranslation()],
        objectives=[usage_objective],
        logger=None,
    )

    score_before = problem.objective_scores_sum()
    assert score_before < -5.5

    problem.optimize()

    score_after = problem.objective_scores_sum()
    assert score_after > -0.6
    # The score is evaluated again for printing, as in the original.
    print(problem.objective_scores_sum())
    expected_dna = "GATGATGACAAGAAAAAGAAAAAAAAA"
    assert problem.sequence == expected_dna
コード例 #16
0
ファイル: function.py プロジェクト: rmcl/supergsl
    def execute(self, params : dict):
        """Return a Collection of optimized DNA sequences for a protein.

        Reads ``params['aa_sequence']`` (its ``get_sequence()`` result is
        reverse-translated into a starting DNA sequence) and
        ``params['num_results']`` (the number of optimization runs). Each run
        calls ``self.create_new_sequence`` with the sequences proposed so far,
        and every result is wrapped in a ``NucleotideSequence``.
        """
        amino_acids = params['aa_sequence'].get_sequence()
        usage_table = None  # no codon usage table is passed on
        run_count = params['num_results']

        starting_dna = reverse_translate(amino_acids)
        candidates: List[str] = []
        for run_index in range(run_count):
            # NOTE(review): the printed run index is zero-based.
            print('Optimization run %s of %s' % (run_index, run_count))
            # The same growing list is handed to every run.
            candidates.append(
                self.create_new_sequence(
                    naive_target_sequence=starting_dna,
                    codon_usage_table=usage_table,
                    existing_sequences=candidates))

        wrapped = list(map(NucleotideSequence, candidates))
        return Collection(wrapped)
コード例 #17
0
"""Example of use of the AvoidPattern specification."""

from dnachisel import (
    DnaOptimizationProblem,
    random_protein_sequence,
    reverse_translate,
    CodonOptimize,
    EnforceTranslation,
    AvoidPattern,
    EnforceGCContent,
)

# Starting point: a reverse-translated random protein with a fixed seed.
protein = random_protein_sequence(1000, seed=123)
sequence = reverse_translate(protein)

# Constraints: keep the translation, avoid the pattern named "BsmBI_site",
# and keep GC content between 0.4 and 0.6 over windows of 60.
constraints = [
    EnforceTranslation(),
    AvoidPattern("BsmBI_site"),
    EnforceGCContent(mini=0.4, maxi=0.6, window=60),
]
objectives = [CodonOptimize(species="s_cerevisiae")]
problem = DnaOptimizationProblem(
    sequence=sequence,
    constraints=constraints,
    objectives=objectives,
)

# Report the state of constraints and objectives before any change.
print("\nBefore optimization:\n")
print(problem.constraints_text_summary())
print(problem.objectives_text_summary())

# Resolve the constraints first, then optimize the objectives.
problem.resolve_constraints(final_check=True)
problem.optimize()
コード例 #18
0
    def domesticate(
        self,
        dna_sequence=None,
        protein_sequence=None,
        is_cds="default",
        codon_optimization=None,
        extra_constraints=(),
        extra_objectives=(),
        final_record_target=None,
        edit=False,
        barcode="",
        barcode_spacer="AA",
        report_target=None,
    ):
        """Domesticate a sequence.

        The sequence is placed between ``self.left_flank`` and
        ``self.right_flank``, a DnaOptimizationProblem is built from the
        domesticator's own specifications plus the extra ones given here, and
        the problem is resolved/optimized unless all constraints already pass
        and there is no objective other than the optional edit minimization.

        Parameters
        ----------

        dna_sequence
          The DNA sequence to domesticate. If a SeqRecord is given, the
          constraints and objectives read from the record are shifted by the
          length of the left flank and added to ``extra_constraints`` and
          ``extra_objectives`` respectively.

        protein_sequence
          Amino-acid sequence of the protein, which will be converted into
          a DNA sequence string. When provided, it replaces ``dna_sequence``
          and ``is_cds`` is forced to True.

        is_cds
          If True, sequence edits are restricted to synonymous mutations
          (an EnforceTranslation constraint is added over the sequence). The
          value "default" means "use ``self.cds_by_default``".

        codon_optimization
          Either None for no codon optimization or the name of an organism
          supported by DnaChisel.

        extra_constraints
          List of extra constraints to apply to the domesticated sequences.
          Each constraint is either a DnaChisel constraint or a function
          (dna_sequence => DnaChisel constraint).

        extra_objectives
          List of extra optimization objectives to apply to the domesticated
          sequences. Each objective is either a DnaChisel objective or a
          function (dna_sequence => DnaChisel objective).

        final_record_target
          Path to the file where to write the final genbank.

        edit
          Turn to True to allow sequence edits. If it is False and the
          sequence is not a CDS, an AvoidChanges constraint is added, so that
          a failed domestication result (i.e. with attribute ``success`` set
          to False) is returned when not all constraints are originally
          satisfied.

        report_target
          Target for the sequence optimization report (a folder path, or a zip
          path).

        barcode
          A sequence of DNA that will be added to the left of the sequence once
          the domestication is done.
          NOTE(review): not referenced in this method body - presumably
          handled by the caller; confirm.

        barcode_spacer
          Nucleotides to be added between the barcode and the enzyme (optional,
          the idea here is that they will make sure to avoid the creation of
          unwanted cutting sites).
          NOTE(review): not referenced in this method body - confirm.

        Returns
        -------

        result
          A ``DomesticationResult`` built from the sequence before
          optimization, the final record, the edits record, the report data
          (None unless ``report_target`` is given), the success flag and the
          error message (empty string when no error occurred).
        """
        if is_cds == "default":
            is_cds = self.cds_by_default
        if isinstance(dna_sequence, SeqRecord):
            problem = DnaOptimizationProblem.from_record(dna_sequence)
            # Specifications read from the record are relative to the bare
            # sequence; shift them to account for the left flank.
            for spec in problem.constraints + problem.objectives:
                spec.location += len(self.left_flank)
            extra_constraints = list(extra_constraints) + problem.constraints
            # Bug fix: this used to start from ``extra_constraints``, which
            # turned every constraint into an objective and discarded the
            # caller's ``extra_objectives``.
            extra_objectives = list(extra_objectives) + problem.objectives

        if protein_sequence is not None:
            is_cds = True
            dna_sequence = reverse_translate(protein_sequence)
        # Callables are spec factories: call them with the sequence.
        constraints = [
            c(dna_sequence) if hasattr(c, "__call__") else c
            for c in list(extra_constraints) + self.constraints
        ]
        location = Location(len(self.left_flank),
                            len(self.left_flank) + len(dna_sequence))
        if is_cds:
            constraints.append(EnforceTranslation(location=location))
        objectives = [
            o(dna_sequence) if hasattr(o, "__call__") else o
            for o in list(extra_objectives) + self.objectives
        ]
        if codon_optimization:
            objectives.append(
                CodonOptimize(species=codon_optimization, location=location))
        if self.minimize_edits:
            objectives.append(AvoidChanges())

        extended_sequence = self.left_flank + dna_sequence + self.right_flank

        if (not is_cds) and (not edit):
            constraints.append(AvoidChanges())
        problem = DnaOptimizationProblem(
            extended_sequence,
            constraints=constraints,
            objectives=objectives,
            logger=self.logger,
        )
        all_constraints_pass = problem.all_constraints_pass()
        # The AvoidChanges objective added for ``minimize_edits`` does not
        # count as a "real" objective when deciding whether to optimize.
        no_objectives = (len(problem.objectives) - self.minimize_edits) == 0
        report_data = None
        optimization_successful = True
        message = ""
        if not (all_constraints_pass and no_objectives):
            problem.n_mutations = self.simultaneous_mutations

            if report_target is not None:
                (success, message, report_data) = problem.optimize_with_report(
                    target=report_target, project_name=self.name)
                optimization_successful = success
            else:
                # Deliberate best-effort: any failure is reported through the
                # result's success flag and message instead of propagating.
                try:
                    problem.resolve_constraints()
                    problem.optimize()
                except Exception as err:
                    message = str(err)
                    optimization_successful = False
        final_record = problem.to_record(
            with_original_features=True,
            with_original_spec_features=False,
            with_constraints=False,
            with_objectives=False,
        )
        edits_record = problem.to_record(
            with_original_features=True,
            with_original_spec_features=False,
            with_constraints=False,
            with_objectives=False,
            with_sequence_edits=True,
        )
        if final_record_target is not None:
            SeqIO.write(final_record, final_record_target, "genbank")

        return DomesticationResult(
            problem.sequence_before,
            final_record,
            edits_record,
            report_data,
            optimization_successful,
            message,
        )
コード例 #19
0
        all_9mers = [sequence[i:i + 9] for i in range(len(sequence) - 9)]
        number_of_non_unique_9mers = sum([
            count for ninemer, count in Counter(all_9mers).items() if count > 1
        ])
        score = -(9.0 * number_of_non_unique_9mers) / len(sequence)
        return SpecEvaluation(self,
                              problem,
                              score=score,
                              locations=[Location(0, len(sequence))],
                              message="Score: %.02f (%d non-unique ninemers)" %
                              (score, number_of_non_unique_9mers))

    def __str__(self):
        """Return the fixed display name ``MinimizeNinemersScore``."""
        return "MinimizeNinemersScore"


# Build a problem on a reverse-translated random 300-residue protein, with the
# ninemer score as the only objective and the translation kept fixed.
sequence = reverse_translate(random_protein_sequence(300))
problem = DnaOptimizationProblem(
    sequence=sequence,
    constraints=[EnforceTranslation()],
    objectives=[MinimizeNinemersScore()],
)

# Objective summary before any change.
print("\n=== Status before optimization ===")
print(problem.objectives_text_summary())

problem.optimize()

# Objective summary after optimization, plus any failed constraints.
print("\n=== Status after optimization ===")
print(problem.objectives_text_summary())
print(problem.constraints_text_summary(failed_only=True))
コード例 #20
0
ファイル: domesticator.py プロジェクト: ajasja/domesticator
def load_inserts(inputs):
	"""Build a list of DNA SeqRecords from file paths or literal sequences.

	Each entry of ``inputs`` is handled as follows:

	- an existing ``.fasta`` file: every record is read and its sequence is
	  replaced by ``reverse_translate`` of the original sequence;
	- an existing ``.pdb`` file: every record read with the "pdb-atom"
	  format is reverse-translated and named ``<file stem>_<chain>``;
	- an existing file with any other extension: the program exits with an
	  "extension not recognized" message;
	- anything else: the entry itself is reverse-translated and wrapped in a
	  new record named ``unknown_seq<N>``, with N counting from 1.

	Returns the list of records, in input order.

	Raises SystemExit when an existing file has an unsupported extension.
	"""
	# Local import: sys.exit replaces the site-provided exit(), which is
	# meant for the interactive shell and is missing under ``python -S``.
	import sys

	rec_counter = 1
	inserts = []

	for this_input in inputs:
		if not os.path.isfile(this_input):
			# Not a file on disk: treat the entry as a literal sequence.
			name = "unknown_seq%d" % rec_counter
			record = SeqRecord(
				Seq(reverse_translate(this_input), IUPAC.unambiguous_dna),
				id=name,
				name=name,
				description="domesticator-optimized DNA sequence",
			)
			rec_counter += 1
			inserts.append(record)
			continue

		ext = os.path.splitext(this_input)[1]
		if ext == '.fasta':
			for record in SeqIO.parse(this_input, 'fasta'):
				record.seq = Seq(reverse_translate(record.seq), IUPAC.unambiguous_dna)
				inserts.append(record)
		elif ext == '.pdb':
			file_stem = os.path.splitext(os.path.basename(this_input))[0]
			for record in SeqIO.parse(this_input, "pdb-atom"):
				name = file_stem + "_" + record.annotations['chain']
				record.seq = Seq(reverse_translate(record.seq), IUPAC.unambiguous_dna)
				record.id = name
				record.name = name
				inserts.append(record)
		else:
			sys.exit("extension not recognized: " + ext)

	return inserts
コード例 #21
0
	codon_table = codon_table_11
else:
	print("\ngenetic codes other than 11 (Bacterial, Archaeal) not supported")


# Import the target gene from the FASTA file at fasta_path.
# TODO: refactor to process multiple input sequences.
gene_object = SeqIO.parse(fasta_path, "fasta")
# Every record overwrites dna_id, dna and gene, so only the values of the last
# record parsed are kept once the loop ends.
for dna_seq in gene_object:
			dna_id = dna_seq.id
			print("\nImporting target gene " + dna_id)
			dna = str(dna_seq.seq)
			gene = dna

# When protein_flag is set, the imported sequence is passed through
# reverse_translate before further use.
if protein_flag:
	gene = reverse_translate(gene)

# Both a gene list and a taxonomic ID were supplied: tell the user that the
# gene list is the one used (per the message below).
if input_path and taxid:
	print("\ngene list and taxonomic ID both provided. Defaulting to gene list")

#Import gene list for RSCU calculation
if input_path:
	print("\nImporting genes for RSCU calculation")
	seq_list = []
	counter = 0
	n_count = 0
	seq_object = SeqIO.parse(input_path, "fasta")
	for seqs in seq_object:
				seq_id = seqs.id
				seq = str(seqs.seq)
				seq_list.append(seq)