from dnachisel import DnaOptimizationProblem, AvoidPattern, random_dna_sequence
from urllib import request

# DOWNLOAD THE LIST OF TF BINDING SITES
# The response is used as a context manager so that the HTTP connection is
# closed as soon as the payload has been read.
url = "http://regulondb.ccg.unam.mx/menu/download/datasets/files/PSSMSet.txt"
with request.urlopen(url) as response:
    data = response.read().decode('utf-8')

# PARSE THE DATA LINE BY LINE TO OBTAIN A LIST OF TF BINDING SEQUENCES
# Keep only the non-empty lines made exclusively of the characters A, T, G, C.
nucleotides = set("ATGC")
tf_binding_sequences = [
    line for line in data.splitlines() if line and set(line) <= nucleotides
]

# DEFINE AND SOLVE THE OPTIMIZATION PROBLEM
# One AvoidPattern constraint is created per binding sequence found above.
problem = DnaOptimizationProblem(
    sequence=random_dna_sequence(50000),
    constraints=[AvoidPattern(pattern) for pattern in tf_binding_sequences],
)
problem.resolve_constraints()
problem.to_record("sequence_without_tf_binding_sites.gb")
Example #2
0
			naive_construct.features.append(whole_seq_feat)

		dest_feat = find_annotation(naive_construct, placeholder.name)
		dest_loc = Location.from_biopython_location(dest_feat.location)


		user_objectives, user_constraints = load_user_options(args, dest_loc)
	

		objectives += user_objectives
		constraints += user_constraints


		problem = DnaOptimizationProblem(str(naive_construct.seq), constraints=constraints, objectives=objectives)

		domesticator_record = problem.to_record()

		mature_construct = naive_construct
		mature_construct.features.extend(domesticator_record.features)

		SeqIO.write([mature_construct], output_filename, "genbank")
		exit("exported " + output_filename)




	inserts = load_inserts(args.input)

	#now load all the custom global constraints and objectives?

	outputs = []
Example #3
0
url = (
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?"
    + "db=nucleotide&id=48994873&rettype=gb&retmode=txt"
)
# Read the response inside a ``with`` block so the HTTP connection is closed
# once the Genbank text has been downloaded.
with request.urlopen(url) as response:
    genbank_data = response.read().decode("utf-8")
genbank_record = load_record(StringIO(genbank_data), file_format="genbank")

print("INITIALIZING THE PROBLEM WITH CONSTRAINTS FOR EACH GENE...")

constraints = []
for feature in genbank_record.features:
    # Only single-part "gene" features are considered.
    if feature.type != "gene" or len(feature.location.parts) != 1:
        continue
    location = Location.from_biopython_location(feature.location)
    # Skip genes whose length is not a multiple of 3, or not above 100.
    if len(location) % 3 != 0 or len(location) <= 100:
        continue
    constraints.extend(
        [
            EnforceTranslation(location=location),
            AvoidPattern("BsmBI_site", location),
            EnforceGCContent(
                mini=0.40, maxi=0.60, window=150, location=location
            ),
        ]
    )
problem = DnaOptimizationProblem(genbank_record, constraints)

print("RESOLVING THE CONSTRAINTS...")

problem.logger.ignore_bars_under = 50
problem.resolve_constraints()
problem.to_record("ecoli_genes_optimization.gb")
Example #4
0
    def domesticate(
        self,
        dna_sequence=None,
        protein_sequence=None,
        is_cds="default",
        codon_optimization=None,
        extra_constraints=(),
        extra_objectives=(),
        final_record_target=None,
        edit=False,
        barcode="",
        barcode_spacer="AA",
        report_target=None,
    ):
        """Domesticate a sequence.

        The sequence is placed between ``self.left_flank`` and
        ``self.right_flank``, the domesticator's constraints and objectives
        (plus any extra ones) are applied, and the problem is solved only if
        some constraint fails or some objective other than edit minimization
        is present.

        Parameters
        ----------

        dna_sequence
          The DNA sequence to domesticate. If it is a ``SeqRecord``, the
          constraints and objectives read from the record by
          ``DnaOptimizationProblem.from_record`` are shifted by the length of
          the left flank and appended to ``extra_constraints`` and
          ``extra_objectives`` respectively.

        protein_sequence
          Amino-acid sequence of the protein, which will be converted into
          a DNA sequence string. When provided, it replaces ``dna_sequence``
          and ``is_cds`` is forced to True.

        is_cds
          If True, sequence edits are restricted to synonymous mutations.
          The value ``"default"`` means "use ``self.cds_by_default``".

        codon_optimization
          Either None for no codon optimization or the name of an organism
          supported by DnaChisel.

        extra_constraints
          List of extra constraints to apply to the domesticated sequences.
          Each constraint is either a DnaChisel constraint or a function
          (dna_sequence => DnaChisel constraint).

        extra_objectives
          List of extra optimization objectives to apply to the domesticated
          sequences. Each objective is either a DnaChisel objective or a
          function (dna_sequence => DnaChisel objective).

        final_record_target
          Path to the file where to write the final genbank.

        edit
          Turn to True to allow sequence edits (if it is False and not all
          constraints are originally satisfied, a failed domestication result,
          i.e. with attribute ``success`` set to False, will be returned).

        report_target
          Target for the sequence optimization report (a folder path, or a zip
          path).

        barcode
          A sequence of DNA that will be added to the left of the sequence once
          the domestication is done.

        barcode_spacer
          Nucleotides to be added between the barcode and the enzyme (optional,
          the idea here is that they will make sure to avoid the creation of
          unwanted cutting sites).

        Returns
        -------

        result
          A ``DomesticationResult`` built from, in this order: the problem's
          sequence before optimization, the final record, the edits record,
          the report data (None unless ``report_target`` was used), the
          success flag, and the error message (empty string on success).
        """
        if is_cds == "default":
            is_cds = self.cds_by_default
        if isinstance(dna_sequence, SeqRecord):
            record_problem = DnaOptimizationProblem.from_record(dna_sequence)
            # Specifications read from the record are relative to the bare
            # sequence; shift them to account for the left flank.
            for spec in record_problem.constraints + record_problem.objectives:
                spec.location += len(self.left_flank)
            extra_constraints = (
                list(extra_constraints) + record_problem.constraints
            )
            # Objectives are built from ``extra_objectives`` (not from the
            # constraints), so user objectives are kept and constraints are
            # not duplicated as objectives.
            extra_objectives = (
                list(extra_objectives) + record_problem.objectives
            )

        if protein_sequence is not None:
            is_cds = True
            dna_sequence = reverse_translate(protein_sequence)

        # Callable entries are factories taking the sequence and returning a
        # specification; other entries are used as they are.
        constraints = [
            c(dna_sequence) if hasattr(c, "__call__") else c
            for c in list(extra_constraints) + self.constraints
        ]
        # Span of the (unflanked) sequence inside the extended sequence.
        location = Location(
            len(self.left_flank), len(self.left_flank) + len(dna_sequence)
        )
        if is_cds:
            constraints.append(EnforceTranslation(location=location))
        objectives = [
            o(dna_sequence) if hasattr(o, "__call__") else o
            for o in list(extra_objectives) + self.objectives
        ]
        if codon_optimization:
            objectives.append(
                CodonOptimize(species=codon_optimization, location=location)
            )
        if self.minimize_edits:
            objectives.append(AvoidChanges())

        extended_sequence = self.left_flank + dna_sequence + self.right_flank

        # Non-CDS sequences with edits disallowed must remain unchanged.
        if (not is_cds) and (not edit):
            constraints.append(AvoidChanges())
        problem = DnaOptimizationProblem(
            extended_sequence,
            constraints=constraints,
            objectives=objectives,
            logger=self.logger,
        )
        all_constraints_pass = problem.all_constraints_pass()
        # The AvoidChanges objective added when ``self.minimize_edits`` is
        # set is discounted: alone, it does not justify an optimization.
        no_objectives = (len(problem.objectives) - self.minimize_edits) == 0
        report_data = None
        optimization_successful = True
        message = ""
        if not (all_constraints_pass and no_objectives):
            problem.n_mutations = self.simultaneous_mutations

            if report_target is not None:
                (success, message, report_data) = problem.optimize_with_report(
                    target=report_target, project_name=self.name
                )
                optimization_successful = success
            else:
                # Best effort: any solver failure is reported through the
                # result's success flag and message instead of propagating.
                try:
                    problem.resolve_constraints()
                    problem.optimize()
                except Exception as err:
                    message = str(err)
                    optimization_successful = False
        final_record = problem.to_record(
            with_original_features=True,
            with_original_spec_features=False,
            with_constraints=False,
            with_objectives=False,
        )
        edits_record = problem.to_record(
            with_original_features=True,
            with_original_spec_features=False,
            with_constraints=False,
            with_objectives=False,
            with_sequence_edits=True,
        )
        if final_record_target is not None:
            SeqIO.write(final_record, final_record_target, "genbank")

        return DomesticationResult(
            problem.sequence_before,
            final_record,
            edits_record,
            report_data,
            optimization_successful,
            message,
        )