from urllib import request

from dnachisel import DnaOptimizationProblem, AvoidPattern, random_dna_sequence

# DOWNLOAD THE LIST OF TF BINDING SITES
# The response is used as a context manager so that the underlying HTTP
# connection is closed as soon as the payload has been read.
url = "http://regulondb.ccg.unam.mx/menu/download/datasets/files/PSSMSet.txt"
with request.urlopen(url) as response:
    data = response.read().decode("utf-8")

# PARSE THE DATA LINE BY LINE TO OBTAIN A LIST OF TF BINDING SEQUENCES
# A line is kept only when it is non-empty and made exclusively of the
# letters A, T, G and C; every other line of the file is ignored.
dna_letters = set("ATGC")  # built once instead of once per line
tf_binding_sequences = [
    line
    for line in data.splitlines()
    if line and set(line) <= dna_letters
]

# DEFINE AND SOLVE THE OPTIMIZATION PROBLEM
# Start from a random 50000-nucleotide sequence and forbid every downloaded
# binding sequence as a pattern.
problem = DnaOptimizationProblem(
    sequence=random_dna_sequence(50000),
    constraints=[AvoidPattern(pattern) for pattern in tf_binding_sequences],
)
problem.resolve_constraints()

# Write the resulting sequence as a GenBank record.
problem.to_record("sequence_without_tf_binding_sites.gb")
# NOTE(review): this span is a fragment of a larger routine. Its original
# indentation is not recoverable from here, so the statements are laid out
# flat. The `exit(...)` call followed by further code suggests that everything
# up to and including `exit(...)` sits inside a conditional branch -- confirm
# the nesting against the full file.

# Attach the whole-sequence feature to the construct.
naive_construct.features.append(whole_seq_feat)

# Locate the annotation named after the placeholder and convert its Biopython
# location into a Location object.
dest_feat = find_annotation(naive_construct, placeholder.name)
dest_loc = Location.from_biopython_location(dest_feat.location)

# Append the user-supplied objectives and constraints for that location to
# the lists built earlier.
user_objectives, user_constraints = load_user_options(args, dest_loc)
objectives += user_objectives
constraints += user_constraints

# NOTE(review): no resolve_constraints()/optimize() call is visible between
# building the problem and exporting it -- confirm this is intended.
problem = DnaOptimizationProblem(str(naive_construct.seq), constraints=constraints, objectives=objectives)
domesticator_record = problem.to_record()

# `mature_construct` is the same object as `naive_construct` (no copy is
# made), so extending its features also extends `naive_construct.features`.
mature_construct = naive_construct
mature_construct.features.extend(domesticator_record.features)

# Write the annotated construct as GenBank, then stop the program.
SeqIO.write([mature_construct], output_filename, "genbank")
# NOTE(review): exit() with a string argument prints it to stderr and ends
# the process with status 1, although the message reports a success.
exit("exported " + output_filename)

inserts = load_inserts(args.input)

# now load all the custom global constraints and objectives?
outputs = []
# Fetch a GenBank record (NCBI nucleotide id 48994873) through the E-utilities
# "efetch" endpoint. The response is read inside a `with` block so that the
# HTTP connection is released as soon as the payload has been read.
url = (
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?"
    + "db=nucleotide&id=48994873&rettype=gb&retmode=txt"
)
with request.urlopen(url) as response:
    genbank_data = response.read().decode("utf-8")
genbank_record = load_record(StringIO(genbank_data), file_format="genbank")

print("INITIALIZING THE PROBLEM WITH CONSTRAINTS FOR EACH GENE...")

constraints = []
for feature in genbank_record.features:
    # Only "gene" features whose location is made of a single part are used.
    if feature.type != "gene" or len(feature.location.parts) != 1:
        continue
    location = Location.from_biopython_location(feature.location)
    # Skip genes whose length is not a multiple of 3, or not above 100.
    if len(location) % 3 != 0 or len(location) <= 100:
        continue
    # Per-gene constraints: keep the translation, avoid the "BsmBI_site"
    # pattern, and keep GC content between 40% and 60% over 150-nucleotide
    # windows.
    constraints.extend(
        [
            EnforceTranslation(location=location),
            AvoidPattern("BsmBI_site", location),
            EnforceGCContent(
                mini=0.40, maxi=0.60, window=150, location=location
            ),
        ]
    )

problem = DnaOptimizationProblem(genbank_record, constraints)

print("RESOLVING THE CONSTRAINTS...")

# NOTE(review): presumably hides progress bars that have fewer than 50
# iterations -- confirm against the logger's documentation.
problem.logger.ignore_bars_under = 50
problem.resolve_constraints()

# Write the resulting sequence as a GenBank record.
problem.to_record("ecoli_genes_optimization.gb")
def domesticate(
    self,
    dna_sequence=None,
    protein_sequence=None,
    is_cds="default",
    codon_optimization=None,
    extra_constraints=(),
    extra_objectives=(),
    final_record_target=None,
    edit=False,
    barcode="",
    barcode_spacer="AA",
    report_target=None,
):
    """Domesticate a sequence.

    The sequence is placed between ``self.left_flank`` and
    ``self.right_flank``, the domesticator's constraints and objectives
    (plus any extra ones) are applied, and the outcome is returned as a
    ``DomesticationResult``.

    Parameters
    ----------

    dna_sequence
      The DNA sequence to domesticate. When a ``SeqRecord`` is given, the
      specifications read from the record are shifted by the length of the
      left flank and added to the extra constraints and objectives.

    protein_sequence
      Amino-acid sequence of the protein, which will be converted into
      a DNA sequence string. When provided, it replaces ``dna_sequence``
      and ``is_cds`` is forced to True.

    is_cds
      If True, sequence edits are restricted to synonymous mutations (an
      ``EnforceTranslation`` constraint is added over the sequence). The
      value "default" means "use ``self.cds_by_default``".

    codon_optimization
      Either None for no codon optimization or the name of an organism
      supported by DnaChisel.

    extra_constraints
      List of extra constraints to apply to the domesticated sequences.
      Each constraint is either a DnaChisel constraint or a function
      (dna_sequence => DnaChisel constraint).

    extra_objectives
      List of extra optimization objectives to apply to the domesticated
      sequences. Each objective is either a DnaChisel objective or a
      function (dna_sequence => DnaChisel objective).

    final_record_target
      Path to the file where to write the final genbank.

    edit
      Turn to True to allow sequence edits. If it is False and the
      sequence is not a CDS, an ``AvoidChanges`` constraint is added, so
      that if not all constraints are originally satisfied, a failed
      domestication result (i.e. with attribute ``success`` set to False)
      will be returned.

    barcode
      A sequence of DNA that will be added to the left of the sequence once
      the domestication is done.
      NOTE(review): this parameter is not used in this method's body.

    barcode_spacer
      Nucleotides to be added between the barcode and the enzyme (optional,
      the idea here is that they will make sure to avoid the creation of
      unwanted cutting sites).
      NOTE(review): this parameter is not used in this method's body.

    report_target
      Target for the sequence optimization report (a folder path, or a zip
      path).

    Returns
    -------

    result
      A ``DomesticationResult`` built from, in this order: the sequence
      before optimization, final_record, edits_record, report_data,
      success, msg.
    """
    if is_cds == "default":
        is_cds = self.cds_by_default

    if isinstance(dna_sequence, SeqRecord):
        # Reuse the specifications annotated in the record, shifted to
        # account for the left flank that is prepended further down.
        problem = DnaOptimizationProblem.from_record(dna_sequence)
        for spec in problem.constraints + problem.objectives:
            spec.location += len(self.left_flank)
        extra_constraints = list(extra_constraints) + problem.constraints
        # Objectives are built from the caller's objectives; constraints
        # must not leak into this list.
        extra_objectives = list(extra_objectives) + problem.objectives

    if protein_sequence is not None:
        is_cds = True
        dna_sequence = reverse_translate(protein_sequence)

    # Specifications given as functions are instantiated for this sequence.
    constraints = [
        c(dna_sequence) if callable(c) else c
        for c in list(extra_constraints) + self.constraints
    ]
    # Location of the (unflanked) sequence inside the flanked sequence.
    location = Location(
        len(self.left_flank), len(self.left_flank) + len(dna_sequence)
    )
    if is_cds:
        constraints.append(EnforceTranslation(location=location))

    objectives = [
        o(dna_sequence) if callable(o) else o
        for o in list(extra_objectives) + self.objectives
    ]
    if codon_optimization:
        objectives.append(
            CodonOptimize(species=codon_optimization, location=location)
        )
    if self.minimize_edits:
        objectives.append(AvoidChanges())

    extended_sequence = self.left_flank + dna_sequence + self.right_flank

    # A non-CDS sequence that may not be edited is frozen entirely.
    if (not is_cds) and (not edit):
        constraints.append(AvoidChanges())

    problem = DnaOptimizationProblem(
        extended_sequence,
        constraints=constraints,
        objectives=objectives,
        logger=self.logger,
    )
    all_constraints_pass = problem.all_constraints_pass()
    # The AvoidChanges objective added for ``minimize_edits`` is not
    # counted as an objective here.
    no_objectives = (len(problem.objectives) - self.minimize_edits) == 0

    report_data = None
    optimization_successful = True
    message = ""
    # Optimization is skipped only when every constraint already passes
    # and there is nothing to optimize.
    if not (all_constraints_pass and no_objectives):
        problem.n_mutations = self.simultaneous_mutations

        if report_target is not None:
            (success, message, report_data) = problem.optimize_with_report(
                target=report_target, project_name=self.name
            )
            optimization_successful = success
        else:
            # Best effort: a failure is reported through the result's
            # success flag and message rather than raised to the caller.
            try:
                problem.resolve_constraints()
                problem.optimize()
            except Exception as err:
                message = str(err)
                optimization_successful = False
                report_data = None

    final_record = problem.to_record(
        with_original_features=True,
        with_original_spec_features=False,
        with_constraints=False,
        with_objectives=False,
    )
    # Same record, additionally annotated with the sequence edits.
    edits_record = problem.to_record(
        with_original_features=True,
        with_original_spec_features=False,
        with_constraints=False,
        with_objectives=False,
        with_sequence_edits=True,
    )
    if final_record_target is not None:
        SeqIO.write(final_record, final_record_target, "genbank")

    return DomesticationResult(
        problem.sequence_before,
        final_record,
        edits_record,
        report_data,
        optimization_successful,
        message,
    )