def restrict_nucleotides(self, sequence, location=None):
    """Return, codon by codon, the nucleotide triplets allowed at each position.

    The result is a list of ``((start, end), choices)`` pairs, one per
    3-nucleotide window of ``self.location``, where ``choices`` is the set of
    codons listed in ``self.codons_sequences`` for the matching letter of
    ``self.translation``.

    When ``self.codons_sequences`` is None an empty list is returned.
    The ``sequence`` and ``location`` arguments are not used here.
    """
    codon_table = self.codons_sequences
    if codon_table is None:
        return []

    region = self.location
    is_forward = region.strand == 1
    first, last = region.start, region.end

    restrictions = []
    for position in range(first, last, 3):
        codon_index = (position - first) // 3
        if is_forward:
            residue = self.translation[codon_index]
            allowed = set(codon_table[residue])
        else:
            # Any strand other than 1 is read backwards: the translation is
            # indexed from its end and each codon is reverse-complemented.
            residue = self.translation[-codon_index - 1]
            allowed = {reverse_complement(codon) for codon in codon_table[residue]}
        restrictions.append(((position, position + 3), allowed))
    return restrictions
def restrict_nucleotides(self, sequence, location=None):
    """Restrict the segment covered by ``self.location`` to ``self.choices``.

    Returns a single-element list ``[((start, end), choices)]``. When the
    location's strand is -1 every choice is reverse-complemented first;
    for any other strand the choices are used as they are.
    The ``sequence`` and ``location`` arguments are not used here.
    """
    span = (self.location.start, self.location.end)
    if self.location.strand == -1:
        allowed = {reverse_complement(choice) for choice in self.choices}
    else:
        allowed = set(self.choices)
    return [(span, allowed)]
def restrict_nucleotides(self, sequence, location=None):
    """As a constraint, put the choices in the mutation space.

    The returned list holds one ``((start, end), choices)`` entry spanning
    ``self.location``. Choices are reverse-complemented when the location
    sits on strand -1 and taken unchanged otherwise.
    The ``sequence`` and ``location`` arguments are not used here.
    """
    spec_location = self.location
    on_reverse_strand = spec_location.strand == -1
    if on_reverse_strand:
        options = {reverse_complement(option) for option in self.choices}
    else:
        options = set(self.choices)
    segment = (spec_location.start, spec_location.end)
    return [(segment, options)]
def get_kmer_extractor(sequence, include_reverse_complement=True, min_length=1):
    """Build a function ``extract_kmer(i)`` returning the k-mer starting at ``i``.

    The k-mer is ``sequence[i:i + min_length]``. When
    ``include_reverse_complement`` is true, the extractor instead returns the
    smaller (per ``min``) of that k-mer and the matching slice of the
    reverse-complemented sequence, so a k-mer and its reverse complement
    map to the same key.
    """
    if not include_reverse_complement:

        def extract_kmer(i):
            return sequence[i: i + min_length]

        return extract_kmer

    # The whole sequence is reverse-complemented once up front; each call
    # then only slices the mirrored window.
    mirrored = reverse_complement(sequence)
    total = len(sequence)

    def extract_kmer(i):
        forward_kmer = sequence[i: i + min_length]
        mirrored_kmer = mirrored[total - i - min_length: total - i]
        return min(forward_kmer, mirrored_kmer)

    return extract_kmer
def insert_pattern_in_problem(self, problem, reverse=False):
    """Insert the pattern in the problem's sequence by successive tries.

    Every admissible start position inside ``self.location`` is tried in
    turn (closest to the location's center first when ``self.center`` is
    set). For each position, an ``EnforceSequence`` constraint forcing the
    pattern's sequence there is added to a copy of the problem; if the
    resulting mutation space is solvable, this specification evaluates as
    passing, and the constraints can be resolved, the new sequence is
    written to ``problem.sequence`` and the method returns.

    If no position works and the pattern is not palindromic, the whole
    procedure is retried once with the reverse complement of the pattern.

    Parameters
    ----------
    problem
      The optimization problem whose ``sequence`` is updated in place on
      success.
    reverse
      If True, the reverse complement of the pattern's sequence is inserted.

    Raises
    ------
    NoSolutionError
      When no insertion position leads to a valid, resolvable problem.
    """
    sequence_to_insert = self.pattern.sequence
    if reverse:
        sequence_to_insert = reverse_complement(sequence_to_insert)
    L = self.pattern.size
    # The last admissible start is ``end - L`` (the pattern then finishes
    # exactly at the end of the location), hence the ``+ 1`` on the
    # exclusive upper bound of the range.
    starts = range(self.location.start, self.location.end - L + 1)
    if self.center:
        center = 0.5 * (self.location.start + self.location.end)
        starts = sorted(starts, key=lambda s: abs(s - center))
    for start in starts:
        new_location = Location(start, start + L, self.location.strand)
        new_constraint = EnforceSequence(
            sequence=sequence_to_insert, location=new_location
        )
        new_space = MutationSpace.from_optimization_problem(
            problem, new_constraints=[new_constraint]
        )
        if len(new_space.unsolvable_segments) > 0:
            # Forcing the pattern here conflicts with other constraints.
            continue
        new_sequence = new_space.constrain_sequence(problem.sequence)
        new_constraints = problem.constraints + [new_constraint]
        new_problem = DnaOptimizationProblem(
            sequence=new_sequence,
            constraints=new_constraints,
            mutation_space=new_space,
            logger=None,
        )
        if self.evaluate(new_problem).passes:
            try:
                new_problem.resolve_constraints()
                problem.sequence = new_problem.sequence
                return
            except NoSolutionError:
                # This position cannot be resolved; try the next one.
                pass
    if (not reverse) and (not self.pattern.is_palyndromic):
        self.insert_pattern_in_problem(problem, reverse=True)
        return
    raise NoSolutionError(
        problem=problem,
        location=self.location,
        message="Insertion of pattern %s in %s failed"
        % (self.pattern.sequence, self.location),
    )
def get_kmer_extractor(self, sequence):
    """Return a function mapping an index ``i`` to the k-mer starting there.

    The k-mer length is ``self.min_length``, read each time the extractor
    is called. When ``self.include_reverse_complement`` is true the
    extractor returns the smaller (per ``min``) of the k-mer and the
    matching slice of the reverse-complemented sequence.
    """
    if not self.include_reverse_complement:

        def extract_kmer(i):
            return sequence[i:i + self.min_length]

        return extract_kmer

    # Reverse-complement the full sequence once here instead of once per
    # k-mer, as this step can be a bottleneck.
    mirrored = reverse_complement(sequence)
    total = len(sequence)

    def extract_kmer(i):
        forward_kmer = sequence[i:i + self.min_length]
        mirrored_kmer = mirrored[total - i - self.min_length:total - i]
        return min(forward_kmer, mirrored_kmer)

    return extract_kmer
def evaluate(self, problem):
    """Return the score (-number_of_hairpins) and hairpins locations.

    For every index ``i`` of the sequence extracted at ``self.location``,
    the ``stem_size``-long word starting at ``i`` is searched in a slice of
    the reverse-complemented sequence delimited by ``hairpin_window`` and
    ``stem_size``. Each hit is recorded as a segment; the score is minus
    the number of hits. Hits are then merged with ``group_nearby_segments``
    and returned as sorted ``Location`` objects inside a ``SpecEvaluation``.
    """
    region_sequence = self.location.extract_sequence(problem.sequence)
    region_revcomp = reverse_complement(region_sequence)
    window = self.hairpin_window
    stem = self.stem_size

    hits = []
    for pos in range(len(region_sequence) - window):
        stem_word = region_sequence[pos:pos + stem]
        search_area = region_revcomp[-(pos + window):-(pos + stem)]
        offset = search_area.find(stem_word)
        if offset >= 0:
            hits.append((pos, pos + offset + len(stem_word)))

    score = -len(hits)
    clusters = group_nearby_segments(hits, max_start_spread=10)
    # NOTE(review): coordinates appear to be relative to the extracted
    # sub-sequence, not offset by self.location.start - confirm this is
    # intended when the location does not start at 0.
    locations = sorted(
        Location(cluster[0][0], cluster[-1][1] + window)
        for cluster in clusters
    )
    return SpecEvaluation(self, problem, score, locations=locations)
def restrict_nucleotides(self, sequence, location=None):
    """Restrict each position to the nucleotides allowed by ``self.sequence``.

    Returns a list of ``(index, choices)`` pairs, one per position of the
    considered region, where ``choices`` is the set of nucleotides that
    ``IUPAC_NOTATION`` associates with the letter of ``self.sequence``
    corresponding to that position.

    Parameters
    ----------
    sequence
      Not used here.
    location
      Optional location. When given, only the overlap between it and
      ``self.location`` is considered, and an empty list is returned if
      there is no overlap. When None, the whole ``self.location`` is used.
    """
    if location is not None:
        new_location = self.location.overlap_region(location)
        if new_location is None:
            return []
    else:
        new_location = self.location
    start, end = new_location.start, new_location.end

    if self.location.strand == -1:
        lend = self.location.end
        # On the reverse strand self.sequence is read backwards: its first
        # letter sits at position ``lend - 1``, so position ``i`` maps to
        # index ``lend - i - 1`` (``lend - i`` would overflow by one at
        # ``i == self.location.start``).
        return [
            (
                i,
                {
                    reverse_complement(n)
                    for n in IUPAC_NOTATION[self.sequence[lend - i - 1]]
                },
            )
            for i in range(start, end)
        ]

    lstart = self.location.start
    return [
        (i, IUPAC_NOTATION[self.sequence[i - lstart]])
        for i in range(start, end)
    ]
def get_kmer_extractor_cached(sequence, include_reverse_complement=True, min_length=1):
    """Kmer extractor with memoization.

    Returns a function ``extract_kmer(i)`` wrapped in an ``lru_cache`` whose
    size is the length of ``sequence``, so repeated lookups of the same
    index on the same extractor are served from the cache.

    The k-mer is ``sequence[i:i + min_length]``; when
    ``include_reverse_complement`` is true, the smaller (per ``min``) of it
    and the matching slice of the reverse-complemented sequence is returned.

    NOTE(review): the cache visible here is per returned extractor; whether
    this factory itself is also cached cannot be told from this block.
    """
    memoize = lru_cache(maxsize=len(sequence))

    if not include_reverse_complement:

        @memoize
        def extract_kmer(i):
            return sequence[i: i + min_length]

        return extract_kmer

    mirrored = reverse_complement(sequence)
    total = len(sequence)

    @memoize
    def extract_kmer(i):
        forward_kmer = sequence[i: i + min_length]
        mirrored_kmer = mirrored[total - i - min_length: total - i]
        return min(forward_kmer, mirrored_kmer)

    return extract_kmer
def _enzymes_names_to_distances_graph(enzymes_names):
    """Build the pairwise site-difference graph of a set of enzymes.

    The names are copied and shuffled with ``np.random.shuffle`` before
    anything else, so the caller's list is left untouched.

    Returns
    -------
    graph, enzymes_sites, core_enzymes
      ``graph`` maps each ordered pair ``(e1, e2)`` of distinct names to a
      dict with the keys ``diff`` (the result of ``sites_difference`` on the
      two sites) and ``dist`` (its length). ``enzymes_sites`` is the result
      of ``get_enzymes_ATGC_sequences``. ``core_enzymes`` maps a name ``e1``
      to the list of names ``e2`` whose site equals or is contained in the
      site of ``e1`` or its reverse complement, or whose pattern matches
      either of them.
    """
    names = enzymes_names[:]
    np.random.shuffle(names)
    enzymes_sites = get_enzymes_ATGC_sequences(names)
    patterns = enzymes_to_dna_pattern(names)

    core_enzymes = {}
    for name_a in names:
        site_a = enzymes_sites[name_a]
        rev_site_a = reverse_complement(site_a)
        for name_b in names:
            if name_a == name_b:
                continue
            site_b = enzymes_sites[name_b]
            pattern_b = patterns[name_b]
            # All six checks are evaluated before being combined.
            overlap_checks = (
                site_a == site_b,
                site_b in site_a,
                rev_site_a == site_b,
                site_b in rev_site_a,
                len(pattern_b.find_matches(site_a)),
                len(pattern_b.find_matches(rev_site_a)),
            )
            if not any(overlap_checks):
                continue
            related = core_enzymes.setdefault(name_a, [])
            if name_b not in related:
                related.append(name_b)

    graph = {}
    for name_a in names:
        site_a = enzymes_sites[name_a]
        for name_b in names:
            if name_a == name_b:
                continue
            diff = sites_difference(site_a, enzymes_sites[name_b])
            graph[(name_a, name_b)] = {"diff": diff, "dist": len(diff)}

    return graph, enzymes_sites, core_enzymes
def extract_kmer(i):
    """Return the k-mer of ``problem.sequence`` starting at index ``i``.

    The k-mer length is ``self.min_length``. When
    ``self.include_reverse_complement`` is true, the smaller (per ``min``)
    of the k-mer and its reverse complement is returned instead.
    ``self`` and ``problem`` come from the enclosing scope.
    """
    kmer = problem.sequence[i:i + self.min_length]
    if not self.include_reverse_complement:
        return kmer
    return min(kmer, reverse_complement(kmer))