예제 #1
0
    def restrict_nucleotides(self, sequence, location=None):
        """Return the codon choices allowed for each codon of the location.

        Returns an empty list when ``self.codons_sequences`` is None.
        Otherwise returns one ``((start, end), choices)`` pair for every
        3-nucleotide window of ``self.location``, where ``choices`` is the set
        of codons that ``self.codons_sequences`` lists for the corresponding
        entry of ``self.translation``. When the strand is not 1, the
        translation is read from its end and every codon is
        reverse-complemented.

        The ``sequence`` and ``location`` arguments are not used.
        """
        codon_table = self.codons_sequences
        if codon_table is None:
            return []

        region = self.location
        on_forward_strand = region.strand == 1
        first, last = region.start, region.end

        restrictions = []
        for position in range(first, last, 3):
            # Rank of this codon window counted from the location start.
            codon_rank = (position - first) // 3
            if on_forward_strand:
                residue = self.translation[codon_rank]
                choices = set(codon_table[residue])
            else:
                # Walk the translation backwards for the other strand.
                residue = self.translation[-codon_rank - 1]
                choices = {
                    reverse_complement(codon)
                    for codon in codon_table[residue]
                }
            restrictions.append(((position, position + 3), choices))
        return restrictions
예제 #2
0
 def restrict_nucleotides(self, sequence, location=None):
     """Restrict the spec's location to the sequences in ``self.choices``.

     Returns a single ``((start, end), choices)`` pair covering
     ``self.location``. The choices are reverse-complemented when the
     location is on the -1 strand. ``sequence`` and ``location`` are unused.
     """
     span = (self.location.start, self.location.end)
     if self.location.strand == -1:
         allowed = {reverse_complement(choice) for choice in self.choices}
     else:
         allowed = set(self.choices)
     return [(span, allowed)]
예제 #3
0
    def restrict_nucleotides(self, sequence, location=None):
        """Expose ``self.choices`` as the only options for the location.

        Returns a one-element list ``[((start, end), choices)]`` built from
        ``self.location``. On the -1 strand each choice is
        reverse-complemented first. ``sequence`` and ``location`` are unused.
        """
        on_reverse_strand = self.location.strand == -1
        if on_reverse_strand:
            options = {reverse_complement(option) for option in self.choices}
        else:
            options = set(self.choices)
        bounds = (self.location.start, self.location.end)
        return [(bounds, options)]
def get_kmer_extractor(sequence, include_reverse_complement=True,
                       min_length=1):
    """Return a function mapping an index ``i`` to the k-mer starting there.

    The returned ``extract_kmer(i)`` slices ``min_length`` characters of
    ``sequence`` starting at ``i``. When ``include_reverse_complement`` is
    true, it instead returns the smaller (string comparison) of that slice
    and the matching slice of the reverse-complemented sequence, so a k-mer
    and its reverse complement share one representative.
    """
    if not include_reverse_complement:
        def extract_kmer(i):
            return sequence[i: i + min_length]

        return extract_kmer

    # Reverse-complement the whole sequence once; each call then only slices.
    rev_comp_sequence = reverse_complement(sequence)
    sequence_length = len(sequence)

    def extract_kmer(i):
        forward = sequence[i: i + min_length]
        mirror_end = sequence_length - i
        backward = rev_comp_sequence[mirror_end - min_length: mirror_end]
        return min(forward, backward)

    return extract_kmer
예제 #5
0
    def insert_pattern_in_problem(self, problem, reverse=False):
        """Try to insert the pattern into the problem's sequence.

        Candidate start positions inside ``self.location`` are tried one by
        one (closest to the location's center first when ``self.center`` is
        set). For each candidate, an ``EnforceSequence`` constraint is added
        to a copy of the problem; the first candidate that yields a solvable
        mutation space, passes ``self.evaluate`` and resolves its constraints
        has its sequence written to ``problem.sequence``.

        If no candidate works, the reverse-complemented pattern is tried once
        (unless ``reverse`` is already set or the pattern is palindromic).

        Raises
        ------
        NoSolutionError
            When no insertion position succeeds.
        """
        insert = self.pattern.sequence
        if reverse:
            insert = reverse_complement(insert)

        pattern_length = self.pattern.size
        # NOTE(review): the range stops before ``end - pattern_length``, so
        # that start position is never tried -- confirm this is intended.
        candidates = range(
            self.location.start, self.location.end - pattern_length
        )
        if self.center:
            midpoint = 0.5 * (self.location.start + self.location.end)
            candidates = sorted(
                candidates, key=lambda position: abs(position - midpoint)
            )

        for position in candidates:
            target = Location(
                position, position + pattern_length, self.location.strand
            )
            enforce = EnforceSequence(sequence=insert, location=target)
            space = MutationSpace.from_optimization_problem(
                problem, new_constraints=[enforce]
            )
            if len(space.unsolvable_segments) > 0:
                continue

            trial = DnaOptimizationProblem(
                sequence=space.constrain_sequence(problem.sequence),
                constraints=problem.constraints + [enforce],
                mutation_space=space,
                logger=None,
            )
            if not self.evaluate(trial).passes:
                continue
            try:
                trial.resolve_constraints()
                problem.sequence = trial.sequence
            except NoSolutionError:
                # This position cannot be resolved; move on to the next one.
                continue
            return

        if not (reverse or self.pattern.is_palyndromic):
            # Second chance: try again with the reverse-complemented pattern.
            self.insert_pattern_in_problem(problem, reverse=True)
            return

        raise NoSolutionError(
            problem=problem,
            location=self.location,
            message="Insertion of pattern %s in %s failed"
            % (self.pattern.sequence, self.location),
        )
예제 #6
0
    def get_kmer_extractor(self, sequence):
        """Return a function giving the k-mer of ``sequence`` at an index.

        The returned ``extract_kmer(i)`` slices ``self.min_length`` characters
        starting at ``i``. When ``self.include_reverse_complement`` is set, it
        returns the smaller (string comparison) of that slice and the
        matching slice of the reverse-complemented sequence.
        """
        if not self.include_reverse_complement:

            def extract_kmer(i):
                return sequence[i:i + self.min_length]

            return extract_kmer

        # The full sequence is reverse-complemented once up front so that
        # each extraction is only a pair of slices (this can be a hot spot).
        rev_comp_sequence = reverse_complement(sequence)
        sequence_length = len(sequence)

        def extract_kmer(i):
            forward = sequence[i:i + self.min_length]
            mirror_end = sequence_length - i
            backward = rev_comp_sequence[
                mirror_end - self.min_length:mirror_end
            ]
            return min(forward, backward)

        return extract_kmer
예제 #7
0
    def evaluate(self, problem):
        """Score the problem's sequence by counting hairpin candidates.

        For each position ``i`` of the sequence extracted at
        ``self.location``, the word of ``self.stem_size`` characters starting
        at ``i`` is searched for in a slice of the reverse-complemented
        sequence bounded by ``self.hairpin_window`` and ``self.stem_size``.
        Every hit counts as one hairpin.

        Returns a ``SpecEvaluation`` whose score is minus the number of hits
        and whose locations are the hits grouped by
        ``group_nearby_segments``, each extended by ``self.hairpin_window``.
        """
        window = self.hairpin_window
        stem = self.stem_size
        sequence = self.location.extract_sequence(problem.sequence)
        reverse = reverse_complement(sequence)

        hits = []
        for i in range(len(sequence) - window):
            word = sequence[i:i + stem]
            rest = reverse[-(i + window):-(i + stem)]
            if word in rest:
                hits.append((i, i + rest.index(word) + len(word)))

        # The score counts raw hits, before nearby ones are merged below.
        score = -len(hits)
        groups = group_nearby_segments(hits, max_start_spread=10)
        locations = sorted(
            Location(group[0][0], group[-1][1] + window) for group in groups
        )
        return SpecEvaluation(self, problem, score, locations=locations)
예제 #8
0
 def restrict_nucleotides(self, sequence, location=None):
     """Return, per position, the nucleotides allowed by ``self.sequence``.

     When ``location`` is given, only the overlap between it and
     ``self.location`` is considered; an empty list is returned if they do
     not overlap. Otherwise the whole ``self.location`` is used.

     Returns a list of ``(position, choices)`` pairs, where ``choices`` is
     the ``IUPAC_NOTATION`` entry for the letter of ``self.sequence`` that
     corresponds to ``position`` (reverse-complemented on the -1 strand).
     The ``sequence`` argument is not used.
     """
     if location is not None:
         new_location = self.location.overlap_region(location)
         if new_location is None:
             return []
     else:
         new_location = self.location
     start, end = new_location.start, new_location.end
     if self.location.strand == -1:
         lend = self.location.end
         # On the -1 strand, self.sequence is read from the location's end
         # backwards, so position ``i`` maps to index ``lend - 1 - i``.
         # The previous ``lend - i`` was off by one: it skipped index 0 and
         # overran the sequence at ``i == self.location.start``.
         # NOTE(review): assumes self.sequence spans exactly self.location.
         return [(i,
                  set(
                      reverse_complement(n)
                      for n in IUPAC_NOTATION[self.sequence[lend - i - 1]]))
                 for i in range(start, end)]
     else:
         lstart = self.location.start
         return [(i, IUPAC_NOTATION[self.sequence[i - lstart]])
                 for i in range(start, end)]
def get_kmer_extractor_cached(sequence, include_reverse_complement=True,
                              min_length=1):
    """Build a memoized k-mer extractor for ``sequence``.

    The returned ``extract_kmer(i)`` gives the ``min_length``-character slice
    of ``sequence`` starting at ``i`` or, when ``include_reverse_complement``
    is true, the smaller (string comparison) of that slice and the matching
    slice of the reverse-complemented sequence.

    Results are memoized per returned extractor by an ``lru_cache`` holding
    up to ``len(sequence)`` entries, so repeated lookups of the same index
    are served from the cache.
    """
    memoize = lru_cache(maxsize=len(sequence))

    if not include_reverse_complement:
        @memoize
        def extract_kmer(i):
            return sequence[i: i + min_length]

        return extract_kmer

    rev_comp_sequence = reverse_complement(sequence)
    sequence_length = len(sequence)

    @memoize
    def extract_kmer(i):
        forward = sequence[i: i + min_length]
        mirror_end = sequence_length - i
        backward = rev_comp_sequence[mirror_end - min_length: mirror_end]
        return min(forward, backward)

    return extract_kmer
예제 #10
0
def _enzymes_names_to_distances_graph(enzymes_names):
    """Compute pairwise site differences and site overlaps between enzymes.

    Works on a shuffled copy of ``enzymes_names`` (the argument itself is
    sliced first, then shuffled with ``np.random.shuffle``).

    Returns a tuple ``(graph, enzymes_sites, core_enzymes)``:

    - ``graph`` maps every ordered pair ``(e1, e2)`` of distinct names to
      ``dict(diff=..., dist=...)`` where ``diff`` is
      ``sites_difference(site1, site2)`` and ``dist`` is its length.
    - ``enzymes_sites`` is the result of ``get_enzymes_ATGC_sequences``.
    - ``core_enzymes`` maps a name ``e1`` to the list of other names ``e2``
      whose site equals or is contained in ``e1``'s site (or its reverse
      complement), or whose pattern matches either of them.
    """
    enzymes_names = enzymes_names[:]
    np.random.shuffle(enzymes_names)
    enzymes_sites = get_enzymes_ATGC_sequences(enzymes_names)
    patterns = enzymes_to_dna_pattern(enzymes_names)

    core_enzymes = {}
    for enzyme in list(enzymes_names):
        site = enzymes_sites[enzyme]
        rev_site = reverse_complement(site)
        for other in list(enzymes_names):
            if other == enzyme:
                continue
            other_site = enzymes_sites[other]
            other_pattern = patterns[other]
            # All checks are evaluated up front, as a plain tuple.
            checks = (
                site == other_site,
                other_site in site,
                rev_site == other_site,
                other_site in rev_site,
                len(other_pattern.find_matches(site)),
                len(other_pattern.find_matches(rev_site)),
            )
            if not any(checks):
                continue
            contained = core_enzymes.setdefault(enzyme, [])
            if other not in contained:
                contained.append(other)

    graph = {}
    for enzyme in enzymes_names:
        site = enzymes_sites[enzyme]
        for other in enzymes_names:
            if other != enzyme:
                diff = sites_difference(site, enzymes_sites[other])
                graph[(enzyme, other)] = {"diff": diff, "dist": len(diff)}
    return graph, enzymes_sites, core_enzymes
예제 #11
0
 def extract_kmer(i):
     """Return the k-mer of ``problem.sequence`` starting at index ``i``.

     The k-mer is ``self.min_length`` characters long. When
     ``self.include_reverse_complement`` is set, the smaller (string
     comparison) of the k-mer and its reverse complement is returned.
     """
     kmer = problem.sequence[i:i + self.min_length]
     if not self.include_reverse_complement:
         return kmer
     return min(kmer, reverse_complement(kmer))