Ejemplo n.º 1
0
 def initialized_on_problem(self, problem, role):
     """Return a version of this spec whose ``ends_locations`` is defined.

     If the ``ends_locations`` attribute is missing or ``None``, two
     locations of length ``self.window_size`` are created, one at the
     start and one at the end of ``problem.sequence``, and a modified copy
     (from ``copy_with_changes``) is returned. Otherwise the spec itself is
     returned. ``role`` is not used by this method.
     """
     if getattr(self, 'ends_locations', None) is not None:
         return self
     sequence_length = len(problem.sequence)
     window = self.window_size
     first_window = Location(0, window)
     last_window = Location(sequence_length - window, sequence_length)
     return self.copy_with_changes(
         ends_locations=[first_window, last_window])
Ejemplo n.º 2
0
    def evaluate(self, problem):
        """Score the sequence as minus the summed identities of BLAST hits.

        The region ``self.location`` (or the whole of ``problem.sequence``
        when no location is set) is extracted and passed to
        ``blast_sequence`` together with the BLAST settings stored on the
        spec. Every HSP of every alignment is converted to a
        ``(start, end, identities)`` triple expressed relative to
        ``location.start``; hits shorter than ``self.min_align_length`` are
        discarded.

        Returns a ``SpecEvaluation`` with score 1 when no hit remains, and
        otherwise with score ``-sum(identities)`` and one Location per hit.
        """
        region = self.location
        if region is None:
            region = Location(0, len(problem.sequence))
        query = region.extract_sequence(problem.sequence)

        blast_settings = dict(
            blast_db=self.blast_db,
            subject_sequences=self.sequences,
            word_size=self.word_size,
            perc_identity=self.perc_identity,
            num_alignments=self.num_alignments,
            num_threads=self.num_threads,
            ungapped=self.ungapped,
            e_value=self.e_value,
            culling_limit=self.culling_limit,
        )
        blast_record = blast_sequence(query, **blast_settings)

        # The result is either a single record or a list of records.
        if isinstance(blast_record, list):
            records = blast_record
        else:
            records = [blast_record]

        matches = []
        for record in records:
            for alignment in record.alignments:
                for hsp in alignment.hsps:
                    # query_start may be larger than query_end, hence the
                    # min/max. The extra -1 on the start presumably converts
                    # 1-based BLAST coordinates to 0-based -- TODO confirm.
                    low = min(hsp.query_start, hsp.query_end)
                    high = max(hsp.query_start, hsp.query_end)
                    begin = low + region.start - 1
                    stop = high + region.start
                    if (stop - begin) >= self.min_align_length:
                        matches.append((begin, stop, hsp.identities))
        matches.sort()

        if not matches:
            return SpecEvaluation(self,
                                  problem,
                                  score=1,
                                  message="Passed: no BLAST match found")

        total_identities = sum(ids for _, _, ids in matches)
        hit_locations = [Location(begin, stop) for begin, stop, _ in matches]
        return SpecEvaluation(self,
                              problem,
                              score=-total_identities,
                              locations=hit_locations,
                              message="Failed - matches at %s" % hit_locations)
Ejemplo n.º 3
0
    def __init__(self, location=None, translation=None, boost=1.0):
        """Store the translation, the location and the boost of the spec.

        A tuple ``location`` is converted with ``Location.from_tuple``
        (default strand +1). A location whose strand is neither -1 nor 1 is
        replaced by a new Location with the same bounds on strand 1. The
        location is then registered through ``self.set_location``.

        Two flags record which values were left unspecified:
        ``initialize_translation_from_problem`` and
        ``initialize_location_from_problem``.
        """
        self.translation = translation

        if isinstance(location, tuple):
            location = Location.from_tuple(location, default_strand=+1)
        has_location = location is not None
        if has_location and location.strand not in (-1, 1):
            # Undefined strand: fall back to the forward strand.
            location = Location(location.start, location.end, 1)
        self.set_location(location)

        self.boost = boost
        self.initialize_translation_from_problem = translation is None
        self.initialize_location_from_problem = not has_location
Ejemplo n.º 4
0
 def codon_index_to_location(self, index):
     """Return the Location covered by the codon at position ``index``.

     For a non-negative strand, codons are counted from
     ``self.location.start`` and the result is on strand 1. Otherwise they
     are counted backwards from ``self.location.end`` and the result is on
     strand -1. Each codon spans three positions.
     """
     span = self.location
     near_offset = 3 * index
     far_offset = 3 * (index + 1)
     if span.strand >= 0:
         return Location(
             start=span.start + near_offset,
             end=span.start + far_offset,
             strand=1,
         )
     return Location(
         start=span.end - far_offset,
         end=span.end - near_offset,
         strand=-1,
     )
Ejemplo n.º 5
0
 def __init__(self, min_length, location=None, extended_location=None,
              include_reverse_complement=True, boost=1.0,
              localization_data=None):
     """Store the parameters of the specification.

     ``location`` and ``extended_location`` may be given as tuples, in
     which case they are converted with ``Location.from_tuple``; any other
     value (including ``None``) is stored as provided. All other arguments
     are stored unchanged under attributes of the same name.
     """
     if isinstance(location, tuple):
         location = Location.from_tuple(location)
     if isinstance(extended_location, tuple):
         extended_location = Location.from_tuple(extended_location)

     self.min_length = min_length
     self.location = location
     self.extended_location = extended_location
     self.include_reverse_complement = include_reverse_complement
     # Honor the caller's value: the boost used to be overwritten with a
     # hard-coded 1.0, silently discarding the argument.
     self.boost = boost
     self.localization_data = localization_data
Ejemplo n.º 6
0
    def __init__(self,
                 max_energy=-5.0,
                 location=None,
                 optimize_initiator=False,
                 boost=1.0):
        """Store the energy threshold, location and options of the spec.

        ``max_energy`` is kept as ``self.max_e``. A tuple ``location`` is
        converted with ``Location.from_tuple``, and a location on strand -1
        is replaced by a new Location with the same bounds on strand 1.
        """
        if isinstance(location, tuple):
            location = Location.from_tuple(location)
        on_reverse_strand = (location is not None) and (location.strand == -1)
        if on_reverse_strand:
            location = Location(location.start, location.end, 1)

        self.location = location
        self.max_e = max_energy
        self.optimize_initiator = optimize_initiator
        self.boost = boost
Ejemplo n.º 7
0
    def global_evaluation(self, problem):
        """Score the sequence by its number of non-unique k-mer positions.

        Every window of ``self.min_length`` positions starting inside
        ``self.extended_location`` is keyed by the value returned by the
        k-mer extractor. Windows whose key occurs more than once, and which
        lie strictly inside ``self.location``, are reported.

        Returns a ``SpecEvaluation`` with score 0 when nothing is reported,
        otherwise with score ``-len(locations)`` and the reported Locations
        sorted by start.
        """
        kmer_at = self.get_kmer_extractor(problem.sequence)
        first = self.extended_location.start
        last = self.extended_location.end

        # Group the window coordinates by extracted k-mer.
        occurrences = defaultdict(list)
        for position in range(first, last - self.min_length):
            window = (position, position + self.min_length)
            occurrences[kmer_at(position)].append(window)

        repeated = []
        for windows in occurrences.values():
            if len(windows) <= 1:
                continue
            for begin, stop in windows:
                # Strict inequalities: windows touching the bounds of
                # self.location are not reported.
                if self.location.start < begin < stop < self.location.end:
                    repeated.append(Location(begin, stop))
        repeated.sort(key=lambda loc: loc.start)

        if not repeated:
            return SpecEvaluation(
                self,
                problem,
                score=0,
                message="Passed: no nonunique %d-mer found." % self.min_length)

        return SpecEvaluation(
            self,
            problem,
            score=-len(repeated),
            locations=repeated,
            message="Failed, the following positions are the first occurences "
            "of non-unique segments %s" % repeated)
Ejemplo n.º 8
0
 def __init__(
     self,
     amount=None,
     amount_percent=None,
     minimum=None,
     minimum_percent=None,
     location=None,
     indices=None,
     reference=None,
     boost=1.0,
 ):
     """Store the parameters of the specification.

     ``location`` is normalized with ``Location.from_data``; a location on
     strand -1 is replaced by a new Location with the same bounds on
     strand 1. ``indices``, when given, is stored as a numpy array.

     ``amount`` and ``minimum`` may be strings ending in ``"%"``; in that
     case the numeric part is stored in ``amount_percent`` /
     ``minimum_percent`` and the absolute attribute is set to ``None``.
     """
     location = Location.from_data(location)
     if (location is not None) and (location.strand == -1):
         # Build a new Location instead of flipping the strand in place:
         # the object returned by from_data may be the very one the caller
         # passed in, and it must not be modified behind their back.
         location = Location(location.start, location.end, 1)
     self.location = location
     self.indices = np.array(indices) if (indices is not None) else None
     self.reference = reference

     self.amount = amount
     self.amount_percent = amount_percent
     self.minimum = minimum
     self.minimum_percent = minimum_percent
     # Percent strings such as "20%" override the absolute value.
     if isinstance(amount, str) and amount.endswith("%"):
         self.amount = None
         self.amount_percent = float(amount[:-1])
     if isinstance(minimum, str) and minimum.endswith("%"):
         self.minimum = None
         self.minimum_percent = float(minimum[:-1])
     self.boost = boost
Ejemplo n.º 9
0
 def __init__(
     self,
     location=None,
     tmin=50,
     tmax=70,
     max_homology_length=6,
     avoid_heterodim_with=None,
     max_heterodim_tm=5,
     avoided_repeats=((2, 5), (3, 4), (4, 3)),
 ):
     """Build and register the sub-specifications applied on ``location``.

     The registered specifications are, in this order: a
     ``UniquifyAllKmers`` with ``k=max_homology_length``, an
     ``EnforceMeltingTemperature`` between ``tmin`` and ``tmax``, one
     ``AvoidPattern(RepeatedKmerPattern(k, n))`` per ``(k, n)`` pair of
     ``avoided_repeats``, and, only when ``avoid_heterodim_with`` is
     given, an ``AvoidHeterodimerization`` with ``tmax=max_heterodim_tm``.
     """
     location = Location.from_data(location)

     specs = {}
     specs["unique_sequence"] = UniquifyAllKmers(
         k=max_homology_length, location=location
     )
     specs["melting_temperature"] = EnforceMeltingTemperature(
         mini=tmin, maxi=tmax, location=location
     )
     for kmer_size, repeats in avoided_repeats:
         label = "repeats_%d_%d" % (kmer_size, repeats)
         specs[label] = AvoidPattern(
             RepeatedKmerPattern(kmer_size, repeats), location=location
         )
     if avoid_heterodim_with is not None:
         specs["avoid_heterodimerization"] = AvoidHeterodimerization(
             other_primers_sequences=avoid_heterodim_with,
             tmax=max_heterodim_tm,
             location=location,
         )

     self.register_specifications(specs)
Ejemplo n.º 10
0
 def __init__(self,
              blast_db=None,
              sequences=None,
              word_size=4,
              perc_identity=100,
              num_alignments=100000,
              num_threads=3,
              min_align_length=20,
              ungapped=True,
              e_value=1e80,
              culling_limit=1,
              location=None):
     """Store the BLAST settings and the location of the specification.

     Every argument is stored under an attribute of the same name. A tuple
     ``location`` is first converted with ``Location.from_tuple``.
     """
     # Where to look.
     if isinstance(location, tuple):
         location = Location.from_tuple(location)
     self.location = location

     # What to compare against.
     self.blast_db = blast_db
     self.sequences = sequences

     # Search settings.
     self.word_size = word_size
     self.perc_identity = perc_identity
     self.num_alignments = num_alignments
     self.num_threads = num_threads
     self.ungapped = ungapped
     self.e_value = e_value
     self.culling_limit = culling_limit

     # Threshold on the length of reported hits.
     self.min_align_length = min_align_length
Ejemplo n.º 11
0
    def evaluate(self, problem):
        """Return a score equal to minus the number of modifications.

        The subsequence selected by the spec is compared to
        ``self.target_sequence``. Positions where they differ are mapped
        back to sequence coordinates (through ``self.indices`` when set,
        otherwise through ``self.location``).

        Locations are "binned" modification regions: nearby positions are
        grouped with ``group_nearby_indices``, using
        ``self.localization_interval_length`` as the maximal group spread.
        """
        target = self.target_sequence
        sequence = self.extract_subsequence(problem.sequence)
        discrepancies = np.nonzero(
            sequences_differences_array(sequence, target))[0]

        if self.indices is not None:
            discrepancies = self.indices[discrepancies]
        elif self.location is not None:
            if self.location.strand == -1:
                # NOTE(review): for the reverse strand an offset of
                # ``end - 1 - i`` may be what is intended -- confirm.
                discrepancies = self.location.end - discrepancies
            else:
                discrepancies = discrepancies + self.location.start

        groups = group_nearby_indices(
            discrepancies, max_group_spread=self.localization_interval_length)
        # Location ends are exclusive, so the interval must stop one past
        # the last modified position; otherwise an isolated modification
        # would produce an empty (start == end) location.
        locations = [Location(group[0], group[-1] + 1, 1) for group in groups]

        return SpecEvaluation(self,
                              problem,
                              score=-len(discrepancies),
                              locations=locations)
Ejemplo n.º 12
0
    def __init__(self, sequence=None, location=None, boost=1.0):
        """Store the sequence, location and boost of the specification.

        A tuple ``location`` is converted with ``Location.from_tuple`` using
        ``default_strand=+1``; any other value is stored as given.
        """
        given_as_tuple = isinstance(location, tuple)
        if given_as_tuple:
            location = Location.from_tuple(location, default_strand=+1)
        self.location = location
        self.sequence = sequence
        self.boost = boost
Ejemplo n.º 13
0
 def __init__(
     self, stem_size=20, hairpin_window=200, location=None, boost=1.0
 ):
     """Store the stem size, hairpin window, location and boost.

     ``location`` is normalized with ``Location.from_data``; the other
     arguments are stored unchanged.
     """
     normalized_location = Location.from_data(location)
     self.location = normalized_location
     self.boost = boost
     self.hairpin_window = hairpin_window
     self.stem_size = stem_size
Ejemplo n.º 14
0
 def initialize_on_problem(self, problem, role):
     """Return a version of this spec that has a location.

     A spec that already has a location is returned as is. Otherwise a
     copy is made and given a strand-1 Location spanning the whole of
     ``problem.sequence``. ``role`` is not used by this method.
     """
     if self.location is not None:
         return self
     spec_copy = self.copy_with_changes()
     spec_copy.location = Location(0, len(problem.sequence), 1)
     return spec_copy
Ejemplo n.º 15
0
 def codons_indices_to_locations(self, indices):
     """Convert a list of codon positions into a list of Locations.

     Positions are shifted relative to ``self.location`` (subtracted from
     its end on strand -1, added to its start otherwise), grouped with
     ``group_nearby_indices`` using ``self.localization_group_spread``, and
     each group becomes one Location extended by 3 on its far side.
     """
     positions = np.array(indices)
     spread = self.localization_group_spread

     if self.location.strand == -1:
         mirrored = sorted(self.location.end - positions)
         groups = group_nearby_indices(mirrored, max_group_spread=spread)
         return [
             Location(group[0] - 3, group[-1], strand=-1) for group in groups
         ]

     # ``positions`` is a fresh array, so the in-place shift does not
     # modify the caller's ``indices``.
     positions += self.location.start
     groups = group_nearby_indices(positions, max_group_spread=spread)
     return [Location(group[0], group[-1] + 3) for group in groups]
Ejemplo n.º 16
0
 def __init__(self,
              species=None,
              location=None,
              codon_usage_table=None,
              boost=1.0):
     """Store the boost, location, species and codon usage table.

     ``location`` is normalized with ``Location.from_data``. The stored
     codon usage table is whatever ``self.get_codons_table`` returns for
     the given ``species`` and ``codon_usage_table``.
     """
     self.boost = boost
     self.location = Location.from_data(location)
     self.species = species
     resolved_table = self.get_codons_table(species, codon_usage_table)
     self.codon_usage_table = resolved_table
Ejemplo n.º 17
0
 def __init__(
     self, pattern=None, occurences=1, location=None, center=True, boost=1.0
 ):
     """Store the pattern, expected occurrences, location and options.

     A string ``pattern`` is converted with
     ``SequencePattern.from_string``; ``location`` is normalized with
     ``Location.from_data``. Other arguments are stored unchanged.
     """
     self.pattern = (
         SequencePattern.from_string(pattern)
         if isinstance(pattern, str)
         else pattern
     )
     self.location = Location.from_data(location)
     self.boost = boost
     self.center = center
     self.occurences = occurences
Ejemplo n.º 18
0
 def __init__(self,
              locations,
              compatibility_condition,
              condition_label='',
              boost=1.0):
     """Store the locations, the compatibility condition and its label.

     Each element of ``locations`` is converted with
     ``Location.from_tuple``; the other arguments are stored unchanged.
     """
     self.locations = list(map(Location.from_tuple, locations))
     self.boost = boost
     self.condition_label = condition_label
     self.compatibility_condition = compatibility_condition
Ejemplo n.º 19
0
 def __init__(self,
              mini=0,
              maxi=1.0,
              target=None,
              window=None,
              location=None,
              boost=1.0):
     """Store the bounds, target, window, location and boost.

     When ``target`` is given it overrides both ``mini`` and ``maxi``.
     A tuple ``location`` is converted with ``Location.from_tuple``, and a
     location on strand -1 is replaced by a new Location with the same
     bounds on strand 1.
     """
     if target is not None:
         mini, maxi = target, target

     if isinstance(location, tuple):
         location = Location.from_tuple(location)
     on_reverse_strand = (location is not None) and (location.strand == -1)
     if on_reverse_strand:
         location = Location(location.start, location.end, 1)

     self.mini, self.maxi = mini, maxi
     self.target = target
     self.window = window
     self.location = location
     self.boost = boost
Ejemplo n.º 20
0
 def initialize_on_problem(self, problem, role='constraint'):
     """Return a located version of this spec after checking its choices.

     When the spec has no location, a copy spanning the whole of
     ``problem.sequence`` (strand 1) is used; otherwise the spec itself.

     Raises ``ValueError`` if any element of ``choices`` does not have the
     same length as the location.
     """
     result = self
     if self.location is None:
         whole_sequence = Location(0, len(problem.sequence), 1)
         result = self.copy_with_changes(location=whole_sequence)

     has_mismatch = any(
         len(choice) != len(result.location) for choice in result.choices
     )
     if has_mismatch:
         raise ValueError("All sequence choices should have the same "
                          "length as the region on which the spec is "
                          "applied.")
     return result
Ejemplo n.º 21
0
    def initialize_on_problem(self, problem, role):
        """Return a version of this spec with location and target sequence.

        A missing location is replaced, on a copy, by a strand-1 Location
        spanning the whole of ``problem.sequence``. A missing
        ``target_sequence`` is filled, on a further copy, with the result of
        ``self.extract_subsequence(problem.sequence)``. When both are
        already set, the spec itself is returned.
        """
        result = self
        if self.location is None:
            whole_sequence = Location(0, len(problem.sequence), 1)
            result = self.copy_with_changes(location=whole_sequence)

        if self.target_sequence is not None:
            return result

        # Freeze the current content of the region as the target.
        result = result.copy_with_changes()
        result.target_sequence = self.extract_subsequence(problem.sequence)
        return result
Ejemplo n.º 22
0
 def __init__(self, choices=None, location=None, boost=1.0):
     """Store the expanded list of choices, the location and the boost.

     String choices are converted with ``SequencePattern.from_string``;
     every choice is then replaced by all of its variants (as returned by
     its ``all_variants()`` method). ``choices=None`` (the default) is
     treated as an empty list instead of failing on iteration.
     ``location`` is normalized with ``Location.from_data``.
     """
     if choices is None:
         # The signature advertises None as the default, so accept it.
         choices = []
     patterns = [
         SequencePattern.from_string(c) if isinstance(c, str) else c
         for c in choices
     ]
     # Precompute all variants once, at construction time.
     self.choices = [
         variant for pattern in patterns for variant in pattern.all_variants()
     ]
     self.location = Location.from_data(location)
     self.boost = boost
Ejemplo n.º 23
0
 def __init__(self,
              location=None,
              indices=None,
              target_sequence=None,
              boost=1.0):
     """Store the location, indices, target sequence and boost.

     A tuple ``location`` is converted with ``Location.from_tuple``.
     ``indices``, when given, is stored as a numpy array; otherwise
     ``None`` is stored.
     """
     if isinstance(location, tuple):
         location = Location.from_tuple(location)
     self.location = location

     self.indices = None if indices is None else np.array(indices)
     self.target_sequence = target_sequence
     self.boost = boost
Ejemplo n.º 24
0
    def __init__(self,
                 stem_size=20,
                 hairpin_window=200,
                 location=None,
                 boost=1.0):
        """Store the stem size, hairpin window, location and boost.

        A tuple ``location`` is converted with ``Location.from_tuple``; any
        other value is stored as given.
        """
        given_as_tuple = isinstance(location, tuple)
        if given_as_tuple:
            location = Location.from_tuple(location)
        self.location = location

        self.boost = boost
        self.hairpin_window = hairpin_window
        self.stem_size = stem_size
Ejemplo n.º 25
0
    def initialize_on_problem(self, problem, role):
        """Return a version of this spec with location and translation set.

        A missing location is replaced, on a copy, by a strand-1 Location
        spanning the whole of ``problem.sequence`` (via ``set_location``).
        A missing translation is obtained by translating the located
        subsequence with ``self.codons_translations``. When both are already
        set, the spec itself is returned.
        """
        result = self
        if self.location is None:
            whole_sequence = Location(0, len(problem.sequence), 1)
            result = self.copy_with_changes()
            result.set_location(whole_sequence)

        if result.translation is not None:
            return result

        coding_sequence = result.location.extract_sequence(problem.sequence)
        protein = translate(coding_sequence, self.codons_translations)
        return result.copy_with_changes(translation=protein)
Ejemplo n.º 26
0
 def evaluate(self, problem):
     """Score is minus the number of wrong-translation codons.

     The region ``self.location`` (or the whole of ``problem.sequence``
     when no location is set) is translated with
     ``self.codons_translations`` and compared, position by position, to
     ``self.translation``.

     Returns a ``SpecEvaluation`` whose score is ``-len(errors)`` and whose
     locations are the three-position spans of the mistranslated codons,
     expressed in sequence coordinates.
     """
     location = self.location
     if location is None:
         location = Location(0, len(problem.sequence))
     subsequence = location.extract_sequence(problem.sequence)
     translation = translate(subsequence, self.codons_translations)
     errors = [
         ind
         for ind, residue in enumerate(translation)
         if residue != self.translation[ind]
     ]

     # Use the effective ``location`` (not ``self.location``, which may be
     # None) and express both strands in absolute sequence coordinates.
     if location.strand == -1:
         errors_locations = [
             Location(start=location.end - 3 * (ind + 1),
                      end=location.end - 3 * ind,
                      strand=-1)
             for ind in errors
         ]
     else:
         errors_locations = [
             Location(location.start + 3 * ind,
                      location.start + 3 * (ind + 1))
             for ind in errors
         ]

     if errors:
         message = "Wrong translation at indices %s" % errors
     else:
         message = "All OK."
     return SpecEvaluation(self, problem, score=-len(errors),
                           locations=errors_locations,
                           message=message)
Ejemplo n.º 27
0
 def __init__(
     self,
     mini=0,
     maxi=1.0,
     target=None,
     window=None,
     location=None,
     boost=1.0,
 ):
     """Store the bounds, target, window, location and boost.

     A string ``mini`` is parsed with ``self.string_to_parameters`` and
     replaces ``mini``, ``maxi``, ``target`` and ``window``. When a target
     is defined it overrides both bounds. ``location`` is normalized with
     ``Location.from_data``, and a location on strand -1 is replaced by a
     new Location with the same bounds on strand 1.
     """
     if isinstance(mini, str):
         parsed = self.string_to_parameters(mini)
         mini, maxi, target, window = parsed
     if target is not None:
         mini, maxi = target, target

     location = Location.from_data(location)
     on_reverse_strand = (location is not None) and (location.strand == -1)
     if on_reverse_strand:
         location = Location(location.start, location.end, 1)

     self.mini, self.maxi = mini, maxi
     self.target = target
     self.window = window
     self.location = location
     self.boost = boost
Ejemplo n.º 28
0
    def evaluate(self, problem):
        """Score the number of changes made relative to ``self.reference``.

        With ``self.minimum`` set, the score is
        ``n_differences - self.minimum`` and the single reported location
        is ``self.location``. Otherwise the score is
        ``-abs(n_differences - self.amount)`` and the locations are
        "binned" regions (grouped with ``group_nearby_indices`` using
        ``self.localization_interval_length`` as maximal spread): the
        unchanged positions when there are too few changes, the changed
        positions when there are too many.
        """
        # Note: at this stage any minimum_percent or amount_percent is
        # expected to have been transformed into absolute self.minimum and
        # self.amount values.

        # Find the positions where the sequence is unchanged.
        target = self.reference
        sequence = self.extract_subsequence(problem.sequence)
        equalities = np.nonzero(
            1 - sequences_differences_array(sequence, target))[0]
        if self.indices is not None:
            equalities = self.indices[equalities]
        elif self.location is not None:
            if self.location.strand == -1:
                equalities = self.location.end - equalities
            else:
                equalities = equalities + self.location.start

        def indices_to_intervals(indices):
            intervals = group_nearby_indices(
                indices, max_group_spread=self.localization_interval_length)
            return [(interval[0], interval[-1] + 1) for interval in intervals]

        if self.indices is not None:
            n_indices = len(self.indices)
        else:
            n_indices = len(self.location)
        n_differences = n_indices - len(equalities)

        if self.minimum is not None:
            # The whole location is reported; no interval is needed.
            score = n_differences - self.minimum
            return SpecEvaluation(
                self, problem, score=score, locations=[self.location])

        score = -abs(n_differences - self.amount)
        if n_differences <= self.amount:
            intervals = indices_to_intervals(equalities)
        else:
            # Consider the same positions that were used to count
            # n_indices, so the spec also works when it is defined by
            # ``indices`` rather than by a location. A set gives constant
            # time membership tests.
            if self.indices is not None:
                candidates = self.indices
            else:
                candidates = self.location.indices
            unchanged = set(equalities.tolist())
            differences = [i for i in candidates if i not in unchanged]
            intervals = indices_to_intervals(differences)
        locations = [Location(start, end, 1) for start, end in intervals]
        return SpecEvaluation(self, problem, score=score, locations=locations)
Ejemplo n.º 29
0
    def localized(self, location, problem=None, with_righthand=True):
        """Generic localization method for codon specifications.

        Returns the spec itself when it has no location, and ``None`` when
        its location does not overlap ``location``. Otherwise the overlap is
        widened to whole codons (counted from the start of the spec's
        location, or from its end on strand -1), clipped to the spec's
        location, and passed with the first and last codon numbers to the
        class's ``localized_on_window`` method.

        ``problem`` and ``with_righthand`` are not used by this method.
        """
        own_location = self.location
        if own_location is None:
            return self

        overlap = own_location.overlap_region(location)
        if overlap is None:
            return None

        w_start, w_end = own_location.start, own_location.end
        if own_location.strand != -1:
            # Codons are counted from the start of the location.
            start_codon = int((overlap.start - w_start) / 3)
            end_codon = int((overlap.end - w_start - 1) / 3) + 1
            window_start = w_start + 3 * start_codon
            window_end = min(w_end, w_start + 3 * end_codon)
        else:
            # Reverse strand: codons are counted from the end.
            start_codon = int((w_end - overlap.end) / 3)
            end_codon = int((w_end - overlap.start - 1) / 3) + 1
            window_start = max(w_start, w_end - 3 * end_codon)
            window_end = w_end - 3 * start_codon

        new_location = Location(
            start=window_start,
            end=window_end,
            strand=own_location.strand,
        )
        return self.localized_on_window(new_location, start_codon, end_codon)
Ejemplo n.º 30
0
    def insert_pattern_in_problem(self, problem, reverse=False):
        """Insert the pattern in the problem's sequence by successive tries.

        Each window of ``self.location`` that can hold the pattern is tried
        in turn (closest to the center first when ``self.center`` is set).
        For each window, an ``EnforceSequence`` constraint is added; windows
        leading to unsolvable segments are skipped. If this spec passes on
        the resulting problem and its constraints can be resolved,
        ``problem.sequence`` is updated in place and the method returns.

        If every window fails, and the pattern is not palindromic, the
        reverse complement of the pattern is tried the same way.

        Raises ``NoSolutionError`` when no insertion succeeds.
        """
        sequence_to_insert = self.pattern.sequence
        if reverse:
            sequence_to_insert = reverse_complement(sequence_to_insert)
        L = self.pattern.size
        # "+ 1" so that the last window, ending exactly at location.end,
        # is also tried (range excludes its upper bound). Without it a
        # location exactly as long as the pattern has no candidate window.
        starts = range(self.location.start, self.location.end - L + 1)
        if self.center:
            center = 0.5 * (self.location.start + self.location.end)
            starts = sorted(starts, key=lambda s: abs(s - center))
        for start in starts:
            new_location = Location(start, start + L, self.location.strand)
            new_constraint = EnforceSequence(
                sequence=sequence_to_insert, location=new_location
            )
            new_space = MutationSpace.from_optimization_problem(
                problem, new_constraints=[new_constraint]
            )
            if len(new_space.unsolvable_segments) > 0:
                continue
            new_sequence = new_space.constrain_sequence(problem.sequence)
            new_constraints = problem.constraints + [new_constraint]
            new_problem = DnaOptimizationProblem(
                sequence=new_sequence,
                constraints=new_constraints,
                mutation_space=new_space,
                logger=None,
            )
            if not self.evaluate(new_problem).passes:
                continue
            try:
                new_problem.resolve_constraints()
            except NoSolutionError:
                # This window cannot be made valid; try the next one.
                continue
            problem.sequence = new_problem.sequence
            return
        if (not reverse) and (not self.pattern.is_palyndromic):
            # Second chance with the reverse-complemented pattern.
            self.insert_pattern_in_problem(problem, reverse=True)
            return
        raise NoSolutionError(
            problem=problem,
            location=self.location,
            message="Insertion of pattern %s in %s failed"
            % (self.pattern.sequence, self.location),
        )