def initialized_on_problem(self, problem, role):
    """Return a specification whose ``ends_locations`` attribute is set.

    If ``ends_locations`` is missing or ``None``, two windows of
    ``self.window_size`` positions are created (one at the very start of
    ``problem.sequence``, one at its very end) and a modified copy of the
    specification is returned. Otherwise ``self`` is returned unchanged.

    ``role`` is not used by this method.
    """
    if getattr(self, "ends_locations", None) is not None:
        return self
    sequence_length = len(problem.sequence)
    window = self.window_size
    left_end = Location(0, window)
    right_end = Location(sequence_length - window, sequence_length)
    return self.copy_with_changes(ends_locations=[left_end, right_end])
def evaluate(self, problem):
    """Score as (-total number of BLAST identities in the matches).

    The region ``self.location`` (or the whole sequence when it is ``None``)
    is blasted with the parameters stored on the specification. Hits whose
    span on the query is shorter than ``self.min_align_length`` are ignored.
    When no hit remains the evaluation gets a score of 1, otherwise the
    score is minus the sum of the hits' ``identities`` and the hits are
    reported as locations.
    """
    region = self.location
    if region is None:
        region = Location(0, len(problem.sequence))
    query = region.extract_sequence(problem.sequence)
    blast_record = blast_sequence(
        query,
        blast_db=self.blast_db,
        subject_sequences=self.sequences,
        word_size=self.word_size,
        perc_identity=self.perc_identity,
        num_alignments=self.num_alignments,
        num_threads=self.num_threads,
        ungapped=self.ungapped,
        e_value=self.e_value,
        culling_limit=self.culling_limit,
    )

    # The BLAST helper may return either one record or a list of records.
    if isinstance(blast_record, list):
        records = blast_record
    else:
        records = [blast_record]

    matches = []
    for record in records:
        for alignment in record.alignments:
            for hsp in alignment.hsps:
                # Hit coordinates are shifted by the start of the blasted
                # region so that they refer to the full sequence.
                first = min(hsp.query_start, hsp.query_end) + region.start - 1
                last = max(hsp.query_start, hsp.query_end) + region.start
                if (last - first) >= self.min_align_length:
                    matches.append((first, last, hsp.identities))
    matches.sort()

    if not matches:
        return SpecEvaluation(
            self, problem, score=1, message="Passed: no BLAST match found"
        )

    total_identities = sum(identities for _, _, identities in matches)
    match_locations = [Location(first, last) for first, last, _ in matches]
    return SpecEvaluation(
        self,
        problem,
        score=-total_identities,
        locations=match_locations,
        message="Failed - matches at %s" % match_locations,
    )
def __init__(self, location=None, translation=None, boost=1.0):
    """Store the translation, location and boost of the specification.

    ``location`` may be ``None``, a tuple (converted with
    ``Location.from_tuple`` using strand +1 by default) or a location
    object. A location whose strand is neither -1 nor 1 is replaced by a
    strand-1 location with the same start and end. The resulting location is
    handed to ``self.set_location``.

    Two flags record whether the translation and the location were left
    unspecified (``None``).
    """
    self.translation = translation
    if isinstance(location, tuple):
        location = Location.from_tuple(location, default_strand=+1)
    strand_is_explicit = (location is None) or (location.strand in [-1, 1])
    if not strand_is_explicit:
        location = Location(location.start, location.end, 1)
    self.set_location(location)
    self.boost = boost
    self.initialize_translation_from_problem = translation is None
    self.initialize_location_from_problem = location is None
def codon_index_to_location(self, index):
    """Return the 3-position Location of codon number ``index``.

    On a non-negative strand codons are counted from ``self.location.start``
    and the result has strand 1; otherwise they are counted backwards from
    ``self.location.end`` and the result has strand -1.
    """
    region = self.location
    if region.strand >= 0:
        codon_start = region.start + 3 * index
        codon_end = region.start + 3 * (index + 1)
        codon_strand = 1
    else:
        codon_start = region.end - 3 * (index + 1)
        codon_end = region.end - 3 * index
        codon_strand = -1
    return Location(start=codon_start, end=codon_end, strand=codon_strand)
def __init__(self, min_length, location=None, extended_location=None,
             include_reverse_complement=True, boost=1.0,
             localization_data=None):
    """Initialize.

    Parameters
    ----------
    min_length
      Stored as ``self.min_length``.
    location, extended_location
      ``None``, a location object, or a tuple; tuples are converted with
      ``Location.from_tuple``.
    include_reverse_complement
      Stored as ``self.include_reverse_complement``.
    boost
      Stored as ``self.boost``.
    localization_data
      Stored as ``self.localization_data``.
    """
    self.min_length = min_length
    if isinstance(location, tuple):
        location = Location.from_tuple(location)
    self.location = location
    if isinstance(extended_location, tuple):
        extended_location = Location.from_tuple(extended_location)
    self.extended_location = extended_location
    self.include_reverse_complement = include_reverse_complement
    # Honor the caller-supplied boost; it used to be hard-coded to 1.0,
    # which silently discarded the argument.
    self.boost = boost
    self.localization_data = localization_data
def __init__(self, max_energy=-5.0, location=None,
             optimize_initiator=False, boost=1.0):
    """Store the energy threshold, location and options.

    ``max_energy`` is stored as ``self.max_e``. ``location`` may be ``None``,
    a tuple (converted with ``Location.from_tuple``) or a location object; a
    location on strand -1 is replaced by a strand-1 location with the same
    start and end.
    """
    if isinstance(location, tuple):
        location = Location.from_tuple(location)
    on_reverse_strand = (location is not None) and (location.strand == -1)
    if on_reverse_strand:
        location = Location(location.start, location.end, 1)
    self.location = location
    self.max_e = max_energy
    self.optimize_initiator = optimize_initiator
    self.boost = boost
def global_evaluation(self, problem):
    """Evaluate the uniqueness of all ``min_length``-mers in the sequence.

    Every k-mer (k = ``self.min_length``) starting inside
    ``self.extended_location`` is indexed with the extractor returned by
    ``self.get_kmer_extractor``. K-mers seen more than once and lying
    strictly inside ``self.location`` are reported as locations, sorted by
    start; the score is minus their number (0 when there are none).
    """
    kmer_at = self.get_kmer_extractor(problem.sequence)
    window = self.extended_location
    size = self.min_length

    occurrences = defaultdict(list)
    # NOTE(review): the k-mer starting at ``window.end - size`` is not
    # visited by this range -- confirm whether that is intended.
    for position in range(window.start, window.end - size):
        occurrences[kmer_at(position)].append((position, position + size))

    repeated = []
    for segments in occurrences.values():
        if len(segments) <= 1:
            continue
        for seg_start, seg_end in segments:
            # NOTE(review): strict inequalities exclude segments touching
            # either edge of ``self.location`` -- confirm this is intended.
            if self.location.start < seg_start < seg_end < self.location.end:
                repeated.append(Location(seg_start, seg_end))
    repeated.sort(key=lambda segment: segment.start)

    if not repeated:
        return SpecEvaluation(
            self,
            problem,
            score=0,
            message="Passed: no nonunique %d-mer found." % self.min_length,
        )
    return SpecEvaluation(
        self,
        problem,
        score=-len(repeated),
        locations=repeated,
        message="Failed, the following positions are the first occurences "
                "of non-unique segments %s" % repeated,
    )
def __init__(
    self,
    amount=None,
    amount_percent=None,
    minimum=None,
    minimum_percent=None,
    location=None,
    indices=None,
    reference=None,
    boost=1.0,
):
    """Initialize.

    Parameters
    ----------
    amount, minimum
      Stored as given, except that a string ending in ``"%"`` (e.g.
      ``"20%"``) is parsed into ``amount_percent`` / ``minimum_percent``
      (overriding any value passed for these) and the absolute attribute
      is then set to ``None``.
    amount_percent, minimum_percent
      Stored as given unless overridden by a percent string (see above).
    location
      Anything accepted by ``Location.from_data``. A location on strand -1
      is replaced by a strand-1 location with the same start and end.
    indices
      ``None`` or a sequence of indices, stored as a numpy array.
    reference
      Stored as ``self.reference``.
    boost
      Stored as ``self.boost``.
    """
    location = Location.from_data(location)
    if (location is not None) and location.strand == -1:
        # Build a new forward-strand Location instead of flipping the strand
        # in place: the object may be the one supplied by the caller, which
        # must not be modified as a side effect of building a specification.
        location = Location(location.start, location.end, 1)
    self.location = location
    self.indices = np.array(indices) if (indices is not None) else None
    self.reference = reference
    self.amount = amount
    self.amount_percent = amount_percent
    self.minimum = minimum
    self.minimum_percent = minimum_percent
    if isinstance(amount, str) and amount.endswith("%"):
        self.amount = None
        self.amount_percent = float(amount[:-1])
    if isinstance(minimum, str) and minimum.endswith("%"):
        self.minimum = None
        self.minimum_percent = float(minimum[:-1])
    self.boost = boost
def __init__(
    self,
    location=None,
    tmin=50,
    tmax=70,
    max_homology_length=6,
    avoid_heterodim_with=None,
    max_heterodim_tm=5,
    avoided_repeats=((2, 5), (3, 4), (4, 3)),
):
    """Build and register the sub-specifications, all on ``location``.

    The registered specifications are, in order: a ``UniquifyAllKmers``
    with ``k=max_homology_length``, an ``EnforceMeltingTemperature`` between
    ``tmin`` and ``tmax``, one ``AvoidPattern`` per ``(k, n)`` pair of
    ``avoided_repeats``, and (only when ``avoid_heterodim_with`` is given)
    an ``AvoidHeterodimerization`` with ``tmax=max_heterodim_tm``.
    """
    location = Location.from_data(location)

    specs = {}
    specs["unique_sequence"] = UniquifyAllKmers(
        k=max_homology_length, location=location
    )
    specs["melting_temperature"] = EnforceMeltingTemperature(
        mini=tmin, maxi=tmax, location=location
    )
    for kmer_size, n_repeats in avoided_repeats:
        label = "repeats_%d_%d" % (kmer_size, n_repeats)
        specs[label] = AvoidPattern(
            RepeatedKmerPattern(kmer_size, n_repeats), location=location
        )
    if avoid_heterodim_with is not None:
        specs["avoid_heterodimerization"] = AvoidHeterodimerization(
            other_primers_sequences=avoid_heterodim_with,
            tmax=max_heterodim_tm,
            location=location,
        )
    self.register_specifications(specs)
def __init__(self, blast_db=None, sequences=None, word_size=4,
             perc_identity=100, num_alignments=100000, num_threads=3,
             min_align_length=20, ungapped=True, e_value=1e80,
             culling_limit=1, location=None):
    """Store the BLAST parameters and the location on the specification.

    ``location`` may be ``None``, a tuple (converted with
    ``Location.from_tuple``) or a location object. Every other argument is
    stored unchanged under an attribute of the same name.
    """
    # Where the specification applies.
    if isinstance(location, tuple):
        location = Location.from_tuple(location)
    self.location = location

    # What to search against.
    self.blast_db = blast_db
    self.sequences = sequences

    # Search parameters.
    self.word_size = word_size
    self.perc_identity = perc_identity
    self.num_alignments = num_alignments
    self.num_threads = num_threads
    self.ungapped = ungapped
    self.e_value = e_value
    self.culling_limit = culling_limit

    # Filtering of the hits.
    self.min_align_length = min_align_length
def evaluate(self, problem):
    """Return a score equal to -number_of_modifications.

    The sub-sequence returned by ``self.extract_subsequence`` is compared
    with ``self.target_sequence``. Positions that differ are mapped back
    through ``self.indices`` or, failing that, offset by ``self.location``.

    Locations are "binned" modification regions: nearby differing positions
    are grouped with ``group_nearby_indices`` using
    ``self.localization_interval_length`` as the maximal group spread.
    """
    target = self.target_sequence
    sequence = self.extract_subsequence(problem.sequence)
    discrepancies = np.nonzero(
        sequences_differences_array(sequence, target))[0]

    if self.indices is not None:
        discrepancies = self.indices[discrepancies]
    elif self.location is not None:
        # NOTE(review): on strand -1 this yields ``end - i`` rather than
        # ``end - 1 - i`` -- left untouched here, confirm the convention.
        if self.location.strand == -1:
            discrepancies = self.location.end - discrepancies
        else:
            discrepancies = discrepancies + self.location.start

    # Locations use an exclusive end, so the last index of each group must
    # be included with ``+ 1``. Without it an isolated modification produced
    # an empty ``Location(i, i)`` and every region missed its last position.
    intervals = [
        (group[0], group[-1] + 1)
        for group in group_nearby_indices(
            discrepancies,
            max_group_spread=self.localization_interval_length)
    ]
    locations = [Location(start, end, 1) for start, end in intervals]

    return SpecEvaluation(self, problem, score=-len(discrepancies),
                          locations=locations)
def __init__(self, sequence=None, location=None, boost=1.0):
    """Store the sequence, location and boost.

    A tuple ``location`` is converted with ``Location.from_tuple`` (strand
    +1 by default); any other value is stored as given.
    """
    self.sequence = sequence
    self.location = (
        Location.from_tuple(location, default_strand=+1)
        if isinstance(location, tuple)
        else location
    )
    self.boost = boost
def __init__(
    self, stem_size=20, hairpin_window=200, location=None, boost=1.0
):
    """Store the hairpin parameters, the location and the boost.

    ``location`` is normalized with ``Location.from_data``; the other
    arguments are stored unchanged under attributes of the same name.
    """
    self.location = Location.from_data(location)
    self.boost = boost
    self.hairpin_window = hairpin_window
    self.stem_size = stem_size
def initialize_on_problem(self, problem, role):
    """Return a specification that has a location.

    When ``self.location`` is ``None`` a copy of the specification is
    returned, located on the whole of ``problem.sequence`` (strand 1).
    Otherwise ``self`` is returned unchanged. ``role`` is not used.
    """
    if self.location is not None:
        return self
    initialized = self.copy_with_changes()
    initialized.location = Location(0, len(problem.sequence), 1)
    return initialized
def codons_indices_to_locations(self, indices):
    """Convert a list of codon positions to a list of Locations.

    The positions are shifted by ``self.location`` (subtracted from its end
    on strand -1, added to its start otherwise), grouped with
    ``group_nearby_indices`` using ``self.localization_group_spread``, and
    each group becomes one Location widened by 3 positions.
    """
    positions = np.array(indices)
    region = self.location
    on_reverse_strand = region.strand == -1

    if on_reverse_strand:
        positions = sorted(region.end - positions)
    else:
        positions += region.start

    groups = group_nearby_indices(
        positions, max_group_spread=self.localization_group_spread
    )
    if on_reverse_strand:
        return [
            Location(group[0] - 3, group[-1], strand=-1) for group in groups
        ]
    return [Location(group[0], group[-1] + 3) for group in groups]
def __init__(self, species=None, location=None, codon_usage_table=None,
             boost=1.0):
    """Store the species, location and boost and resolve the codon table.

    ``location`` is normalized with ``Location.from_data``. The codon usage
    table is whatever ``self.get_codons_table(species, codon_usage_table)``
    returns.
    """
    self.species = species
    self.location = Location.from_data(location)
    self.boost = boost
    # Resolved last, once the plain attributes above are in place.
    codons_table = self.get_codons_table(species, codon_usage_table)
    self.codon_usage_table = codons_table
def __init__(
    self, pattern=None, occurences=1, location=None, center=True, boost=1.0
):
    """Store the pattern, the number of occurences and the location.

    A string ``pattern`` is converted with ``SequencePattern.from_string``;
    ``location`` is normalized with ``Location.from_data``. ``occurences``,
    ``center`` and ``boost`` are stored unchanged.
    """
    self.pattern = (
        SequencePattern.from_string(pattern)
        if isinstance(pattern, str)
        else pattern
    )
    self.location = Location.from_data(location)
    self.occurences = occurences
    self.center = center
    self.boost = boost
def __init__(self, locations, compatibility_condition,
             condition_label='', boost=1.0):
    """Store the locations and the condition they must satisfy.

    Every item of ``locations`` is converted with ``Location.from_tuple``.
    ``compatibility_condition``, ``condition_label`` and ``boost`` are
    stored unchanged.
    """
    converted = []
    for location_tuple in locations:
        converted.append(Location.from_tuple(location_tuple))
    self.locations = converted
    self.compatibility_condition = compatibility_condition
    self.condition_label = condition_label
    self.boost = boost
def __init__(self, mini=0, maxi=1.0, target=None, window=None,
             location=None, boost=1.0):
    """Store the bounds, target, window, location and boost.

    When ``target`` is given, both ``mini`` and ``maxi`` are set to it.
    ``location`` may be ``None``, a tuple (converted with
    ``Location.from_tuple``) or a location object; a location on strand -1
    is replaced by a strand-1 location with the same start and end.
    """
    if target is None:
        lower, upper = mini, maxi
    else:
        lower = upper = target
    self.target = target
    self.mini = lower
    self.maxi = upper
    self.window = window

    if isinstance(location, tuple):
        location = Location.from_tuple(location)
    if (location is not None) and (location.strand == -1):
        location = Location(location.start, location.end, 1)
    self.location = location
    self.boost = boost
def initialize_on_problem(self, problem, role='constraint'):
    """Give the specification a location and validate its choices.

    When ``self.location`` is ``None`` a copy located on the whole of
    ``problem.sequence`` (strand 1) is used, otherwise ``self`` is used.

    Raises
    ------
    ValueError
      If any item of ``choices`` has a length different from the length of
      the location.
    """
    result = self
    if self.location is None:
        whole_sequence = Location(0, len(problem.sequence), 1)
        result = self.copy_with_changes(location=whole_sequence)

    lengths_match = [
        len(choice) == len(result.location) for choice in result.choices
    ]
    if False in lengths_match:
        raise ValueError("All sequence choices should have the same "
                         "length as the region on which the spec is "
                         "applied.")
    return result
def initialize_on_problem(self, problem, role):
    """Fill in the location and target sequence that were left unset.

    A missing location becomes the whole of ``problem.sequence`` (strand 1)
    on a copy of the specification. A missing target sequence is read from
    ``problem.sequence`` with ``self.extract_subsequence`` and set on a
    copy. When nothing is missing ``self`` is returned. ``role`` is not
    used.
    """
    result = self
    if self.location is None:
        whole_sequence = Location(0, len(problem.sequence), 1)
        result = self.copy_with_changes(location=whole_sequence)
    if self.target_sequence is not None:
        return result
    result = result.copy_with_changes()
    # NOTE(review): the extraction is done by ``self`` (whose location may
    # still be None), not by ``result`` -- confirm this is equivalent.
    result.target_sequence = self.extract_subsequence(problem.sequence)
    return result
def __init__(self, choices=None, location=None, boost=1.0):
    """Store every variant of every choice, the location and the boost.

    String choices are first converted with ``SequencePattern.from_string``;
    each choice is then replaced by all the items returned by its
    ``all_variants()`` method. ``location`` is normalized with
    ``Location.from_data``.
    """
    # NOTE(review): the default ``choices=None`` is not iterable, so calling
    # without choices fails here -- confirm whether that is intended.
    patterns = []
    for choice in choices:
        if isinstance(choice, str):
            choice = SequencePattern.from_string(choice)
        patterns.append(choice)

    # Precompute all variants, keeping the order of the choices.
    variants = []
    for pattern in patterns:
        variants.extend(pattern.all_variants())

    self.choices = variants
    self.location = Location.from_data(location)
    self.boost = boost
def __init__(self, location=None, indices=None, target_sequence=None,
             boost=1.0):
    """Store the location, indices, target sequence and boost.

    A tuple ``location`` is converted with ``Location.from_tuple``.
    ``indices``, when given, is stored as a numpy array.
    """
    self.location = (
        Location.from_tuple(location)
        if isinstance(location, tuple)
        else location
    )
    if indices is None:
        self.indices = None
    else:
        self.indices = np.array(indices)
    self.target_sequence = target_sequence
    self.boost = boost
def __init__(self, stem_size=20, hairpin_window=200, location=None,
             boost=1.0):
    """Store the hairpin parameters, the location and the boost.

    A tuple ``location`` is converted with ``Location.from_tuple``; any
    other value is stored as given.
    """
    self.location = (
        Location.from_tuple(location)
        if isinstance(location, tuple)
        else location
    )
    self.stem_size = stem_size
    self.hairpin_window = hairpin_window
    self.boost = boost
def initialize_on_problem(self, problem, role):
    """Get location and translation from the problem if they are not set.

    A missing location becomes the whole of ``problem.sequence`` (strand 1),
    set through ``set_location`` on a copy of the specification. A missing
    translation is computed by translating the located sub-sequence with
    ``self.codons_translations``. When nothing is missing ``self`` is
    returned. ``role`` is not used.
    """
    result = self
    if self.location is None:
        whole_sequence = Location(0, len(problem.sequence), 1)
        result = self.copy_with_changes()
        result.set_location(whole_sequence)

    if result.translation is not None:
        return result

    coding_sequence = result.location.extract_sequence(problem.sequence)
    protein = translate(coding_sequence, self.codons_translations)
    return result.copy_with_changes(translation=protein)
def evaluate(self, problem):
    """Score is minus the number of wrong-translation codons.

    The region ``self.location`` (or the whole sequence when it is ``None``)
    is translated with ``self.codons_translations`` and compared, codon by
    codon, with ``self.translation``. Each mismatching codon is reported as
    a 3-position Location expressed in coordinates of the full sequence.
    """
    location = self.location
    if location is None:
        location = Location(0, len(problem.sequence))
    subsequence = location.extract_sequence(problem.sequence)
    translation = translate(subsequence, self.codons_translations)
    errors = [
        ind
        for ind, amino_acid in enumerate(translation)
        if amino_acid != self.translation[ind]
    ]

    # Use the effective ``location`` (never None) rather than
    # ``self.location``, and offset forward-strand codons by the region's
    # start so that error locations are absolute, as on the reverse strand.
    if location.strand == -1:
        errors_locations = [
            Location(start=location.end - 3 * (ind + 1),
                     end=location.end - 3 * ind,
                     strand=-1)
            for ind in errors
        ]
    else:
        errors_locations = [
            Location(location.start + 3 * ind,
                     location.start + 3 * (ind + 1))
            for ind in errors
        ]

    success = (len(errors) == 0)
    return SpecEvaluation(self, problem, score=-len(errors),
                          locations=errors_locations,
                          message="All OK." if success else
                          "Wrong translation at indices %s" % errors)
def __init__(
    self,
    mini=0,
    maxi=1.0,
    target=None,
    window=None,
    location=None,
    boost=1.0,
):
    """Store the bounds, target, window, location and boost.

    When ``mini`` is a string, the four values ``mini``, ``maxi``,
    ``target`` and ``window`` are all taken from
    ``self.string_to_parameters(mini)``. When a target is set, both bounds
    are set to it. ``location`` is normalized with ``Location.from_data``
    and a location on strand -1 is replaced by a strand-1 location with the
    same start and end.
    """
    if isinstance(mini, str):
        parsed = self.string_to_parameters(mini)
        mini, maxi, target, window = parsed
    if target is None:
        lower, upper = mini, maxi
    else:
        lower = upper = target
    self.target = target
    self.mini = lower
    self.maxi = upper
    self.window = window

    normalized = Location.from_data(location)
    if (normalized is not None) and (normalized.strand == -1):
        normalized = Location(normalized.start, normalized.end, 1)
    self.location = normalized
    self.boost = boost
def evaluate(self, problem): """Return a score equal to -number_of_equalities. Locations are "binned" equality regions. Each bin has a length in nucleotides equal to ``localization_interval_length`.` """ # FIND THE INDICES WHERE THE SEQUENCE IS UNCHANGED # Note: at this stage any minimum_percent or amount_percent have been # transformed into abolsute self.minimum and self.amount. target = self.reference sequence = self.extract_subsequence(problem.sequence) equalities = np.nonzero( 1 - sequences_differences_array(sequence, target))[0] if self.indices is not None: equalities = self.indices[equalities] elif self.location is not None: if self.location.strand == -1: equalities = self.location.end - equalities else: equalities = equalities + self.location.start def indices_to_intervals(indices): intervals = group_nearby_indices( indices, max_group_spread=self.localization_interval_length) return [(interval[0], interval[-1] + 1) for interval in intervals] if self.indices is not None: n_indices = len(self.indices) else: n_indices = len(self.location) n_differences = n_indices - len(equalities) if self.minimum is not None: score = n_differences - self.minimum intervals = indices_to_intervals(equalities) else: score = -abs(n_differences - self.amount) if n_differences <= self.amount: intervals = indices_to_intervals(equalities) else: differences = [ i for i in self.location.indices if i not in equalities ] intervals = indices_to_intervals(differences) locations = ([self.location] if (self.minimum is not None) else [Location(start, end, 1) for start, end in intervals]) return SpecEvaluation(self, problem, score=score, locations=locations)
def localized(self, location, problem=None, with_righthand=True):
    """Generic localization method for codon specifications.

    Returns ``self`` when the specification has no location and ``None``
    when its location does not overlap ``location``. Otherwise the overlap
    is widened to whole codons (without exceeding the specification's own
    location) and the class' ``.localized_on_window`` method is called with
    the widened location and the indices of its first and last+1 codons.

    ``problem`` and ``with_righthand`` are not used by this method.
    """
    own = self.location
    if own is None:
        return self

    overlap = own.overlap_region(location)
    if overlap is None:
        return None

    o_start, o_end = overlap.start, overlap.end
    w_start, w_end = own.start, own.end
    if own.strand == -1:
        # Codons are counted backwards from the end of the location.
        start_codon = int((w_end - o_end) / 3)
        end_codon = int((w_end - o_start - 1) / 3) + 1
        window_start = max(w_start, w_end - 3 * (end_codon))
        window_end = w_end - 3 * start_codon
    else:
        # Codons are counted from the start of the location.
        start_codon = int((o_start - w_start) / 3)
        end_codon = int((o_end - w_start - 1) / 3) + 1
        window_start = w_start + 3 * start_codon
        window_end = min(w_end, w_start + 3 * (end_codon))

    new_location = Location(
        start=window_start, end=window_end, strand=own.strand
    )
    return self.localized_on_window(new_location, start_codon, end_codon)
def insert_pattern_in_problem(self, problem, reverse=False):
    """Insert the pattern in the problem's sequence by successive tries.

    Candidate start positions inside ``self.location`` are tried in turn
    (closest to the middle of the location first when ``self.center`` is
    true). For each one, a trial problem is built in which the pattern's
    sequence is enforced at that position; the first trial that passes this
    specification's ``evaluate`` and whose constraints resolve has its
    sequence copied into ``problem.sequence``.

    If no position works and the pattern is not palindromic, the whole
    procedure is retried once with the reverse-complemented sequence
    (``reverse=True``).

    Raises
    ------
    NoSolutionError
      If no insertion position works (in either orientation).
    """
    sequence_to_insert = self.pattern.sequence
    if reverse:
        sequence_to_insert = reverse_complement(sequence_to_insert)
    L = self.pattern.size
    # NOTE(review): this range stops before ``end - L``, so the last
    # position at which the pattern would still fit is never tried (and a
    # location of exactly the pattern's size yields no candidate) --
    # confirm whether that is intended.
    starts = range(self.location.start, self.location.end - L)
    if self.center:
        # Try the positions nearest to the middle of the location first.
        center = 0.5 * (self.location.start + self.location.end)
        starts = sorted(starts, key=lambda s: abs(s - center))
    for start in starts:
        new_location = Location(start, start + L, self.location.strand)
        new_constraint = EnforceSequence(
            sequence=sequence_to_insert, location=new_location
        )
        new_space = MutationSpace.from_optimization_problem(
            problem, new_constraints=[new_constraint]
        )
        # Skip positions where the extra constraint leaves some segment
        # flagged as unsolvable by the mutation space.
        if len(new_space.unsolvable_segments) > 0:
            continue
        new_sequence = new_space.constrain_sequence(problem.sequence)
        new_constraints = problem.constraints + [new_constraint]
        new_problem = DnaOptimizationProblem(
            sequence=new_sequence,
            constraints=new_constraints,
            mutation_space=new_space,
            logger=None,
        )
        if self.evaluate(new_problem).passes:
            try:
                new_problem.resolve_constraints()
                # Success: keep the trial sequence and stop searching.
                problem.sequence = new_problem.sequence
                return
            except NoSolutionError:
                # This position cannot be resolved; try the next one.
                pass
    # Every forward position failed: try the other orientation once.
    if (not reverse) and (not self.pattern.is_palyndromic):
        self.insert_pattern_in_problem(problem, reverse=True)
        return
    raise NoSolutionError(
        problem=problem,
        location=self.location,
        message="Insertion of pattern %s in %s failed"
        % (self.pattern.sequence, self.location),
    )