def _make_pbs_sequence(nuclease, reference, pbs_min_length, pbs_max_length,
                       cloning_strategy, cloning_options, **options):
    """Suggest a PBS and list every expressible PBS in the length range.

    Candidates are the last ``n`` characters of ``reference`` for ``n`` from
    ``pbs_min_length`` to ``pbs_max_length`` (inclusive). Candidates whose
    reverse complement the cloning strategy cannot express are ignored.

    The suggestion is the shortest candidate with a GC content in
    [0.4, 0.6]. If none qualifies, the candidate whose GC content is closest
    to 0.5 is used (shortest wins on ties). If no candidate is expressible
    at all, the last ``pbs_min_length`` characters of ``reference`` are
    returned unfiltered.

    Returns a ``(pbs, alt_lengths)`` tuple, where ``alt_lengths`` holds all
    expressible candidates ordered from shortest to longest.
    """
    strategy = nuclease.get_cloning_strategy(cloning_strategy)

    def expressible(candidate):
        return strategy.can_express(reverse_complement(candidate),
                                    **cloning_options)

    out_of_range = []
    for length in range(pbs_min_length, pbs_max_length + 1):
        candidate = reference[-length:]
        if not expressible(candidate):
            continue
        gc_fraction = gc(candidate)
        if 0.4 <= gc_fraction <= 0.6:
            pbs = candidate
            break
        out_of_range.append(
            (abs(0.5 - gc_fraction), len(candidate), candidate))
    else:
        if out_of_range:
            # min() keeps the first minimal entry, i.e. the shortest
            # candidate among equally good GC contents.
            pbs = min(out_of_range, key=lambda entry: entry[0])[2]
        else:
            pbs = reference[-pbs_min_length:]

    # Create all possible PBS sequences within range limits.
    alt_lengths = []
    for length in range(pbs_min_length, pbs_max_length + 1):
        candidate = reference[-length:]
        if expressible(candidate):
            alt_lengths.append(candidate)
    return pbs, alt_lengths
def make_extension_sequence(cls, nuclease, reference_sequence,
                            altered_sequence, spacer_strand, spacer_cut_site,
                            cut_dist, alteration_length, pbs_min_length,
                            pbs_max_length, rt_min_length, rt_max_length,
                            cloning_strategy, cloning_options,
                            nuclease_options, **options):
    """Create the pegRNA extension sequence.

    pegRNA extension sequences consist of a PBS and a RT template. The PBS
    is taken from the reference sequence on one side of the cut site, the RT
    template from the altered sequence on the other side. For any
    ``spacer_strand`` other than 1 both source sequences are reverse
    complemented first.

    Returns ``(pbs_length, rt_length, extension, alternate_extensions)``.
    ``extension`` is the reverse complement of PBS + RT template, and
    ``alternate_extensions`` is a list of dicts describing every other
    PBS/RT template length combination.
    """
    length_change = len(altered_sequence) - len(reference_sequence)
    if spacer_strand == 1:
        length_change = min(0, length_change)
        pbs_source = reference_sequence[:spacer_cut_site]
        rt_source = altered_sequence[spacer_cut_site:]
    else:
        pbs_source = reverse_complement(reference_sequence[spacer_cut_site:])
        rt_source = reverse_complement(
            altered_sequence[:spacer_cut_site + length_change])

    primary_pbs, pbs_candidates = cls._make_pbs_sequence(
        nuclease, pbs_source.upper(), pbs_min_length, pbs_max_length,
        cloning_strategy, cloning_options, **options)
    primary_rt, rt_candidates = cls._make_rt_sequence(
        nuclease, rt_source, cut_dist, length_change, alteration_length,
        rt_min_length, rt_max_length, cloning_strategy, cloning_options,
        nuclease_options, **options)
    primary_lengths = (len(primary_pbs), len(primary_rt))

    # Generate all combinations of PBS and RT template sequences that are
    # not identical (in length) to the primary suggestion.
    alternates = []
    for candidate_pbs in pbs_candidates:
        candidate_pbs_gc = round(gc(candidate_pbs), 2)
        for candidate_rt in rt_candidates:
            candidate_rt_gc = round(gc(candidate_rt), 2)
            candidate_lengths = (len(candidate_pbs), len(candidate_rt))
            if candidate_lengths == primary_lengths:
                continue
            alternates.append({
                'pbs_length': candidate_lengths[0],
                'rt_template_length': candidate_lengths[1],
                'sequence': reverse_complement(candidate_pbs + candidate_rt),
                'pbs_gc': candidate_pbs_gc,
                'rt_gc': candidate_rt_gc
            })

    extension = reverse_complement(primary_pbs + primary_rt)
    return primary_lengths[0], primary_lengths[1], extension, alternates
def _make_extension_sequence(self, **options):
    """Make the extension sequence for the pegRNA.

    Delegates the design to ``self.design_strategy.make_extension_sequence``
    and stores the result on the instance: ``pbs_length``,
    ``rt_template_length``, ``extension``, ``rt_template``, ``pbs`` and
    ``alternate_extensions``.

    Also creates ``visual_extension``, which can be used by the frontend for
    visualization. The same value is still stored under the historical,
    misspelled name ``visual_exension`` so existing readers keep working.

    ``options`` is forwarded unchanged to the design strategy.
    """
    reference_sequence = self.tracker.original_sequence
    altered_sequence = str(self.tracker)

    # Distance between the cut site and the far end of the alteration.
    alteration_position = sum(self.tracker.alterations[0])
    if alteration_position >= self.spacer_cut_site:
        cut_dist = (alteration_position + self.tracker.alteration_length -
                    self.spacer_cut_site)
    else:
        cut_dist = self.spacer_cut_site - alteration_position

    # In repair mode the design goes from the edited sequence back to the
    # original one, so the two roles are swapped.
    if self.repair:
        reference_sequence, altered_sequence = (altered_sequence,
                                                reference_sequence)

    (pbs_length, rt_template_length, extension,
     alternate_extensions) = self.design_strategy.make_extension_sequence(
         self.nuclease,
         reference_sequence,
         altered_sequence,
         self.spacer_strand,
         self.spacer_cut_site,
         cut_dist,
         self.tracker.number_of_alterations,
         cloning_strategy=self.cloning_strategy,
         cloning_options=self.cloning_options,
         nuclease_options=self.nuclease_options,
         **options)

    self.pbs_length = pbs_length
    self.rt_template_length = rt_template_length
    self.extension = extension
    # The extension is laid out as RT template followed by PBS.
    self.rt_template = extension[:rt_template_length]
    self.pbs = extension[rt_template_length:]
    self.alternate_extensions = alternate_extensions

    visual_extension = extension
    start = self.spacer_cut_site - self.pbs_length
    if self.spacer_strand == 1:
        visual_extension = reverse_complement(visual_extension)
    # From here on only the length of visual_extension is used; its content
    # is replaced by the slice read back from the tracker.
    if self.repair:
        if self.spacer_strand == -1:
            start = (self.spacer_cut_site - self.rt_template_length +
                     self.tracker.number_of_deletions -
                     self.tracker.number_of_insertions)
        visual_extension = self.tracker.seq_from_original_coordinates(
            start, start + len(visual_extension))
    else:
        if self.spacer_strand == -1:
            start = (self.spacer_cut_site - self.rt_template_length -
                     self.tracker.number_of_deletions +
                     self.tracker.number_of_insertions)
        visual_extension = self.tracker.seq_from_new_coordinates(
            start, start + len(visual_extension))
    if self.spacer_strand == 1:
        visual_extension = reverse_complement(visual_extension)

    self.visual_extension = visual_extension
    # Backward-compatible alias for the original misspelled attribute.
    self.visual_exension = visual_extension
def make_spacer_oligos(cls, spacer_sequence: str, scaffold: str) -> OligoDict:
    """Build the top and bottom oligos for cloning a spacer.

    The spacer is first converted with ``cls._spacer_to_cloning``. The top
    oligo is ``'cacc'`` + converted spacer + the first 5 scaffold characters
    (lower-cased). The bottom oligo is the reverse complement of the
    converted spacer + the first 9 scaffold characters (lower-cased).
    """
    insert = cls._spacer_to_cloning(spacer_sequence)
    top_oligo = 'cacc' + insert + scaffold[:5].lower()
    bottom_template = insert + scaffold[:9].lower()
    return {
        'top': top_oligo,
        'bottom': reverse_complement(bottom_template),
    }
def _make_rt_sequence(nuclease, reference, cut_dist, nucleotide_difference,
                      alteration_length, rt_min_length, rt_max_length,
                      cloning_strategy, cloning_options, nuclease_options,
                      **options):
    """Find a suggested RT template, and generate alternative RT templates.

    Every template is a prefix of ``reference`` of length
    ``cut_dist + n + nucleotide_difference``. The suggestion starts at
    ``n = rt_min_length`` and is grown one character at a time while the
    current template is rejected by ``nuclease.filter_extension`` or ``n``
    is at most twice ``alteration_length``, and ``n`` has not passed
    ``rt_max_length``. The last grown template that passes both the
    nuclease filter and the cloning strategy is returned; if none passes,
    the initial (unchecked) template is returned.

    Returns ``(rt_template, lengths)``, where ``lengths`` holds every
    template for ``n`` in ``rt_min_length..rt_max_length`` that passes both
    checks.
    """
    strategy = nuclease.get_cloning_strategy(cloning_strategy)
    base_offset = cut_dist + nucleotide_difference

    def is_filtered(template):
        return nuclease.filter_extension(template, **nuclease_options)

    def is_usable(template):
        if is_filtered(template):
            return False
        return strategy.can_express(reverse_complement(template),
                                    **cloning_options)

    extra = rt_min_length
    candidate = reference[:base_offset + extra]
    suggestion = candidate

    # For large alterations, longer template is probably preferred
    # NOTE(review): the bound is checked before extending, so the suggestion
    # can end up one character longer than the longest alternative below -
    # confirm whether that is intended.
    while ((is_filtered(candidate) or extra <= alteration_length * 2)
           and extra <= rt_max_length):
        try:
            next_base = reference[base_offset + extra]
        except IndexError:
            # Ran off the end of the reference; keep what we have.
            break
        candidate += next_base
        if is_usable(candidate):
            suggestion = candidate
        extra += 1

    # Create all possible RT templates within range limits.
    prefixes = (reference[:base_offset + n]
                for n in range(rt_min_length, rt_max_length + 1))
    lengths = [template for template in prefixes if is_usable(template)]
    return suggestion, lengths
def make_nicking_oligos(cls, spacer_sequence: str, scaffold: str):
    """Build the top and bottom oligos for cloning a nicking spacer.

    The spacer is first converted with ``cls._spacer_to_cloning``. The top
    oligo is ``'cacc'`` + converted spacer. The bottom oligo is the reverse
    complement of the converted spacer + the first 4 scaffold characters
    (lower-cased).
    """
    insert = cls._spacer_to_cloning(spacer_sequence)
    scaffold_start = scaffold[:4].lower()
    oligos = {}
    oligos['top'] = 'cacc' + insert
    oligos['bottom'] = reverse_complement(insert + scaffold_start)
    return oligos
def make_extension_oligos(cls, extension_sequence: str,
                          scaffold: str) -> OligoDict:
    """Build the top and bottom oligos for cloning an extension.

    The top oligo is the last 4 scaffold characters (lower-cased) followed
    by the extension. The bottom oligo is the reverse complement of the
    extension followed by ``'tttt'``.
    """
    scaffold_end = scaffold[-4:].lower()
    terminated_extension = extension_sequence + 'tttt'
    return {
        'top': scaffold_end + extension_sequence,
        'bottom': reverse_complement(terminated_extension),
    }
def make_scaffold_oligos(cls, scaffold: str) -> OligoDict:
    """Build the top and bottom oligos for cloning the scaffold.

    The top oligo is the scaffold without its first 5 and last 4
    characters. The bottom oligo is the reverse complement of the scaffold
    without its first 9 characters.
    """
    top_oligo = scaffold[5:-4]
    bottom_oligo = reverse_complement(scaffold[9:])
    return {'top': top_oligo, 'bottom': bottom_oligo}
def find_spacers(nuclease, reference_sequence, altered_sequence, start, end,
                 spacer_search_range, cloning_method, cloning_options,
                 **options):
    """Find candidate spacers for pegRNA selection.

    Finds all spacers with a cut site within spacer_search_range of the edit.
    Sorts spacers according to pam disruption, distance to edit and score.

    The reference sequence is searched on both strands with
    ``nuclease.target_motif`` (overlapping matches allowed). The motif must
    define the named groups ``upstream``, ``spacer`` and ``PAM``. Only
    spacers accepted by the cloning strategy are kept.

    ``start`` and ``end`` delimit the edit (presumably in reference
    coordinates - confirm against the caller). ``cloning_options`` is
    unpacked with ``**`` and therefore has to be a mapping.

    Returns a list of dicts with the keys ``spacer``, ``position``,
    ``cut_site``, ``strand`` (1 or -1), ``pam`` (a ``(sequence, position)``
    tuple), ``pam_disrupted``, ``distance`` and ``score``.
    """
    cloning_method = nuclease.get_cloning_strategy(cloning_method)
    spacers = []
    # Full motif matches, kept in the same order as `spacers` so the scores
    # returned below can be assigned by index.
    scoring_spacers = []

    # Forward strand: everything up to the edit start plus the part of the
    # target that lies downstream of the cut site.
    sense = reference_sequence[:start + nuclease.downstream_from_cut_site]
    # Index in the reference where the searched forward window begins
    # (clamped at 0).
    sense_offset = max(
        0, start - spacer_search_range - nuclease.cut_site_position)
    nucleotide_difference = len(altered_sequence) - len(reference_sequence)
    # Reverse strand: reverse complement of everything from (roughly) the
    # edit end onwards.
    antisense = reverse_complement(
        reference_sequence[end - nuclease.downstream_from_cut_site -
                           max(0, nucleotide_difference):])
    # The min(..., 0) term only contributes when the reverse-strand sequence
    # is shorter than the search window.
    antisense_offset = end - max(0, nucleotide_difference) + min(
        len(antisense) -
        (spacer_search_range + nuclease.cut_site_position +
         nuclease.downstream_from_cut_site), 0)
    # Anchored at the end so the PAM has to match the whole slice tail.
    pam_motif = dgn_to_regex(nuclease.pam_motif) + '$'

    # Forward-strand candidates.
    for match in regex.finditer(
            nuclease.target_motif,
            sense[-spacer_search_range - nuclease.cut_site_position -
                  nuclease.downstream_from_cut_site:],
            regex.IGNORECASE,
            overlapped=True):
        spacer = match.group('spacer')
        pam = match.group('PAM')
        pos = sense_offset + match.start() + len(match.group('upstream'))
        # The PAM counts as disrupted when the altered sequence no longer
        # matches the PAM motif at the same coordinates.
        pam_disrupted = not regex.search(
            pam_motif,
            altered_sequence[pos + len(spacer):pos + len(spacer) + len(pam)],
            regex.IGNORECASE)
        cut_site = pos + nuclease.cut_site_position - len(
            match.group('upstream'))
        distance = start - cut_site
        if cloning_method.can_express(spacer, **cloning_options):
            spacers.append({
                'spacer': spacer,
                'position': pos,
                'cut_site': cut_site,
                'strand': 1,
                'pam': (pam, pos + len(spacer)),
                'pam_disrupted': pam_disrupted,
                'distance': distance,
            })
            scoring_spacers.append(match.group().upper())

    # Reverse-strand candidates; match coordinates are mapped back onto the
    # forward strand.
    for match in regex.finditer(
            nuclease.target_motif,
            antisense[-spacer_search_range - nuclease.cut_site_position -
                      nuclease.downstream_from_cut_site:],
            regex.IGNORECASE,
            overlapped=True):
        spacer = match.group('spacer')
        pam = match.group('PAM')
        pos = antisense_offset + spacer_search_range - match.start() - len(
            match.group('upstream')) - 1 + nuclease.cut_site_position
        # Same PAM check as above, on the reverse complement of the altered
        # sequence and shifted by the length change of the edit.
        pam_disrupted = not regex.search(
            pam_motif,
            reverse_complement(
                altered_sequence[pos - len(spacer) - len(pam) + 1 +
                                 nucleotide_difference:pos + 1 +
                                 nucleotide_difference - len(spacer)]),
            regex.IGNORECASE)
        cut_site = pos - nuclease.cut_site_position + len(
            match.group('upstream')) + 1
        distance = cut_site - end + max(0, nucleotide_difference)
        if cloning_method.can_express(spacer, **cloning_options):
            spacers.append({
                'spacer': spacer,
                'position': pos,
                'cut_site': cut_site,
                'strand': -1,
                'pam': (pam, pos - len(spacer) - len(pam) + 1 +
                        nucleotide_difference),
                'pam_disrupted': pam_disrupted,
                'distance': distance,
            })
            scoring_spacers.append(match.group().upper())

    # Assumes score_spacers yields one score per input, in input order.
    for i, score in enumerate(nuclease.score_spacers(scoring_spacers)):
        spacers[i]['score'] = score
    # PAM-disrupting spacers first, then ascending distance, then ascending
    # score.
    # NOTE(review): find_nicking_spacers sorts by descending score - confirm
    # that ascending is intended here.
    return sorted(spacers,
                  key=lambda x:
                  (not x['pam_disrupted'], x['distance'], x['score']))
def find_nicking_spacers(nuclease, reference_sequence, altered_sequence,
                         spacer_strand, cut_site, scaffold, nicking_range,
                         cloning_method, cloning_options, **options):
    """Find spacers for nicking the opposite strand.

    Searches a window of ``nicking_range`` around the pegRNA cut site with
    ``nuclease.target_motif`` (overlapping matches allowed). When
    ``spacer_strand`` is 1 both sequences are reverse complemented first, so
    the search runs on the other strand; otherwise they are searched as
    given.

    Each result is the dict returned by
    ``cloning_method.make_nicking_oligos`` extended with the keys
    ``position`` (offset of the nick relative to the pegRNA cut site),
    ``spacer``, ``kind`` ('3' or '3b'), ``wt_score``, ``offset`` and
    ``score``. Only spacers accepted by the cloning strategy are returned.

    ``cloning_options`` is unpacked with ``**`` and therefore has to be a
    mapping.
    """
    cloning_method = nuclease.get_cloning_strategy(cloning_method)
    spacers = []
    # Full motif matches, kept in the same order as `spacers` so the scores
    # returned below can be assigned by index.
    scoring_spacers = []
    nt_difference = len(altered_sequence) - len(reference_sequence)
    if spacer_strand == 1:
        reference_sequence = reverse_complement(reference_sequence)
        altered_sequence = reverse_complement(altered_sequence)
        # Express the cut site in reverse-complement coordinates.
        cut_site = len(altered_sequence) - cut_site
    # Search window in the altered sequence, clamped to the sequence bounds.
    sequence = altered_sequence[max(
        0, cut_site - nuclease.cut_site_position - nicking_range
    ):min(len(altered_sequence), cut_site +
          nuclease.downstream_from_cut_site + nicking_range)].upper()
    # Matching window in the reference; its end is shifted by the length
    # change of the edit.
    ref = reference_sequence[
        max(0, cut_site - nuclease.cut_site_position - nicking_range):min(
            len(reference_sequence), cut_site +
            nuclease.downstream_from_cut_site + nicking_range -
            nt_difference)].upper()
    # Re-express the cut site relative to the window start, unless the
    # window was clamped at 0 and already starts at the sequence start.
    if cut_site - nuclease.cut_site_position - nicking_range > 0:
        cut_site = nuclease.cut_site_position + nicking_range
    for match in regex.finditer(nuclease.target_motif,
                                sequence,
                                regex.IGNORECASE,
                                overlapped=True):
        spacer = match.group('spacer')
        pos = match.start() + len(match.group('upstream'))
        wt_pos = pos
        cut = match.start() + nuclease.cut_site_position
        nick_location = cut_site - cut
        # Nicks on the far side of the cut site are shifted in the reference
        # window by the length change of the edit.
        if nick_location < 0:
            wt_pos -= nt_difference
        kind = '3'
        wt_score = 1
        # Binding sites (spacer plus PAM-length tail) in the altered and
        # reference windows.
        alt_bind = sequence[pos:pos + len(spacer) +
                            len(nuclease.pam_motif)].upper()
        wt_bind = ref[wt_pos:wt_pos + len(spacer) +
                      len(nuclease.pam_motif)].upper()
        if nuclease._is3b(alt_bind, wt_bind):
            kind = '3b'
            wt_score = nuclease._calc_wt_score(alt_bind, wt_bind)
        # Oligos are built before the expressibility check; `info` is
        # dropped when the spacer cannot be expressed.
        info = cloning_method.make_nicking_oligos(spacer, scaffold)
        info['position'] = nick_location
        info['spacer'] = spacer
        info['kind'] = kind
        info['wt_score'] = wt_score
        info['offset'] = nuclease.cut_site_position - len(
            match.group('upstream'))
        if cloning_method.can_express(spacer, **cloning_options):
            spacers.append(info)
            scoring_spacers.append(match.group().upper())
    # Assumes score_spacers yields one score per input, in input order.
    for i, score in enumerate(nuclease.score_spacers(scoring_spacers)):
        spacers[i]['score'] = score
    # Lowest wt_score first, then nicks more than 50 positions from the cut
    # site before closer ones, then highest score first.
    return sorted(
        spacers,
        key=lambda x:
        (x['wt_score'], not (abs(x['position']) > 50), -x['score']))
def find_best_spacers(self,
                      repair=False,
                      nuclease=None,
                      cloning_strategy=None,
                      design_strategy=None,
                      nuclease_options=None,
                      cloning_options=None,
                      **options):
    """Find pegRNA spacers.

    Optional arguments:
    repair -- If true, designs pegRNAs from edited sequence -> wild type
        sequence, defaults to False.
    nuclease -- Key into NUCLEASES naming the nuclease to design pegRNAs
        for; if None the 'default_nuclease' entry of the Django
        DESIGN_CONF setting is used.
    cloning_strategy, design_strategy -- resolved through the nuclease.
    nuclease_options, cloning_options -- forwarded to each OligoSet. A
        cloning_options of None is treated as "no options" for the spacer
        search.

    Any further keyword arguments are forwarded to the design strategy's
    find_spacers (which, as written in this file, expects
    spacer_search_range).

    Returns one OligoSet per spacer found, in the order produced by the
    design strategy. Each spacer dict additionally gets a 'visual_spacer'
    entry read back from this tracker.
    """
    reference_sequence = self.original_sequence
    altered_sequence = str(self)
    position = sum(self.alterations[0])

    if nuclease is None:
        nuclease = django.conf.settings.DESIGN_CONF['default_nuclease']
    nuclease = NUCLEASES[nuclease]
    design_strategy = nuclease.get_design_strategy(design_strategy)

    # In repair mode the design goes from the edited sequence back to the
    # original one, so the two roles are swapped.
    if repair:
        reference_sequence, altered_sequence = (altered_sequence,
                                                reference_sequence)

    # The spacer search unpacks cloning_options with `**`, which raises
    # TypeError for None; substitute an empty mapping for the search only.
    search_cloning_options = {} if cloning_options is None else cloning_options
    spacers = design_strategy.find_spacers(
        nuclease,
        reference_sequence,
        altered_sequence,
        position,
        position + self.alteration_length,
        cloning_method=cloning_strategy,
        cloning_options=search_cloning_options,
        **options)

    for sp in spacers:
        spacer = sp['spacer']
        pos = sp['position']
        strand = sp['strand']
        # For reverse-strand spacers, shift the position back by the spacer
        # length so the slice below covers the spacer.
        if strand == -1:
            pos = pos - len(spacer) + 1
        if repair:
            visual_spacer = self.seq_from_new_coordinates(
                pos, pos + len(spacer))
        else:
            visual_spacer = self.seq_from_original_coordinates(
                pos, pos + len(spacer))
        if strand == -1:
            visual_spacer = reverse_complement(visual_spacer)
        sp['visual_spacer'] = visual_spacer

    # OligoSet receives the caller's original option objects unchanged.
    return [
        OligoSet(tracker=self,
                 spacer=sp,
                 repair=repair,
                 nuclease=nuclease,
                 nuclease_options=nuclease_options,
                 cloning_strategy=cloning_strategy,
                 cloning_options=cloning_options) for sp in spacers
    ]
def make_oligos(self, degenerate_sequence, silence_pam=False, **options):
    """Make oligos for cloning pegRNAs

    Steps, in order:
    1. Optionally silence the PAM (only when ``silence_pam`` is set and the
       edit does not already disrupt it) by substituting nucleotides on a
       copy of the tracker.
    2. Build the extension sequence and the cloning oligos for the spacer
       and extension.
    3. Attach cloning oligos to every expressible alternate extension and
       drop the others.
    4. If the cloning strategy can design nicking spacers, find them and
       attach ``visual_spacer`` and ``push`` values to each.

    ``degenerate_sequence`` is indexed by sequence position and its entries
    are looked up in ``degenerate_to_nucleotides``. ``options`` is forwarded
    to the extension design, the cloning strategy and the nicking-spacer
    search.

    Sets ``oligos``, ``can_express``, ``alternate_extensions`` and, when
    nicking is supported, ``nicking_spacers`` on the instance (plus
    ``tracker`` and ``pam_silenced`` when the PAM is silenced).

    Returns ``self.oligos``.
    """
    if not self.pam_disrupted and silence_pam:
        # Work on a copy so the substitutions do not touch the tracker that
        # was handed in.
        self.tracker = self.tracker.copy()
        if self.spacer_strand == 1:
            pam = self.nuclease.pam_motif
        else:
            pam = reverse_complement(self.nuclease.pam_motif)
        # self.pam is used as (pam sequence, start position); i walks the
        # PAM motif, j the matching sequence positions.
        for i, j in enumerate(
                range(self.pam[1], self.pam[1] + len(self.pam[0]))):
            dgn = degenerate_sequence[j]
            pam_nt = pam[i]
            pam_dgn = degenerate_to_nucleotides[pam_nt]
            nt_dgn = degenerate_to_nucleotides[dgn]
            # Presumably both are sets, making `<` a proper-subset test:
            # the position allows nucleotides the PAM does not - TODO
            # confirm the type of degenerate_to_nucleotides values.
            if pam_dgn < nt_dgn:
                for nt in nt_dgn:
                    if nt not in pam_dgn:
                        # Substitutions are written in lower case.
                        self.tracker.substitute(nt.lower(), j)
                        self.pam_silenced = True
                        break
    reference_sequence = self.tracker.original_sequence
    altered_sequence = str(self.tracker)
    self._make_extension_sequence(**options)
    # Sequence context on either side of the cut site.
    upstream = reference_sequence[:self.spacer_cut_site]
    downstream = reference_sequence[self.spacer_cut_site:]
    # Distance between the cut site and the far end of the alteration.
    alteration_position = sum(self.tracker.alterations[0])
    if alteration_position >= self.spacer_cut_site:
        cut_dist = alteration_position + self.tracker.alteration_length - self.spacer_cut_site
    else:
        cut_dist = self.spacer_cut_site - alteration_position
    # On the reverse strand the two flanks swap roles and are reverse
    # complemented.
    if self.spacer_strand == -1:
        upstream, downstream = reverse_complement(
            downstream), reverse_complement(upstream)
    self.oligos = self.cloning_strategy.design_cloning(
        spacer_sequence=self.spacer_sequence,
        scaffold=self.scaffold,
        extension_sequence=self.extension,
        upstream=upstream,
        downstream=downstream,
        cut_dist=cut_dist,
        cloning_options=self.cloning_options,
        **options)
    # Both the spacer and the extension have to be expressible.
    self.can_express = self.cloning_strategy.can_express(
        self.spacer_sequence, **
        self.cloning_options) and self.cloning_strategy.can_express(
            self.extension, **self.cloning_options)
    # Keep only expressible alternate extensions, each with its own oligos.
    extensions = []
    for extension in self.alternate_extensions:
        if self.cloning_strategy.can_express(extension['sequence'],
                                             **self.cloning_options):
            extension[
                'oligos'] = self.cloning_strategy.alternate_extension(
                    spacer_sequence=self.spacer_sequence,
                    scaffold=self.scaffold,
                    extension_sequence=extension['sequence'],
                    upstream=upstream,
                    downstream=downstream,
                    cut_dist=cut_dist,
                    cloning_options=self.cloning_options,
                    **options)
            extensions.append(extension)
    self.alternate_extensions = extensions
    # In repair mode the nicking search runs from the edited sequence back
    # to the original one.
    if self.repair:
        reference_sequence, altered_sequence = altered_sequence, reference_sequence
    if self.cloning_strategy.can_design_nicking:
        spacers = self.design_strategy.find_nicking_spacers(
            nuclease=self.nuclease,
            reference_sequence=reference_sequence,
            altered_sequence=altered_sequence,
            spacer_strand=self.spacer_strand,
            cut_site=self.spacer_cut_site,
            scaffold=self.scaffold,
            cloning_method=self.cloning_strategy,
            cloning_options=self.cloning_options,
            **options)
        for spacer in spacers:
            visual_spacer = spacer['spacer']
            position = spacer['position']
            if self.spacer_strand == 1:
                visual_spacer = reverse_complement(visual_spacer)
            # Translate the nick position (relative to the cut site) into a
            # sequence coordinate.
            pos = self.spacer_cut_site
            if self.spacer_strand == 1:
                pos += position - len(spacer['spacer']) + spacer['offset']
            else:
                pos -= position + spacer['offset']
            # Only the length of visual_spacer is used below; its content is
            # replaced by the slice read back from the tracker.
            if self.repair:
                visual_spacer = self.tracker.seq_from_original_coordinates(
                    pos, pos + len(visual_spacer))
                spacer['push'] = 0
                if self.tracker.number_of_insertions:
                    spacer['push'] = (pos - self.tracker.index[pos][0]) + (
                        self.spacer_cut_site -
                        self.tracker.index[self.spacer_cut_site][0])
            else:
                visual_spacer = self.tracker.seq_from_new_coordinates(
                    pos, pos + len(visual_spacer))
                spacer['push'] = 0
                if self.tracker.number_of_deletions:
                    spacer['push'] = (self.tracker.index[pos][0] - pos) + (
                        self.tracker.index[self.spacer_cut_site][0] -
                        self.spacer_cut_site)
                    # NOTE(review): nesting of this strand adjustment is
                    # reconstructed from an unindented source - confirm it
                    # belongs inside the deletions branch.
                    if self.spacer_strand == 1:
                        spacer[
                            'push'] -= self.tracker.number_of_insertions + self.tracker.number_of_deletions
            if self.spacer_strand == 1:
                visual_spacer = reverse_complement(visual_spacer)
            spacer['visual_spacer'] = visual_spacer
        self.nicking_spacers = spacers
    return self.oligos