def sites_outside_snp(target_seq, snp_pos, allele_pos_end):
    """Return start indices of recognition sites that do not touch the SNP.

    The recognition sequence of ``pe`` is compared against every window of
    ``target_seq``; when the recognition sequence differs from its reverse
    complement, the reverse complement is scanned as well.

    NOTE(review): ``pe`` is not a parameter of this function; it is resolved
    from an enclosing or module scope that is not visible here -- confirm.

    Args:
        target_seq: Indexable sequence whose items are passed to
            ``can_recognize`` together with one item of the expanded
            recognition sequence.
        snp_pos: Position of the SNP; window starts in
            ``range(snp_pos - site_len, allele_pos_end)`` are skipped.
        allele_pos_end: Exclusive upper bound of the skipped start range.

    Returns:
        A list of window start indices: all forward-strand hits in ascending
        order, followed by all reverse-complement hits in ascending order.
    """
    recognition = pe.clean_recognition_sequence
    site_len = len(recognition)
    # Window starts in this range are never reported.
    skipped_starts = range(snp_pos - site_len, allele_pos_end)
    hits = []

    def scan(pattern):
        """Append to ``hits`` every allowed start where ``pattern`` matches."""
        for start in range(0, len(target_seq) - site_len):
            if start in skipped_starts:
                continue
            try:
                matched = all(
                    can_recognize(target_seq[start + offset], pattern[offset])
                    for offset in range(site_len)
                )
            except fexceps.DigestError:
                # A DigestError raised while comparing is treated the same
                # as a plain mismatch: the window is skipped.
                continue
            if matched:
                hits.append(start)

    scan(expand_sequence(recognition.lower()))

    if recognition == reverse_complement(recognition):
        # The reverse strand reads the same, so a second scan would only
        # repeat the indices already collected.
        return hits

    scan(expand_sequence(reverse_complement(recognition.lower())))
    return hits
def pe_can_determine_snp(pe:PrototypeEnzyme, snp:Snp, max_num_of_mismatches):
    """Collect recognition-site masks for both alleles and drop colliding ones.

    Masks are generated with ``gen_mask`` for the wild-type and the mutant
    expanded sequence of ``snp``, using the recognition sequence of ``pe``
    and, if it differs from its reverse complement, the reverse complement
    as well. Every mask of one allele is then passed through
    ``test_intercept`` against every mask of the other allele.

    Args:
        pe: Enzyme whose ``clean_recognition_sequence`` is searched for.
        snp: SNP supplying the expanded sequences and allele coordinates.
        max_num_of_mismatches: Accepted for interface compatibility; it is
            not used by this function.

    Returns:
        A tuple ``(wt_filtered, mut_filtered)`` holding, for each allele, the
        values returned by ``test_intercept`` for the sites that did not
        raise ``fexceps.SitesCollisionError``.
    """
    recognition = pe.clean_recognition_sequence
    site_len = len(recognition)

    def mask(pattern, ex_sequence, pos_end, allele_len):
        """Run ``gen_mask`` for one strand pattern on one allele sequence."""
        return gen_mask(site_len, pattern, ex_sequence,
                        snp.snp_pos, pos_end, allele_len)

    def drop_collisions(candidates, opponents):
        """Keep candidates that survive ``test_intercept`` with all opponents."""
        survivors = []
        for site in candidates:
            try:
                # test_intercept may hand back a replacement for the site;
                # the replacement is what gets checked next and finally kept.
                for opponent in opponents:
                    site = test_intercept(site, opponent)
            except fexceps.SitesCollisionError:
                continue
            survivors.append(site)
        return survivors

    forward = expand_sequence(recognition.lower())
    wt_sites = mask(forward, snp.ex_wt_sequence,
                    snp.wt_pos_end, snp.wt_allele_len)
    mut_sites = mask(forward, snp.ex_mut_sequence,
                     snp.mut_pos_end, snp.mut_allele_len)

    is_palindromic = recognition == reverse_complement(recognition)
    if not is_palindromic:
        backward = expand_sequence(reverse_complement(recognition.lower()))
        wt_sites = wt_sites + mask(backward, snp.ex_wt_sequence,
                                   snp.wt_pos_end, snp.wt_allele_len)
        mut_sites = mut_sites + mask(backward, snp.ex_mut_sequence,
                                     snp.mut_pos_end, snp.mut_allele_len)

    # Both passes compare against the unfiltered list of the other allele.
    wt_filtered = drop_collisions(wt_sites, mut_sites)
    mut_filtered = drop_collisions(mut_sites, wt_sites)
    return (wt_filtered, mut_filtered)
def test_init_from_IUPAC(self):
    """Snp built from a sequence with one IUPAC ``Y`` code yields a c/t SNP."""
    # NOTE(review): an __init__ elsewhere in this file that parses the same
    # notation raises when fewer than ~50 bases flank the SNP. If that is
    # Snp.__init__, this short input would raise instead of parsing -- confirm
    # which of the two reflects the intended behaviour.
    iupac_input = "aaaaaaaaaaaaaaaaaaaaaaYaaaaaaaaaaaaaaaaaaaaaaa"
    wt_plain = 'aaaaaaaaaaaaaaaaaaaaaacaaaaaaaaaaaaaaaaaaaaaaa'
    mut_plain = 'aaaaaaaaaaaaaaaaaaaaaataaaaaaaaaaaaaaaaaaaaaaa'

    parsed = Snp(iupac_input)

    # Position of the ambiguity code as reported by Snp.
    self.assertEqual(parsed.snp_pos, 23)

    # Wild-type allele and the expanded sequence carrying it.
    self.assertEqual(parsed.wt_allele, 'c')
    self.assertEqual(parsed.ex_wt_sequence, expand_sequence(wt_plain))

    # Mutant allele and the expanded sequence carrying it.
    self.assertEqual(parsed.mut_allele, 't')
    self.assertEqual(parsed.ex_mut_sequence, expand_sequence(mut_plain))
def __init__(self, sequence: str, name=None):
    """Parse a SNP-bearing sequence and derive its wild-type/mutant variants.

    The SNP may be written as ``[wt/mut]`` (alleles made of ``a t g c``,
    either side may be empty) or as one of the two-base ambiguity codes
    ``r y m k s w``. Bracket notation is tried first; the ambiguity-code
    form is accepted only when exactly one of those codes occurs exactly
    once in the cleaned sequence.

    Args:
        sequence: Raw sequence text. It is lower-cased and every character
            other than ``atgcrymkswbdhvn``, ``[``, ``]`` and ``/`` is
            removed before parsing.
        name: Value stored as ``self.name``; when ``None`` a
            ``str(uuid.uuid1())`` is generated instead.

    Raises:
        fexceps.GetSNPFromSequenceError: if no single SNP can be located in
            the cleaned sequence, or if the flanking sequence around the
            SNP is too short.
    """

    def from_IUPAC(sequence: str):
        """Return ``(code, 0-based index, alleles)`` for a lone ambiguity code."""
        candidates = [
            (char, sequence.find(char), tuple(ambiguity_dict[char]))
            for char in "rymksw"
            if sequence.count(char) == 1
        ]
        return candidates[0] if len(candidates) == 1 else None

    def from_wt_slash_mut(sequence: str):
        """Return ``(token, 0-based index, (wt, mut))`` for a lone ``[wt/mut]``."""
        # Raw string: the previous plain literal relied on the invalid
        # escape ``\[``, which newer Python versions warn about.
        snps = re.findall(r'\[[atgc]*/[atgc]*]', sequence)
        if len(snps) != 1:
            return None
        token = snps[0]
        # The token always has the shape "[<wt>/<mut>]"; drop the brackets.
        return token, sequence.find(token), tuple(token[1:-1].split('/'))

    self.name = name if name is not None else str(uuid.uuid1())

    # Strip everything that is neither a nucleotide code nor SNP markup.
    sequence = re.sub(r'[^atgcrymkswbdhvnATGCRYMKSWBDHVN\[\]\/]', '',
                      sequence.lower())

    info_from_seq = from_wt_slash_mut(sequence)
    if info_from_seq is None:
        info_from_seq = from_IUPAC(sequence)
    if info_from_seq is None:
        raise fexceps.GetSNPFromSequenceError('No valid SNP info in seq: %r' % sequence)

    self.original_snp_sign, self.snp_pos, \
        (self.wt_allele, self.mut_allele) = info_from_seq
    self.snp_pos += 1  # from 0 based to real positions

    # End positions are 1-based and inclusive.
    self.wt_allele_len = len(self.wt_allele)
    self.wt_pos_end = self.snp_pos + self.wt_allele_len - 1
    self.mut_allele_len = len(self.mut_allele)
    self.mut_pos_end = self.snp_pos + self.mut_allele_len - 1

    # Substitute the SNP token (first occurrence only) with each allele.
    self.wt_sequence = sequence.replace(self.original_snp_sign, self.wt_allele, 1)
    self.mut_sequence = sequence.replace(self.original_snp_sign, self.mut_allele, 1)
    self.ex_wt_sequence = expand_sequence(self.wt_sequence)
    self.ex_mut_sequence = expand_sequence(self.mut_sequence)
    self.digest_penzymes = []

    # Since snp_pos is 1-based, this demands at least 49 bases before the
    # SNP and at least 50 bases after the end of each allele.
    # NOTE(review): the two sides differ by one base -- confirm intended.
    if self.snp_pos < 50 or (len(self.wt_sequence) - self.wt_pos_end < 50
                             or len(self.mut_sequence) - self.mut_pos_end < 50):
        raise fexceps.GetSNPFromSequenceError('Flanking sequence is to short')