def find_discriminating_primers(cls, alignment, plen=25, max_mismatches=3, min_match_mismatches=3, first_match_mismatches=3, first_may_match=0, AT_first=True, outgroup=1, reverse=False):
    '''Find primers that fit the in-group rows and discriminate the outgroup rows.

    Every column index ``s`` of the alignment is tried as the right edge of a
    window ``alignment[:, max(0, s - max_len):s]``. The window is scanned from
    its last column towards its first, growing the candidate by one column
    per step, until the candidate satisfies all requirements, is rejected,
    or the window runs out of columns.

    Arguments:
    alignment              -- alignment object; its last `outgroup` rows are
                              treated as the outgroup, the other rows as the
                              group the primer is designed for.
    plen                   -- primer length: a single number, or a
                              (min, max) tuple/list.
    max_mismatches         -- maximum number of columns, past the terminal
                              zone, in which the group rows are not a single
                              letter.
    min_match_mismatches   -- minimum number of columns, past the terminal
                              zone, for which `_match_mismatch` holds.
    first_match_mismatches -- size of the terminal zone: the columns scanned
                              first, which must satisfy `_match_mismatch`.
    first_may_match        -- number of columns scanned first that may fail
                              `_match_mismatch` provided the group rows are a
                              single letter; each such column enlarges the
                              terminal zone by one.
    AT_first               -- if true, skip windows whose last column in the
                              first row is not A or T (case-insensitive).
    outgroup               -- number of trailing rows forming the outgroup.
    reverse                -- if true, search the reverse complement of the
                              alignment and report positions as
                              ``alignment_length - s``.

    Returns a list of ``(position, primer_alignment)`` tuples, where the
    position is ``s + 1`` (or ``alignment_length - s`` when `reverse` is set)
    and the primer alignment holds the group rows of the accepted columns
    after `trim()`.
    '''
    if reverse:
        alignment = alignment.reverse_complement()
    total_cols = alignment.get_alignment_length()
    if isinstance(plen, (tuple, list)):
        shortest, longest = plen[0], plen[1]
    else:
        shortest = longest = plen
    found = []
    for right_edge in alignment.cols:
        if right_edge < shortest - 1:
            continue
        window = alignment[:, max(0, right_edge - longest):right_edge]
        if AT_first and window[0, -1].upper() not in 'AT':
            continue
        required_len = shortest
        variable_cols = 0
        discriminating_left = min_match_mismatches
        acceptable = True
        zone_extension = 0
        used = 0  # stays 0 when the window has no columns to scan
        for used, idx in enumerate(window.rcols, 1):
            column = window[:, idx]
            ingroup = column[:-outgroup]
            if used <= first_match_mismatches + zone_extension:
                # terminal zone: stricter rules, nothing is counted here
                if used > first_may_match:
                    if not cls._match_mismatch(column, outgroup):
                        acceptable = False
                        break
                elif not issingleletter(ingroup):
                    acceptable = False
                    break
                elif not cls._match_mismatch(column, outgroup):
                    # tolerated non-discriminating column: widen the zone
                    zone_extension += 1
                continue
            if '-' in ingroup:
                if not issingleletter(ingroup, '-'):
                    acceptable = False
                    break
                # column accepted by issingleletter(ingroup, '-'):
                # it does not count towards the primer length
                required_len += 1
            if cls._match_mismatch(column, outgroup):
                discriminating_left -= 1
            if not issingleletter(ingroup):
                variable_cols += 1
            if (used >= required_len
                    and discriminating_left <= 0
                    and variable_cols <= max_mismatches):
                break
        # the same requirements must also hold when the window was exhausted
        complete = (used >= required_len
                    and discriminating_left <= 0
                    and variable_cols <= max_mismatches)
        if not (acceptable and complete):
            continue
        if reverse:
            position = total_cols - right_edge
        else:
            position = right_edge + 1
        first_used = window.get_alignment_length() - used
        found.append((position, window[:-outgroup, first_used:].trim()))
    return found
def _find_primers(self, alignment, plen=25, max_mismatches=3, min_match_mismatches=3, first_match_mismatches=3, first_may_match=0, AT_first=True, outgroup=1):
    '''Find primers that fit the in-group rows and discriminate the outgroup rows.

    Every column index ``s`` of the alignment is tried as the right edge of a
    window ``alignment[:, max(0, s - max_len):s]`` which is scanned from its
    last column towards its first.

    Arguments:
    alignment              -- alignment object; its last `outgroup` rows are
                              treated as the outgroup.
    plen                   -- primer length: a single number, or a
                              (min, max) tuple/list.
    max_mismatches         -- maximum number of columns, past the terminal
                              zone, in which the group rows are not a single
                              letter.
    min_match_mismatches   -- minimum number of columns, past the terminal
                              zone, for which `_match_mismatch` holds.
    first_match_mismatches -- number of columns scanned first that must
                              satisfy `_match_mismatch`.
    first_may_match        -- number of columns scanned first that may fail
                              `_match_mismatch` provided the group rows are a
                              single letter; each such column enlarges the
                              terminal zone by one.
    AT_first               -- if true, skip windows whose last column in the
                              first row is not A or T (case-insensitive).
    outgroup               -- number of trailing rows forming the outgroup.

    Returns a list of ``(s + 1, primer_alignment)`` tuples; the primer
    alignment holds the group rows of the accepted columns after `trim()`.
    Nothing is written to stdout.
    '''
    primers = []
    if isinstance(plen, (tuple, list)):
        min_len = plen[0]
        max_len = plen[1]
    else:
        min_len = max_len = plen
    for s in alignment.cols:
        if s < min_len - 1:
            continue
        mlen = min_len               # required length; grows on skipped gap columns
        mis = 0                      # columns where the group is not a single letter
        mmis = min_match_mismatches  # discriminating columns still required
        pali = alignment[:, max(0, s - max_len):s]
        if AT_first and pali[0, -1].upper() not in 'AT':
            continue
        good = True
        cur_len = 0
        first_matched = 0
        for i in pali.rcols:
            cur_len += 1
            col = pali[:, i]
            group = col[:-outgroup]
            # terminal zone: stricter rules, nothing is counted here
            if cur_len <= first_match_mismatches + first_matched:
                if cur_len <= first_may_match:
                    if not issingleletter(group):
                        good = False
                        break
                    elif not self._match_mismatch(col, outgroup):
                        # tolerated non-discriminating column: widen the zone
                        first_matched += 1
                elif not self._match_mismatch(col, outgroup):
                    good = False
                    break
                continue
            if '-' in group:
                if issingleletter(group, '-'):
                    mlen += 1
                else:
                    good = False
                    break
            if self._match_mismatch(col, outgroup):
                mmis -= 1
            if not issingleletter(group):
                mis += 1
            if cur_len >= mlen and mmis <= 0 and mis <= max_mismatches:
                break
        # the requirements must also hold when the window was exhausted
        good &= cur_len >= mlen and mmis <= 0 and mis <= max_mismatches
        if not good:
            continue
        primers.append((s + 1, pali[:-outgroup, pali.get_alignment_length() - cur_len:].trim()))
    return primers
def trim(self, gap='-'):
    '''Trim alignment termini containing gaps in columns and columns containing only gaps

    The alignment is first cut down to the span between its first and its
    last gap-free column. Inside that span every column for which
    ``issingleletter(column, gap)`` holds is dropped and the remaining
    column runs are joined back together.

    Arguments:
    gap -- the gap character to look for in columns.

    Returns the trimmed alignment, or ``AlignmentExt([])`` when no column
    is free of gaps (this includes an alignment without columns).
    '''
    # trim margins: locate the first gap-free column
    for i in self.cols:
        if gap not in self[:, i]:
            lmargin = i
            break
    else:
        # the scan found no gap-free column; testing this directly avoids
        # mistaking a gap-free *last* column for "nothing found"
        return AlignmentExt([])
    # ...and the end of the last gap-free column; the default only matters
    # if rcols yields nothing, which is not expected after the scan above
    rmargin = lmargin + 1
    for i in self.rcols:
        if gap not in self[:, i]:
            rmargin = i + 1
            break
    ali = self[:, lmargin:rmargin]
    # remove gaps-only
    cols = sorted(i for i in ali.cols if not issingleletter(ali[:, i], gap))
    if not cols:
        return AlignmentExt([])
    # group the surviving column indices into contiguous slices
    slices = []
    start = cols[0]
    for prev, cur in zip(cols, cols[1:]):
        if cur - prev > 1:
            slices.append(slice(start, prev + 1))
            start = cur
    slices.append(slice(start, cols[-1] + 1))
    final = ali[:, slices[0]]
    for s in slices[1:]:
        final += ali[:, s]
    return final
def trim(self, gap='-'):
    '''Trim alignment termini containing gaps in columns and columns containing only gaps

    Steps:
    1. cut the alignment to the span from its first to its last column
       that does not contain `gap`;
    2. inside that span, drop every column for which
       ``issingleletter(column, gap)`` holds;
    3. concatenate the remaining runs of columns.

    Arguments:
    gap -- the gap character to look for in columns.

    Returns the trimmed alignment. ``AlignmentExt([])`` is returned when
    the alignment has no gap-free column, which also covers an alignment
    with no columns at all.
    '''
    # trim margins
    lmargin = None
    for i in self.cols:
        if gap not in self[:, i]:
            lmargin = i
            break
    if lmargin is None:
        # nothing gap-free was found; checking this explicitly (rather than
        # comparing with the last column index) keeps alignments whose only
        # gap-free column is the last one
        return AlignmentExt([])
    rmargin = lmargin + 1  # fallback in case rcols yields nothing
    for i in self.rcols:
        if gap not in self[:, i]:
            rmargin = i + 1
            break
    ali = self[:, lmargin:rmargin]
    # remove gaps-only
    kept = sorted(i for i in ali.cols if not issingleletter(ali[:, i], gap))
    if not kept:
        return AlignmentExt([])
    # split the kept indices into runs of consecutive columns
    slices = []
    run_start = kept[0]
    for left, right in zip(kept, kept[1:]):
        if right - left > 1:
            slices.append(slice(run_start, left + 1))
            run_start = right
    slices.append(slice(run_start, kept[-1] + 1))
    final = ali[:, slices[0]]
    for s in slices[1:]:
        final += ali[:, s]
    return final
def find_specific_primers(alignment, plen=25, max_mismatches=3, min_first_matches=3, AT_first=True, reverse=False):
    '''Find primers that fit all rows of the alignment.

    Every column index ``s`` of the alignment is tried as the right edge of a
    window ``alignment[:, max(0, s - max_len):s]`` which is scanned from its
    last column towards its first, growing the candidate by one column per
    step.

    Arguments:
    alignment         -- alignment object to search.
    plen              -- primer length: a single number, or a (min, max)
                         tuple/list.
    max_mismatches    -- number of columns that are not a single letter
                         which a candidate may contain; the scan stops at
                         the column that exceeds it.
    min_first_matches -- number of columns scanned first that must be a
                         single letter.
    AT_first          -- if true, skip windows whose last column in the
                         first row is not A or T (case-insensitive).
    reverse           -- if true, search the reverse complement of the
                         alignment and report positions as
                         ``alignment_length - s``.

    Returns a list of ``(position, primer_alignment)`` tuples, where the
    position is ``s + 1`` (or ``alignment_length - s`` when `reverse` is
    set) and the primer alignment is the accepted columns after `trim()`.
    '''
    primers = []
    if reverse:
        alignment = alignment.reverse_complement()
    ali_len = alignment.get_alignment_length()
    if isinstance(plen, (tuple, list)):
        min_len = plen[0]
        max_len = plen[1]
    else:
        min_len = max_len = plen
    for s in alignment.cols:
        if s < min_len - 1:
            continue
        mlen = min_len  # required length; grows on skipped gap columns
        mis = 0         # columns that are not a single letter
        pali = alignment[:, max(0, s - max_len):s]
        if AT_first and pali[0, -1].upper() not in 'AT':
            continue
        good = True
        cur_len = 0
        for i in pali.rcols:
            cur_len += 1
            col = pali[:, i]
            # the columns scanned first must be a single letter
            if cur_len <= min_first_matches:
                if not issingleletter(col):
                    good = False
                    break
                continue
            if '-' in col:
                if issingleletter(col, '-'):
                    mlen += 1
                else:
                    good = False
                    break
            if not issingleletter(col):
                mis += 1
            if mis > max_mismatches:
                # too many variable columns: keep what was collected before
                # this column if that is long enough, otherwise give up
                if cur_len > mlen:
                    cur_len -= 1
                else:
                    good = False
                break
        # The window may run out of columns before the required length is
        # reached (e.g. at s == min_len - 1, or when skipped gap columns
        # raised mlen); such a candidate is too short and must be rejected.
        if cur_len < mlen:
            good = False
        if not good:
            continue
        pali_len = pali.get_alignment_length()
        # drop the outermost column if it is not a single letter
        # (this may still shorten an exactly-mlen candidate by one column)
        if not issingleletter(pali[:, pali_len - cur_len]):
            cur_len -= 1
        primers.append((ali_len - s if reverse else s + 1,
                        pali[:, pali_len - cur_len:].trim()))
    return primers
def _match_mismatch(self, s, outgroup=1):
    '''Tell whether a column matches within the group and mismatches the outgroup.

    Arguments:
    s        -- sequence of column letters; the last `outgroup` items are
                the outgroup part, the items before them the group part.
    outgroup -- number of trailing items belonging to the outgroup.

    Returns False for an empty (falsy) `s`. Otherwise the result is truthy
    only when ``issingleletter`` accepts the group part and the first
    letter of `s` does not occur in the outgroup part.
    '''
    verdict = False
    if s:
        uniform = issingleletter(s[:-outgroup])
        verdict = uniform and s[0] not in s[-outgroup:]
    return verdict
def _match_mismatch(s, outgroup=1):
    '''Check that a column is uniform in the group and differs from the outgroup.

    Arguments:
    s        -- sequence of column letters; its last `outgroup` items form
                the outgroup part, everything before them the group part.
    outgroup -- number of trailing items belonging to the outgroup.

    Returns False when `s` is empty (falsy). Otherwise the result is truthy
    only if ``issingleletter`` accepts the group part and the first letter
    of `s` is absent from the outgroup part.
    '''
    if not s:
        return False
    group_is_uniform = issingleletter(s[:-outgroup])
    return group_is_uniform and s[0] not in s[-outgroup:]