def index_search(self, pattern):
    """Search the suffix array for ``pattern``, using the k-mer index to bound the scan.

    The pattern is turned into integer keys via ``self.kmer_to_int``; the
    nearest keys present in ``self.index_dict`` give the initial left and
    right positions handed to ``sa_utils.simple_accelerant``.

    pattern: string to look up. If shorter than ``self.depth`` it is padded
        with "A" for the lower key and "T" for the upper key (presumably the
        smallest and largest symbols of the alphabet -- confirm against
        ``kmer_to_int``).

    return: whatever ``sa_utils.simple_accelerant`` returns for the bounded
        search.
    """
    missing = self.depth - len(pattern)
    if missing <= 0:
        # Pattern is at least `depth` long: key on its leading `depth` characters.
        lo_key = self.kmer_to_int(pattern[:self.depth])
        hi_key = lo_key + 1
    else:
        # Short pattern: widen the key range by one on each side of the
        # padded extremes.
        lo_key = self.kmer_to_int(pattern + "A" * missing) - 1
        hi_key = self.kmer_to_int(pattern + "T" * missing) + 1

    # Walk downwards to the closest indexed key, giving up below lower_bound.
    while lo_key not in self.index_dict and lo_key >= self.lower_bound:
        lo_key -= 1
    if lo_key in self.index_dict:
        left = self.index_dict[lo_key]
    else:
        left = 0

    # Walk upwards to the closest indexed key, giving up above upper_bound.
    while hi_key not in self.index_dict and hi_key <= self.upper_bound:
        hi_key += 1
    if hi_key in self.index_dict:
        right = self.index_dict[hi_key]
    else:
        right = len(self.suffix_array) - 1

    return sa_utils.simple_accelerant(
        self.corpus_str, self.suffix_array, pattern, initL=left, initR=right)
def search(self, search_string):
    """Find every occurrence of ``search_string`` in the indexed corpus.

    search_string: the substring to look for. When ``self.force_unicode`` is
        set and a byte string is supplied, it is decoded as UTF-8 with
        undecodable bytes replaced; text that is already unicode is used
        as-is.

    return: collection (list) int of offsets in original string where
        substring can be found; an empty list when the initial suffix-array
        lookup reports a negative index.

    raises: ValueError if the suffix array or the LCP array is missing/empty.
    """
    if not self.sa or not self.lcp:
        raise ValueError("suffix array and LCP array must be built before searching")

    # Decode only real byte strings: calling unicode()/decode on text that is
    # already unicode raises TypeError on Python 2, and the `unicode` builtin
    # does not exist on Python 3.
    if self.force_unicode and isinstance(search_string, (bytes, bytearray)):
        search_string = search_string.decode('utf-8', 'replace')

    sa_i = sa_utils.simple_accelerant(self.corpus_str, self.sa, search_string)
    if sa_i < 0:
        return []
    return sa_utils.lcp_scan(self.lcp, self.sa, search_string, sa_i)
def search(self, search_string):
    """Find every occurrence of ``search_string`` in the indexed corpus.

    search_string: the substring to look for. When ``self.force_unicode`` is
        set and a byte string is supplied, it is decoded as UTF-8 with
        undecodable bytes replaced; text that is already unicode is used
        as-is.

    return: collection (list) int of offsets in original string where
        substring can be found; an empty list when the initial suffix-array
        lookup reports a negative index.

    raises: ValueError if the suffix array or the LCP array is missing/empty.
    """
    if not self.sa or not self.lcp:
        raise ValueError("suffix array and LCP array must be built before searching")

    # Decode only real byte strings: calling unicode()/decode on text that is
    # already unicode raises TypeError on Python 2, and the `unicode` builtin
    # does not exist on Python 3.
    if self.force_unicode and isinstance(search_string, (bytes, bytearray)):
        search_string = search_string.decode('utf-8', 'replace')

    sa_i = sa_utils.simple_accelerant(self.corpus_str, self.sa, search_string)
    if sa_i < 0:
        return []
    return sa_utils.lcp_scan(self.lcp, self.sa, search_string, sa_i)
def index_search(self, pattern):
    """Search the suffix array for ``pattern``, using the k-mer index to bound the scan.

    The pattern is turned into integer keys via ``self.kmer_to_int``; the
    nearest keys present in ``self.index_dict`` give the initial left and
    right positions handed to ``sa_utils.simple_accelerant``.

    pattern: string to look up. If shorter than ``self.depth`` it is padded
        with "A" for the lower key and "T" for the upper key (presumably the
        smallest and largest symbols of the alphabet -- confirm against
        ``kmer_to_int``).

    return: whatever ``sa_utils.simple_accelerant`` returns for the bounded
        search.
    """
    missing = self.depth - len(pattern)
    if missing <= 0:
        # Pattern is at least `depth` long: key on its leading `depth` characters.
        lo_key = self.kmer_to_int(pattern[:self.depth])
        hi_key = lo_key + 1
    else:
        # Short pattern: widen the key range by one on each side of the
        # padded extremes.
        lo_key = self.kmer_to_int(pattern + "A" * missing) - 1
        hi_key = self.kmer_to_int(pattern + "T" * missing) + 1

    # Walk downwards to the closest indexed key, giving up below lower_bound.
    while lo_key not in self.index_dict and lo_key >= self.lower_bound:
        lo_key -= 1
    if lo_key in self.index_dict:
        left = self.index_dict[lo_key]
    else:
        left = 0

    # Walk upwards to the closest indexed key, giving up above upper_bound.
    while hi_key not in self.index_dict and hi_key <= self.upper_bound:
        hi_key += 1
    if hi_key in self.index_dict:
        right = self.index_dict[hi_key]
    else:
        right = len(self.suffix_array) - 1

    return sa_utils.simple_accelerant(
        self.corpus_str, self.suffix_array, pattern, initL=left, initR=right)