def search(word1, word2, k):
    """Run ``positionAND`` over the postings of two normalized words.

    Both words are normalized with ``norm_word`` and looked up in the
    module-level ``vocab``. If either normalized word is absent, an empty
    list is returned. Otherwise each word's postings are fetched from
    ``post`` using the ``'id'`` stored in its vocabulary entry.

    ``k`` is converted with ``int`` before being handed to ``positionAND``.
    NOTE(review): presumably ``k`` is the proximity window; confirm
    against ``positionAND``.
    """
    first = norm_word(word1)
    second = norm_word(word2)

    # Both terms must be known; otherwise there is nothing to intersect.
    if not (first in vocab and second in vocab):
        return []

    first_postings = post[vocab[first]['id']]
    second_postings = post[vocab[second]['id']]
    return positionAND(first_postings, second_postings, int(k))
def get_postings(self, word):
    """Return the postings for ``word``, or ``[]`` if ``matches`` rejects it.

    The word is normalized with ``norm_word`` and the lookup is delegated
    to the ``get_postings`` function, which receives this object's table,
    vocabulary string, posting bytes, added-zeros value, decoder and
    encoding.
    """
    if matches(word):
        normalized = norm_word(word)
        # NOTE(review): assumes this def sits inside a class, so the name
        # below resolves to the module-level ``get_postings`` helper and
        # not to this method -- confirm.
        return get_postings(
            normalized,
            self.table,
            self.vocab_str,
            self.posting_bytes,
            self.added_zeros,
            self.decode,
            self.encoding,
        )
    return []
def get_postings(self, word):
    """Return the decoded postings for ``word``, or ``[]`` when unavailable.

    An empty list is returned when ``matches`` rejects the word, or when
    ``self.find_pos`` returns ``-1`` for the normalized word. Otherwise the
    position is passed to ``self.get_postings_bytes`` and the result is
    decoded with ``self.decode_postings``.
    """
    if matches(word):
        position = self.find_pos(norm_word(word))
        # -1 is the sentinel find_pos returns for "no position".
        if position != -1:
            raw = self.get_postings_bytes(position)
            return self.decode_postings(raw)
    return []
def get_postings(self, word):
    """Return the postings for ``word``, or ``[]`` if ``matches`` rejects it.

    The word is normalized with ``norm_word`` and the lookup is delegated
    to the ``get_postings`` function, which receives this object's table,
    vocabulary string and postings string, plus ``fromGammaStr``.
    NOTE(review): ``fromGammaStr`` is presumably the decoder for
    gamma-coded postings; confirm.
    """
    if matches(word):
        normalized = norm_word(word)
        # NOTE(review): assumes this def sits inside a class, so the name
        # below resolves to the module-level ``get_postings`` helper and
        # not to this method -- confirm.
        return get_postings(
            normalized,
            self.table,
            self.vocab_str,
            self.postings_str,
            fromGammaStr,
        )
    return []
def word(self, ast):
    """Return the postings stored for the word ``ast``.

    ``ast`` is normalized with ``norm_word``; the normalized form is the
    key into ``self.vocabulary``, whose entry's ``'id'`` indexes
    ``self.postings``.

    Raises:
        Exception: if the normalized word is not in ``self.vocabulary``.
            The message includes the original, un-normalized ``ast``.
    """
    key = norm_word(ast)
    if key in self.vocabulary:
        return self.postings[self.vocabulary[key]['id']]
    raise Exception('Vocabulary doesn`t contain word ' + ast)
def filter_word(word):
    """Return the normalized ``word``, or ``''`` if it fails ``word_regex``.

    The regex check runs on the word with every ``'*'`` removed, but
    ``norm_word`` receives the original word, asterisks included.
    NOTE(review): ``'*'`` is presumably a wildcard marker; confirm.
    """
    without_stars = word.replace('*', '')
    if re.match(word_regex, without_stars):
        return norm_word(word)
    return ''