Esempio n. 1
0
 def search(word1, word2, k):
     """Combine the postings of two words through ``positionAND``.

     Both words are normalised with ``norm_word``. When either normalised
     term is missing from ``vocab`` an empty list is returned. Otherwise the
     postings of each term (located in ``post`` via the ``'id'`` stored in
     its ``vocab`` entry) are handed to ``positionAND`` together with
     ``int(k)``, and that call's result is returned.

     NOTE(review): ``k`` is presumably the maximum positional distance for a
     proximity query -- confirm against ``positionAND``.
     """
     first = norm_word(word1)
     second = norm_word(word2)
     if first in vocab and second in vocab:
         first_postings = post[vocab[first]['id']]
         second_postings = post[vocab[second]['id']]
         return positionAND(first_postings, second_postings, int(k))
     # At least one term is unknown, so there is nothing to intersect.
     return []
 def get_postings(self, word):
     """Return the postings for ``word``, or ``[]`` if ``matches`` rejects it.

     The word is normalised with ``norm_word`` and the lookup is delegated to
     the ``get_postings`` name resolved at call time, passing along this
     object's ``table``, ``vocab_str``, ``posting_bytes``, ``added_zeros``,
     ``decode`` and ``encoding`` attributes.

     NOTE(review): the inner ``get_postings`` is presumably a module-level
     helper rather than this method -- confirm where it is defined.
     """
     if matches(word):
         normalized = norm_word(word)
         return get_postings(
             normalized,
             self.table,
             self.vocab_str,
             self.posting_bytes,
             self.added_zeros,
             self.decode,
             self.encoding,
         )
     return []
 def get_postings(self, word):
     """Return the decoded postings for ``word``, or ``[]`` when unavailable.

     An empty list is returned when ``matches`` rejects the word or when
     ``self.find_pos`` reports ``-1`` for the normalised term. Otherwise the
     value from ``self.get_postings_bytes`` at the found position is passed
     to ``self.decode_postings`` and the decoded result is returned.
     """
     if matches(word):
         position = self.find_pos(norm_word(word))
         if position != -1:
             encoded = self.get_postings_bytes(position)
             return self.decode_postings(encoded)
     # Either the word was rejected or find_pos signalled "not found" (-1).
     return []
 def get_postings(self, word):
     """Return the postings for ``word``, or ``[]`` if ``matches`` rejects it.

     The word is normalised with ``norm_word`` and the lookup is delegated to
     the ``get_postings`` name resolved at call time, passing along this
     object's ``table``, ``vocab_str`` and ``postings_str`` attributes plus
     ``fromGammaStr``.

     NOTE(review): ``fromGammaStr`` is presumably the decoder for
     gamma-coded posting strings -- confirm against its definition.
     """
     return (
         get_postings(
             norm_word(word),
             self.table,
             self.vocab_str,
             self.postings_str,
             fromGammaStr,
         )
         if matches(word)
         else []
     )
 def word(self, ast):
     """Return the postings stored for the word ``ast``.

     ``ast`` is normalised with ``norm_word``; the normalised form is looked
     up in ``self.vocabulary`` and the ``'id'`` of that entry is used to
     index ``self.postings``.

     Raises:
         Exception: if the normalised word is not in ``self.vocabulary``.
             The message contains the original, un-normalised ``ast``.
     """
     key = norm_word(ast)
     if key in self.vocabulary:
         return self.postings[self.vocabulary[key]['id']]
     raise Exception('Vocabulary doesn`t contain word ' + ast)
Esempio n. 6
0
def filter_word(word):
    """Return the normalised form of ``word``, or ``''`` if it is rejected.

    Every ``'*'`` is removed from ``word`` before it is tested against
    ``word_regex`` with ``re.match``. On a match, the original word (with
    its ``'*'`` characters still in place) is passed to ``norm_word``.
    """
    accepted = re.match(word_regex, word.replace('*', ''))
    return norm_word(word) if accepted else ''