def test_ngram(self):
    """Check that query.ngram serializes to the expected wire-format JSON."""
    # Single term, no size bounds.
    self.assertJson(query.ngram("str"), '{"ngram":"str"}')
    # Multiple terms serialize as a JSON array.
    self.assertJson(query.ngram(["str0", "str1"]), '{"ngram":["str0","str1"]}')
    # Explicit min/max bounds are emitted alongside the term.
    self.assertJson(
        query.ngram("str", min=2, max=3),
        '{"max":3,"min":2,"ngram":"str"}')
    # Bounds combined with a term array.
    self.assertJson(
        query.ngram(["str0", "str1"], min=2, max=3),
        '{"max":3,"min":2,"ngram":["str0","str1"]}')
def test_ngram(self):
    """Run Ngram queries against the server and verify the grams returned."""
    # Default bounds: the result contains both 1-grams and 2-grams.
    self.assertEqual(
        self._q(query.ngram("what")),
        ["w", "wh", "h", "ha", "a", "at", "t"])
    # Explicit bounds restrict output to 2- and 3-grams.
    self.assertEqual(
        self._q(query.ngram("what", min=2, max=3)),
        ["wh", "wha", "ha", "hat", "at"])
    # A list of terms yields the concatenation of each term's grams.
    self.assertEqual(
        self._q(query.ngram(["john", "doe"])),
        ["j", "jo", "o", "oh", "h", "hn", "n", "d", "do", "o", "oe", "e"])
    # Bounds apply per term; terms shorter than min contribute fewer grams.
    self.assertEqual(
        self._q(query.ngram(["john", "doe"], min=3, max=4)),
        ["joh", "john", "ohn", "doe"])
def wordPartsGenerator(word):
    """Build an FQL expression yielding the distinct lowercase ngrams of `word`.

    Relies on the enclosing scope's `maxNgrams` (a list of integer offsets):
    each offset is subtracted from the word's length to form a candidate
    ngram size, sizes shorter than 3 are dropped, and the remaining sizes
    are turned into ngrams which are unioned and de-duplicated.
    """
    # Candidate ngram lengths: word length minus each configured offset.
    # Shrinking maxNgrams yields fewer ngrams per word; [0] would produce
    # only the word itself, [0, 1] the word plus all one-shorter ngrams, etc.
    candidate_lengths = q.map_(
        lambda offset: q.subtract(q.length(word), offset),
        maxNgrams)
    # Drop lengths below 3 so very short fragments are never indexed.
    usable_lengths = q.filter_(lambda size: q.gte(size, 3), q.var('indexes'))
    # For each surviving length, emit the fixed-size ngrams of the
    # lowercased word, then flatten and de-duplicate the result.
    distinct_ngrams = q.distinct(
        q.union(
            q.map_(
                lambda size: q.ngram(q.lowercase(word), size, size),
                q.var('indexesFiltered'))))
    return q.let(
        {
            "indexes": candidate_lengths,
            "indexesFiltered": usable_lengths,
            "ngramsArray": distinct_ngrams,
        },
        q.var('ngramsArray'))