def test_ngram(self):
    """Verify that query.ngram serializes to the expected wire JSON.

    Covers a single term, a list of terms, and both forms with explicit
    ``min``/``max`` n-gram sizes (keys appear alphabetized in the JSON).
    """
    cases = [
        (query.ngram("str"), '{"ngram":"str"}'),
        (query.ngram(["str0", "str1"]), '{"ngram":["str0","str1"]}'),
        (query.ngram("str", min=2, max=3), '{"max":3,"min":2,"ngram":"str"}'),
        (query.ngram(["str0", "str1"], min=2, max=3),
         '{"max":3,"min":2,"ngram":["str0","str1"]}'),
    ]
    for expr, expected in cases:
        self.assertJson(expr, expected)
Beispiel #2
0
    def test_ngram(self):
        """Run query.ngram against the server and check the produced n-grams.

        Exercises the default sizes (1-2) and explicit ``min``/``max``
        bounds, for both a single string and a list of strings.
        """
        cases = [
            (query.ngram("what"),
             ["w", "wh", "h", "ha", "a", "at", "t"]),
            (query.ngram("what", min=2, max=3),
             ["wh", "wha", "ha", "hat", "at"]),
            (query.ngram(["john", "doe"]),
             ["j", "jo", "o", "oh", "h", "hn", "n",
              "d", "do", "o", "oe", "e"]),
            (query.ngram(["john", "doe"], min=3, max=4),
             ["joh", "john", "ohn", "doe"]),
        ]
        for expr, expected in cases:
            self.assertEqual(self._q(expr), expected)
Beispiel #3
0
def wordPartsGenerator(word):
    """Build an FQL expression yielding the distinct lowercase n-grams of *word*.

    The expression computes candidate n-gram lengths as
    ``Length(word) - index`` for each index in the module-level
    ``maxNgrams`` array (presumably something like ``[0, 1, 2, ...]`` —
    defined elsewhere in this file), drops lengths below 3, and unions
    the n-grams of each surviving length.

    NOTE: lambda parameter names ("index", "l") are kept unchanged on
    purpose — faunadb-python serializes them into the query AST.
    """
    # Candidate n-gram lengths: shrinking maxNgrams produces fewer
    # n-grams per word; [0] alone would yield only the full word.
    candidate_lengths = q.map_(
        lambda index: q.subtract(q.length(word), index),
        maxNgrams)

    # Keep only lengths of at least 3 characters.
    usable_lengths = q.filter_(
        lambda l: q.gte(l, 3),
        q.var('indexes'))

    # One Ngram(l, l) call per surviving length, merged and de-duplicated.
    distinct_ngrams = q.distinct(
        q.union(
            q.map_(lambda l: q.ngram(q.lowercase(word), l, l),
                   q.var('indexesFiltered'))))

    # Bindings evaluate in insertion order, so each step may reference
    # the previous one via q.var(...).
    return q.let(
        {
            "indexes": candidate_lengths,
            "indexesFiltered": usable_lengths,
            "ngramsArray": distinct_ngrams,
        },
        q.var('ngramsArray'))