def test_list(self):
    """breakup_list() on several words yields their 2- and 3-letter pieces."""
    # Each distinct substring of length 2 or 3 is listed once, even when
    # it occurs more than once (e.g. 'ba' appears in 'bar' and 'bazbaz').
    expected = [
        'fo', 'oo', 'foo',
        'ba', 'ar', 'bar',
        'az', 'zb', 'baz', 'azb', 'zba',
    ]
    actual = ngrams.breakup_list(['foo', 'bar', 'bazbaz'], 2, 3)
    self.assertSameSet(actual, expected)
def test_list(self):
    """Verify the n-grams produced by breakup_list() for a list of words."""
    words = ['foo', 'bar', 'bazbaz']
    # Expected substrings of length 2..3, grouped by the word that first
    # contributes them; repeats across words are listed only once.
    from_foo = ['fo', 'oo', 'foo']
    from_bar = ['ba', 'ar', 'bar']
    from_bazbaz = ['az', 'zb', 'baz', 'azb', 'zba']
    self.assertSameSet(
        ngrams.breakup_list(words, 2, 3),
        from_foo + from_bar + from_bazbaz,
    )
def test_memory(self):
    """Check that calling into ngrams does not leave extra objects alive.

    The number of objects tracked by the garbage collector is sampled
    before and after one call each to ``ngrams.breakup_list`` and
    ``ngrams.breakup_word``; once the results are deleted and a
    collection has run, both counts must match.
    """
    # make sure we aren't leaking memory in our C module
    gc.collect()
    start_count = len(gc.get_objects())
    results = ngrams.breakup_list(['foo', 'bar', 'bazbaz'], 1, 3)
    results2 = ngrams.breakup_word('miroiscool', 1, 3)
    # Drop our references so that only leaked objects could remain.
    del results
    del results2
    gc.collect()
    end_count = len(gc.get_objects())
    # assertEqual, not the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(start_count, end_count)
def _ngrams_for_item(item_info):
    """Break the search terms of *item_info* into N-grams.

    The terms are handed to ``ngrams.breakup_list`` with NGRAM_MIN and
    NGRAM_MAX as the length bounds, and its result is returned unchanged.
    """
    terms = item_info.search_terms
    return ngrams.breakup_list(terms, NGRAM_MIN, NGRAM_MAX)
def test_list(self):
    """breakup_list() should return the 2- and 3-grams of every word."""
    ngram_results = ngrams.breakup_list(["foo", "bar", "bazbaz"], 2, 3)
    # One entry per distinct substring; "ba" and "baz" are shared between
    # words / repeated inside "bazbaz" but are expected only once.
    wanted = [
        "fo", "oo", "foo",
        "ba", "ar", "bar",
        "az", "zb", "baz", "azb", "zba",
    ]
    self.assertSameSet(ngram_results, wanted)
def calc_ngrams(item_info):
    """Return the N-grams to index for an ItemInfo object.

    The item's search text is split into words using WORDMATCHER, and
    those words are broken up into N-grams of length 1 up to NGRAM_MAX.
    """
    search_text = _calc_search_text(item_info)
    matched_words = WORDMATCHER.findall(search_text)
    return ngrams.breakup_list(matched_words, 1, NGRAM_MAX)