def test_memory(self):
    """Check that calling into the ngrams C module does not leak objects.

    The number of objects tracked by the garbage collector is sampled
    before and after one ``breakup_list`` call and one ``breakup_word``
    call whose results are deleted again; the two counts must be equal.
    """
    # make sure we aren't leaking memory in our C module
    # Collect first so garbage left over from earlier code does not skew
    # the baseline count.
    gc.collect()
    start_count = len(gc.get_objects())
    results = ngrams.breakup_list(['foo', 'bar', 'bazbaz'], 1, 3)
    results2 = ngrams.breakup_word('miroiscool', 1, 3)
    # Drop our references so the returned objects become collectable.
    del results
    del results2
    gc.collect()
    end_count = len(gc.get_objects())
    # assertEqual replaces the deprecated assertEquals alias, which no
    # longer exists in Python 3.12+.
    self.assertEqual(start_count, end_count)
def _ngrams_for_term(term):
    """Return the list of N-grams to search for in order to match ``term``.

    A term of at most NGRAM_MAX characters is returned unchanged as a
    one-element list.  A longer term is handed to ``ngrams.breakup_word``
    with both the minimum and the maximum size set to NGRAM_MAX.
    """
    if len(term) > NGRAM_MAX:
        # Only the longest N-grams are requested: any shorter N-gram is
        # just a substring of one of them.
        return ngrams.breakup_word(term, NGRAM_MAX, NGRAM_MAX)
    return [term]
def _ngrams_for_term(self, term):
    """Return the list of N-grams to search for in order to match ``term``.

    A term of at most NGRAM_MAX characters is searched for as-is, so the
    result is ``[term]``.  A longer term is split by
    ``ngrams.breakup_word`` with both size bounds set to NGRAM_MAX.
    """
    fits_in_one_ngram = len(term) <= NGRAM_MAX
    # For long terms only the longest N-grams are needed, since shorter
    # N-grams are just substrings of those.
    return (
        [term]
        if fits_in_one_ngram
        else ngrams.breakup_word(term, NGRAM_MAX, NGRAM_MAX)
    )
def _ngrams_for_term(term):
    """Return the list of N-grams to search for in order to match ``term``.

    Three cases, decided by the length of ``term``:

    * shorter than NGRAM_MIN: an empty list is returned;
    * between NGRAM_MIN and NGRAM_MAX inclusive: ``[term]`` is returned;
    * longer than NGRAM_MAX: the term is split by
      ``ngrams.breakup_word`` with both size bounds set to NGRAM_MAX.
    """
    term_length = len(term)
    if term_length < NGRAM_MIN:
        # Shorter than our smallest N-grams.  The empty list is meant to
        # make the search match everything (that handling lives in the
        # caller, which is not shown here).
        return []
    if term_length > NGRAM_MAX:
        # Longer than our longest N-grams: do the best we can with
        # substrings of the term.  Only the longest N-grams are needed,
        # because shorter ones are just substrings of those.
        return ngrams.breakup_word(term, NGRAM_MAX, NGRAM_MAX)
    # Normal case: the term itself is within the calculated N-gram sizes.
    return [term]
def test_simple(self):
    """Break 'foobar' into N-grams of size 2 to 3 and check the result.

    The expected list holds every 2-character and every 3-character
    substring of 'foobar'; the comparison is done by ``assertSameSet``.
    """
    expected = ['fo', 'oo', 'ob', 'ba', 'ar', 'foo', 'oob', 'oba', 'bar']
    self.assertSameSet(ngrams.breakup_word('foobar', 2, 3), expected)
def test_simple(self):
    """Check ``breakup_word("foobar", 2, 3)`` against the known N-grams.

    The expected values are listed by size: first the 2-character
    substrings of "foobar", then the 3-character ones.
    """
    bigrams = ["fo", "oo", "ob", "ba", "ar"]
    trigrams = ["foo", "oob", "oba", "bar"]
    actual = ngrams.breakup_word("foobar", 2, 3)
    self.assertSameSet(actual, bigrams + trigrams)
def test_simple(self):
    """Verify the 2- and 3-character N-grams produced for 'foobar'."""
    word = 'foobar'
    smallest, largest = 2, 3
    produced = ngrams.breakup_word(word, smallest, largest)
    # Every substring of 'foobar' with length 2, followed by every
    # substring with length 3.
    wanted = [
        'fo', 'oo', 'ob', 'ba', 'ar',
        'foo', 'oob', 'oba', 'bar',
    ]
    self.assertSameSet(produced, wanted)