Ejemplo n.º 1
0
 def test_memory(self):
     """Check that the ngrams C module does not leak objects.

     Counts the objects tracked by the garbage collector before and after
     calling ``ngrams.breakup_list`` and ``ngrams.breakup_word`` (with the
     results deleted in between) and requires the two counts to match.
     """
     # Collect first so garbage left over from earlier code does not skew
     # the baseline count.
     gc.collect()
     start_count = len(gc.get_objects())
     results = ngrams.breakup_list(['foo', 'bar', 'bazbaz'], 1, 3)
     results2 = ngrams.breakup_word('miroiscool', 1, 3)
     # Drop our references so the returned objects become collectable.
     del results
     del results2
     gc.collect()
     end_count = len(gc.get_objects())
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(start_count, end_count)
Ejemplo n.º 2
0
def _ngrams_for_term(term):
    """Return the list of N-grams to search for when looking up ``term``.

    A term no longer than NGRAM_MAX is searched for as-is, so the result is
    a one-element list holding the term.  A longer term is handed to
    ``ngrams.breakup_word`` with NGRAM_MAX as both the minimum and maximum
    size.
    """
    if len(term) > NGRAM_MAX:
        # Only the longest N-grams are requested: per the original note,
        # shorter N-grams would just be substrings of those.
        return ngrams.breakup_word(term, NGRAM_MAX, NGRAM_MAX)
    return [term]
Ejemplo n.º 3
0
    def _ngrams_for_term(self, term):
        """Return the list of N-grams to search for when looking up ``term``.

        A term no longer than NGRAM_MAX yields a one-element list holding
        the term itself.  A longer term is split by ``ngrams.breakup_word``
        using NGRAM_MAX as both the minimum and maximum size; shorter
        N-grams are skipped because (per the original note) they would just
        be substrings of the longest ones.
        """
        fits_in_one_ngram = len(term) <= NGRAM_MAX
        return (
            [term]
            if fits_in_one_ngram
            else ngrams.breakup_word(term, NGRAM_MAX, NGRAM_MAX)
        )
Ejemplo n.º 4
0
def _ngrams_for_term(term):
    """Return the list of N-grams to search for when looking up ``term``.

    Three cases, by the length of ``term``:

    * shorter than NGRAM_MIN: an empty list;
    * between NGRAM_MIN and NGRAM_MAX inclusive: a list holding just the
      term;
    * longer than NGRAM_MAX: the result of ``ngrams.breakup_word`` with
      NGRAM_MAX as both the minimum and maximum size.
    """
    term_length = len(term)

    if term_length < NGRAM_MIN:
        # Too short for even the smallest N-grams.  Per the original note,
        # the empty list causes the search to match everything.
        return []

    if term_length <= NGRAM_MAX:
        # Normal case: the term can be searched for directly.
        return [term]

    # Longer than the longest N-grams: do the best we can with substrings
    # of the term.  Only the longest N-grams are needed, since shorter ones
    # would just be substrings of those.
    return ngrams.breakup_word(term, NGRAM_MAX, NGRAM_MAX)
Ejemplo n.º 5
0
def _ngrams_for_term(term):
    """Work out which N-grams a search for ``term`` should look up.

    Returns an empty list when the term is shorter than NGRAM_MIN, a list
    holding only the term when its length is at most NGRAM_MAX, and
    otherwise whatever ``ngrams.breakup_word`` produces when asked for
    pieces of size NGRAM_MAX only.
    """
    size = len(term)
    too_short = size < NGRAM_MIN

    if not too_short and size > NGRAM_MAX:
        # The term exceeds the longest N-grams, so fall back to substrings
        # of it.  Requesting only the longest size is enough because
        # shorter N-grams would just be substrings of those.
        return ngrams.breakup_word(term, NGRAM_MAX, NGRAM_MAX)

    # An empty list is used for terms below the smallest N-gram size; the
    # original note says this makes the search match everything.
    return [] if too_short else [term]
Ejemplo n.º 6
0
 def test_simple(self):
     """breakup_word('foobar', 2, 3) yields every 2- and 3-letter piece."""
     actual = ngrams.breakup_word('foobar', 2, 3)
     expected = [
         'fo', 'oo', 'ob', 'ba', 'ar',
         'foo', 'oob', 'oba', 'bar',
     ]
     self.assertSameSet(actual, expected)
Ejemplo n.º 7
0
 def test_simple(self):
     """Splitting "foobar" with sizes 2..3 gives all of its 2- and 3-grams."""
     self.assertSameSet(
         ngrams.breakup_word("foobar", 2, 3),
         [
             # 2-letter pieces
             "fo", "oo", "ob", "ba", "ar",
             # 3-letter pieces
             "foo", "oob", "oba", "bar",
         ],
     )
Ejemplo n.º 8
0
 def test_simple(self):
     """Check the pieces breakup_word returns for 'foobar' with sizes 2-3."""
     pieces = ngrams.breakup_word('foobar', 2, 3)
     two_letter = ['fo', 'oo', 'ob', 'ba', 'ar']
     three_letter = ['foo', 'oob', 'oba', 'bar']
     self.assertSameSet(pieces, two_letter + three_letter)