Exemple #1
0
 def test_04_big_phrase_autocomplete(self):
     """Check lab.autocomplete on a phrase trie built from seuss.txt.

     For each (prefix, max count) pair below, the autocomplete result is
     compared with the value read_expected returns for the matching
     'seuss_autocomplete_<prefix length>_<max count>.pickle' name: first by
     length, then as sets (so ordering is not checked). Finally, a plain
     string passed as the prefix must raise TypeError.
     """
     # Phrase prefix -> the max-count arguments to try with that prefix.
     max_counts_by_prefix = {
         ('i', ): [0, 1, 2, 5, 11, None],
         ('i', 'do'): [0, 1, 2, 5, 8, None],
         ('i', 'do', 'not', 'like', 'them'): [0, 1, 2, 4, 100, None],
         ('i', 'do', 'not', 'like', 'them', 'here'): [0, 1, 2, 100, None]
     }

     corpus_path = os.path.join(TEST_DIRECTORY, 'testing_data', 'seuss.txt')
     with open(corpus_path, encoding='utf-8') as corpus_file:
         corpus = corpus_file.read()
     phrase_trie = lab.make_phrase_trie(corpus)

     for prefix in sorted(max_counts_by_prefix):
         for max_count in max_counts_by_prefix[prefix]:
             actual = lab.autocomplete(phrase_trie, prefix, max_count)
             expected = read_expected(
                 'seuss_autocomplete_%s_%s.pickle' % (len(prefix), max_count))

             # Both assertions report the same context on failure.
             failure_msg = ('wrong autocomplete of ' + repr(prefix) +
                            ' with maxcount = ' + str(max_count))
             self.assertEqual(len(actual), len(expected), msg=failure_msg)
             self.assertEqual(set(actual), set(expected), msg=failure_msg)

     # A str is passed where the calls above pass a tuple of words.
     with self.assertRaises(TypeError):
         lab.autocomplete(phrase_trie, 'string', None)
Exemple #2
0
    def test_02_phrase_trie(self):
        """Build phrase tries from short texts and compare with stored results.

        Each case pairs an input text with the name handed to read_expected;
        the trie returned by lab.make_phrase_trie is converted with dictify
        before being compared for equality.
        """
        # small tests: (input text, name of the expected-result file)
        cases = (
            ('toonces was a cat who could drive a car very fast until he crashed.',
             '9.pickle'),
            ('a man at the market murmered that he had met a mermaid. '
             'i dont believe that he had met a mermaid.',
             '10.pickle'),
            ('What happened to the cat who ate the ball of yarn?  She had mittens!  '
             'What happened to the frog who was double parked?  He got toad!  '
             'What happened yesterday?  I dont remember.',
             '11.pickle'),
        )

        for text, expected_name in cases:
            trie = lab.make_phrase_trie(text)
            expected = read_expected(expected_name)
            self.assertEqual(expected, dictify(trie))
Exemple #3
0
def test_autocomplete_big_phrase():
    """Check lab.autocomplete on a phrase trie built from seuss.txt.

    For each (prefix, max count) pair below, the autocomplete result is
    compared with the value read_expected returns for the matching
    'seuss_autocomplete_<prefix length>_<max count>.pickle' name: first by
    length, then as sets (so ordering is not checked). Finally, a plain
    string passed as the prefix must raise TypeError.
    """
    # Phrase prefix -> the max-count arguments to try with that prefix.
    max_counts_by_prefix = {
        ('i', ): [0, 1, 2, 5, 11, None],
        ('i', 'do'): [0, 1, 2, 5, 8, None],
        ('i', 'do', 'not', 'like', 'them'): [0, 1, 2, 4, 100, None],
        ('i', 'do', 'not', 'like', 'them', 'here'): [0, 1, 2, 100, None]
    }

    corpus_path = os.path.join(TEST_DIRECTORY, 'testing_data', 'seuss.txt')
    with open(corpus_path, encoding='utf-8') as corpus_file:
        corpus = corpus_file.read()
    phrase_trie = lab.make_phrase_trie(corpus)

    for prefix in sorted(max_counts_by_prefix):
        for max_count in max_counts_by_prefix[prefix]:
            actual = lab.autocomplete(phrase_trie, prefix, max_count)
            expected = read_expected(
                'seuss_autocomplete_%s_%s.pickle' % (len(prefix), max_count))

            # Length is checked before any set is built, as duplicates
            # would be hidden by the set comparison alone.
            expected_len = len(expected)
            actual_len = len(actual)
            assert expected_len == actual_len, (
                ('missing' if actual_len < expected_len else 'too many')
                + ' autocomplete results for ' + repr(prefix)
                + ' with maxcount = ' + str(max_count))

            expected_set = set(expected)
            actual_set = set(actual)
            assert expected_set == actual_set, (
                'autocomplete included ' + repr(actual_set - expected_set)
                + ' instead of ' + repr(expected_set - actual_set)
                + ' for ' + repr(prefix)
                + ' with maxcount = ' + str(max_count))

    # A str is passed where the calls above pass a tuple of words.
    with pytest.raises(TypeError):
        lab.autocomplete(phrase_trie, 'string', None)
def load_corpus_file(path):
    """Build word and phrase tries for a text file and register them.

    The file at ``path`` is read as UTF-8, a word trie and a phrase trie are
    built from its contents, and the pair is stored in ``corpusTries`` under
    the corpus name.

    Args:
        path: Path of the text file to load.

    Returns:
        The corpus name: the file's base name with its final extension
        removed (e.g. ``'holmes'`` for ``'some/dir/holmes.txt'``).
    """
    # splitext strips only the last extension. The previous split('.')/join
    # approach produced an empty name for extension-less files and silently
    # dropped interior dots ('a.b.txt' -> 'ab'), which could make distinct
    # files collide on the same key.
    corpus_name, _extension = os.path.splitext(os.path.basename(path))

    with open(path, encoding="utf-8") as f:
        text = f.read()

    # The file is already closed here; trie construction only needs the text.
    word_trie = lab.make_word_trie(text)
    phrase_trie = lab.make_phrase_trie(text)

    corpusTries[corpus_name] = (word_trie, phrase_trie)
    return corpus_name
Exemple #5
0
    def test_03_big_corpora(self):
        """Compare word and phrase tries of the large corpora with stored results.

        For each corpus name, '<name>.txt' is read from the testing_data
        directory, both tries are built from its text, and their dictify
        forms are compared with what read_expected returns for
        '<name>_words.pickle' and '<name>_phrases.pickle'.
        """
        for corpus in ('holmes', 'earnest', 'frankenstein'):
            corpus_path = os.path.join(TEST_DIRECTORY, 'testing_data',
                                       '%s.txt' % corpus)
            with open(corpus_path, encoding='utf-8') as corpus_file:
                contents = corpus_file.read()

            word_trie = lab.make_word_trie(contents)
            phrase_trie = lab.make_phrase_trie(contents)

            expected_words = read_expected('%s_words.pickle' % corpus)
            expected_phrases = read_expected('%s_phrases.pickle' % corpus)

            self.assertEqual(expected_words, dictify(word_trie),
                             'word trie does not match for '+corpus)
            self.assertEqual(expected_phrases, dictify(phrase_trie),
                             'phrase trie does not match for '+corpus)
Exemple #6
0
def test_big_corpora(bigtext):
    """Compare the word and phrase tries of one corpus with stored results.

    '<bigtext>.txt' is read from the testing_data directory, both tries are
    built from its text, and their dictify forms are compared with what
    read_expected returns for '<bigtext>_words.pickle' and
    '<bigtext>_phrases.pickle'.

    Args:
        bigtext: Base name of the corpus file, without the '.txt' suffix.
    """
    corpus_path = os.path.join(TEST_DIRECTORY, 'testing_data',
                               '%s.txt' % bigtext)
    with open(corpus_path, encoding='utf-8') as corpus_file:
        contents = corpus_file.read()

    word_trie = lab.make_word_trie(contents)
    phrase_trie = lab.make_phrase_trie(contents)

    expected_words = read_expected('%s_words.pickle' % bigtext)
    expected_phrases = read_expected('%s_phrases.pickle' % bigtext)

    assert expected_words == dictify(word_trie), \
        'word trie does not match for %s' % bigtext
    assert expected_phrases == dictify(phrase_trie), \
        'phrase trie does not match for %s' % bigtext