def test_04_big_phrase_autocomplete(self):
    """Check phrase autocompletion on the Seuss corpus against stored results.

    A phrase trie is built from ``testing_data/seuss.txt``.  Every
    (prefix, maxcount) pair in the table below is passed to
    ``lab.autocomplete`` and the result is compared, first by length and
    then as a set, with what ``read_expected`` returns for the name
    ``seuss_autocomplete_<len(prefix)>_<maxcount>.pickle``.

    Finally, calling ``lab.autocomplete`` on the phrase trie with a ``str``
    prefix is expected to raise ``TypeError``.
    """
    cases = {
        ('i', ): [0, 1, 2, 5, 11, None],
        ('i', 'do'): [0, 1, 2, 5, 8, None],
        ('i', 'do', 'not', 'like', 'them'): [0, 1, 2, 4, 100, None],
        ('i', 'do', 'not', 'like', 'them', 'here'): [0, 1, 2, 100, None],
    }

    corpus_path = os.path.join(TEST_DIRECTORY, 'testing_data', 'seuss.txt')
    with open(corpus_path, encoding='utf-8') as corpus:
        text = corpus.read()
    phrase_trie = lab.make_phrase_trie(text)

    for prefix in sorted(cases):
        for max_count in cases[prefix]:
            result = lab.autocomplete(phrase_trie, prefix, max_count)
            expected = read_expected(
                'seuss_autocomplete_%s_%s.pickle' % (len(prefix), max_count))
            # Both checks report the same prefix/maxcount context on failure.
            failure = ('wrong autocomplete of ' + repr(prefix)
                       + ' with maxcount = ' + str(max_count))
            self.assertEqual(len(result), len(expected), msg=failure)
            self.assertEqual(set(result), set(expected), msg=failure)

    # A plain string is not a valid key for the phrase trie.
    with self.assertRaises(TypeError):
        lab.autocomplete(phrase_trie, 'string', None)
def get_completions(args):
    """Return completion results for a request described by ``args``.

    Keys read from ``args``:
      * ``"corpus"``      -- selects an entry of the module-level ``corpusTries``.
      * ``"trie_mode"``   -- ``"words"`` and ``"pattern"`` use element 0 of that
        entry with the raw prefix string; any other mode uses element 1 with
        the prefix split on whitespace into a tuple.
      * ``"prefix"``      -- the text to complete.
      * ``"max_results"`` -- result limit; ``0`` is treated as "no limit"
        (``None``).
      * ``"autocorrect"`` -- only consulted when the mode is not ``"pattern"``.

    Returns:
        A list of words for ``"pattern"`` mode (the first element of each
        pair from ``lab.word_filter``, truncated to the limit), the result
        of ``lab.autocorrect`` when autocorrect is requested in ``"words"``
        mode, and otherwise the result of ``lab.autocomplete`` -- with each
        result joined by single spaces when the mode is ``"sentences"``.
    """
    tries = corpusTries[args["corpus"]]
    mode = args["trie_mode"]

    if mode in ("words", "pattern"):
        trie, prefix = tries[0], args["prefix"]
    else:
        trie, prefix = tries[1], tuple(args["prefix"].split())

    limit = args["max_results"]
    limit = None if limit == 0 else limit

    if mode == "pattern":
        matches = lab.word_filter(trie, prefix)[:limit]
        return [word for word, _freq in matches]

    if args["autocorrect"] and mode == "words":
        return lab.autocorrect(trie, prefix, limit)

    completions = lab.autocomplete(trie, prefix, limit)
    if mode == "sentences":
        return [' '.join(words) for words in completions]
    return completions
def test_02_big_autocomplete(self):
    """Check word autocompletion on the Frankenstein corpus against stored results.

    A word trie is built from ``testing_data/frankenstein.txt``.  For each
    prefix and each maxcount listed below, the output of
    ``lab.autocomplete`` must match, in length and as a set, what
    ``read_expected`` returns for
    ``frank_autocomplete_<prefix>_<maxcount>.pickle``.

    Finally, a tuple prefix on the word trie is expected to raise
    ``TypeError``.
    """
    cases = {
        't': [0, 1, 25, None],
        'th': [0, 1, 21, None],
        'the': [0, 5, 21, None],
        'thes': [0, 1, 21, None],
    }

    corpus_path = os.path.join(TEST_DIRECTORY, 'testing_data',
                               'frankenstein.txt')
    with open(corpus_path, encoding='utf-8') as corpus:
        text = corpus.read()
    word_trie = lab.make_word_trie(text)

    for prefix in sorted(cases):
        for max_count in cases[prefix]:
            result = lab.autocomplete(word_trie, prefix, max_count)
            expected = read_expected(
                'frank_autocomplete_%s_%s.pickle' % (prefix, max_count))
            failure = ('wrong autocomplete of ' + repr(prefix)
                       + ' with maxcount = ' + str(max_count))
            self.assertEqual(len(result), len(expected), msg=failure)
            self.assertEqual(set(result), set(expected), msg=failure)

    # A tuple is not a valid key for the word trie.
    with self.assertRaises(TypeError):
        lab.autocomplete(word_trie, ('tuple', ), None)
def test_autocomplete_big_2():
    """Check word autocompletion on the Frankenstein corpus against stored results.

    Builds a word trie from ``testing_data/frankenstein.txt`` and, for each
    (prefix, maxcount) pair below, compares ``lab.autocomplete`` with the
    value ``read_expected`` returns for
    ``frank_autocomplete_<prefix>_<maxcount>.pickle``: first the number of
    results, then the results as a set.  The failure messages say whether
    results are missing or surplus and which entries differ.

    Finally, a tuple prefix on the word trie is expected to raise
    ``TypeError``.
    """
    cases = {
        't': [0, 1, 25, None],
        'th': [0, 1, 21, None],
        'the': [0, 5, 21, None],
        'thes': [0, 1, 21, None],
    }

    corpus_path = os.path.join(TEST_DIRECTORY, 'testing_data',
                               'frankenstein.txt')
    with open(corpus_path, encoding='utf-8') as corpus:
        text = corpus.read()
    word_trie = lab.make_word_trie(text)

    for prefix in sorted(cases):
        for max_count in cases[prefix]:
            result = lab.autocomplete(word_trie, prefix, max_count)
            expected = read_expected(
                'frank_autocomplete_%s_%s.pickle' % (prefix, max_count))

            assert len(expected) == len(result), (
                ('missing' if len(result) < len(expected) else 'too many')
                + ' autocomplete results for ' + repr(prefix)
                + ' with maxcount = ' + str(max_count))

            # Sets are built only after the length check has passed.
            expected_set = set(expected)
            result_set = set(result)
            assert expected_set == result_set, (
                'autocomplete included ' + repr(result_set - expected_set)
                + ' instead of ' + repr(expected_set - result_set)
                + ' for ' + repr(prefix)
                + ' with maxcount = ' + str(max_count))

    # A tuple is not a valid key for the word trie.
    with pytest.raises(TypeError):
        lab.autocomplete(word_trie, ('tuple', ), None)
def test_autocomplete_big_phrase():
    """Check phrase autocompletion on the Seuss corpus against stored results.

    Builds a phrase trie from ``testing_data/seuss.txt`` and, for each
    (prefix, maxcount) pair below, compares ``lab.autocomplete`` with the
    value ``read_expected`` returns for
    ``seuss_autocomplete_<len(prefix)>_<maxcount>.pickle``: first the
    number of results, then the results as a set.  The failure messages say
    whether results are missing or surplus and which entries differ.

    Finally, a ``str`` prefix on the phrase trie is expected to raise
    ``TypeError``.
    """
    cases = {
        ('i', ): [0, 1, 2, 5, 11, None],
        ('i', 'do'): [0, 1, 2, 5, 8, None],
        ('i', 'do', 'not', 'like', 'them'): [0, 1, 2, 4, 100, None],
        ('i', 'do', 'not', 'like', 'them', 'here'): [0, 1, 2, 100, None],
    }

    corpus_path = os.path.join(TEST_DIRECTORY, 'testing_data', 'seuss.txt')
    with open(corpus_path, encoding='utf-8') as corpus:
        text = corpus.read()
    phrase_trie = lab.make_phrase_trie(text)

    for prefix in sorted(cases):
        for max_count in cases[prefix]:
            result = lab.autocomplete(phrase_trie, prefix, max_count)
            expected = read_expected(
                'seuss_autocomplete_%s_%s.pickle' % (len(prefix), max_count))

            assert len(expected) == len(result), (
                ('missing' if len(result) < len(expected) else 'too many')
                + ' autocomplete results for ' + repr(prefix)
                + ' with maxcount = ' + str(max_count))

            # Sets are built only after the length check has passed.
            expected_set = set(expected)
            result_set = set(result)
            assert expected_set == result_set, (
                'autocomplete included ' + repr(result_set - expected_set)
                + ' instead of ' + repr(expected_set - result_set)
                + ' for ' + repr(prefix)
                + ' with maxcount = ' + str(max_count))

    # A plain string is not a valid key for the phrase trie.
    with pytest.raises(TypeError):
        lab.autocomplete(phrase_trie, 'string', None)
def test_02_big_autocomplete_1(self):
    """Autocomplete 'ap' in a trie dominated by unrelated 'aa....' words.

    The corpus holds every six-letter word of the form ``aa`` + four
    lowercase letters, plus "apple" (three times), "apricot" (twice),
    "application" (once) and "bruteforceisbad".  The same four queries are
    repeated ten times and checked on every pass:

      * maxcount 1 -> exactly ``["apple"]``
      * maxcount 2 -> "apple" and "apricot" (any order)
      * maxcount 3 -> all three 'ap' words (any order)
      * no maxcount -> same set as maxcount 3
    """
    letters = "abcdefghijklmnopqrstuvwxyz"
    corpus_words = [
        "aa" + c1 + c2 + c3 + c4
        for c1 in letters
        for c2 in letters
        for c3 in letters
        for c4 in letters
    ]
    corpus_words += [
        "apple", "application", "apple", "apricot", "apricot", "apple"
    ]
    corpus_words.append("bruteforceisbad")

    trie = lab.make_word_trie(' '.join(corpus_words))

    for _ in range(10):
        top1 = lab.autocomplete(trie, 'ap', 1)
        top2 = lab.autocomplete(trie, 'ap', 2)
        top3 = lab.autocomplete(trie, 'ap', 3)
        unlimited = lab.autocomplete(trie, 'ap')

        self.assertEqual(1, len(top1))
        self.assertEqual(2, len(top2))
        self.assertEqual(3, len(top3))
        self.assertEqual(3, len(unlimited))

        self.assertEqual(["apple"], top1)
        self.assertEqual({"apple", "apricot"}, set(top2))
        self.assertEqual({"apple", "apricot", "application"}, set(top3))
        self.assertEqual(set(unlimited), set(top3))
def test_03_big_autocomplete_2(self):
    """Autocomplete every prefix of 'accompany' on the Frankenstein corpus.

    A word trie is built from ``testing_data/frankenstein.txt``.  For each
    prefix of ``'accompany'`` -- from the empty string up to the full word
    -- ``lab.autocomplete`` is called without a maxcount and compared, in
    length and as a set, with what ``read_expected`` returns for
    ``frank_autocomplete_<prefix>_None.pickle``.

    Finally, a tuple prefix on the word trie is expected to raise
    ``TypeError``.
    """
    corpus_path = os.path.join(TEST_DIRECTORY, 'testing_data',
                               'frankenstein.txt')
    with open(corpus_path, encoding='utf-8') as corpus:
        text = corpus.read()
    word_trie = lab.make_word_trie(text)

    the_word = 'accompany'
    for end in range(len(the_word) + 1):
        prefix = the_word[:end]
        result = lab.autocomplete(word_trie, prefix)
        expected = read_expected(
            'frank_autocomplete_%s_%s.pickle' % (prefix, None))
        failure = 'wrong autocomplete of ' + repr(prefix)
        self.assertEqual(len(result), len(expected), msg=failure)
        self.assertEqual(set(result), set(expected), msg=failure)

    # A tuple is not a valid key for the word trie.
    with self.assertRaises(TypeError):
        lab.autocomplete(word_trie, ('tuple', ), None)
def test_04_big_autocomplete_3(self):
    """Autocomplete every prefix of 'accompany', with detailed failure messages.

    A word trie is built from ``testing_data/frankenstein.txt``.  For each
    prefix of ``'accompany'`` -- from the empty string up to the full word
    -- ``lab.autocomplete`` is called without a maxcount and compared with
    what ``read_expected`` returns for
    ``frank_autocomplete_<prefix>_None.pickle``.  The length check reports
    whether results are missing or surplus; the set check reports which
    entries differ.

    Finally, a tuple prefix on the word trie is expected to raise
    ``TypeError``.
    """
    corpus_path = os.path.join(TEST_DIRECTORY, 'testing_data',
                               'frankenstein.txt')
    with open(corpus_path, encoding='utf-8') as corpus:
        text = corpus.read()
    word_trie = lab.make_word_trie(text)

    the_word = 'accompany'
    for end in range(len(the_word) + 1):
        prefix = the_word[:end]
        result = lab.autocomplete(word_trie, prefix)
        expected = read_expected(
            'frank_autocomplete_%s_%s.pickle' % (prefix, None))

        count_problem = 'missing' if len(result) < len(expected) else 'too many'
        count_msg = (count_problem + ' autocomplete results for '
                     + repr(prefix) + ' with maxcount = None')
        self.assertEqual(len(expected), len(result), msg=count_msg)

        # Sets are built only after the length check has passed.
        expected_set = set(expected)
        result_set = set(result)
        content_msg = ('autocomplete included '
                       + repr(result_set - expected_set)
                       + ' instead of ' + repr(expected_set - result_set)
                       + ' for ' + repr(prefix) + ' with maxcount = None')
        self.assertEqual(expected_set, result_set, msg=content_msg)

    # A tuple is not a valid key for the word trie.
    with self.assertRaises(TypeError):
        lab.autocomplete(word_trie, ('tuple', ), None)
def test_tiny2(self):
    """Autocomplete on a small hand-written corpus with known word counts.

    Word counts in the corpus: dead 4; done, drown, car 3; down, doing,
    cat 2; do, at, and 1.

    Checks, in order:
      * prefix 'do', maxcount 2 -> ``['done', 'down']`` in that order
      * prefix 'd',  maxcount 3 -> ``['dead', 'done', 'drown']`` in that order
      * prefix 'do', no limit   -> all four 'do' words, compared after sorting

    NOTE(review): the first two checks compare unsorted lists, so they
    depend on the order ``lab.autocomplete`` returns results in (including
    how it breaks the done/drown tie) -- confirm that is intended.
    """
    # Adjacent literals replace a backslash continuation inside the string,
    # so neither a stray backslash nor source indentation leaks into the
    # corpus text handed to the trie builder.
    corpus = ('do down down drown drown drown doing doing '
              'done done done dead dead dead dead at and cat cat car car car')
    trie = lab.make_word_trie(corpus)

    result = lab.autocomplete(trie, 'do', 2)
    expect = ['done', 'down']
    self.assertEqual(result, expect)

    result = lab.autocomplete(trie, 'd', 3)
    expect = ['dead', 'done', 'drown']
    self.assertEqual(result, expect)

    result = sorted(lab.autocomplete(trie, 'do', None))
    expect = sorted(['done', 'down', 'doing', 'do'])
    self.assertEqual(result, expect)
def test_01_autocomplete(self):
    """Autocomplete on small word tries, including fewer-than-N matches.

    Each scenario builds a trie from a short sentence, checks that
    ``lab.autocomplete`` returns a ``list`` of ``str``, and then compares
    the result with the expected words:

      * 'car' in "cat car carpet", N=3 -> car, carpet
      * 'a' in the a/an/ant corpus, N=2 -> a plus either an or ant
      * 'm' in the man/map/met corpus, N=3 -> man, map, met
      * 'help' in "hello hell history" -> no matches

    Finally, a tuple prefix on the word trie is expected to raise
    ``TypeError``.
    """
    def check_is_list_of_str(candidate):
        # Shared shape check applied to every result below.
        self.assertIsInstance(candidate, list, "result not a list.")
        for item in candidate:
            self.assertIsInstance(item, str, "expecting list of strings.")

    # Autocomplete on simple trie with less than N valid words
    trie = lab.make_word_trie("cat car carpet")
    result = lab.autocomplete(trie, 'car', 3)
    check_is_list_of_str(result)
    result.sort()
    self.assertEqual(result, ["car", "carpet"],
                     msg="incorrect result from autocomplete.")

    # "an" and "ant" occur equally often, so either may accompany "a".
    trie = lab.make_word_trie("a an ant anteater a an ant a")
    result = lab.autocomplete(trie, 'a', 2)
    check_is_list_of_str(result)
    result.sort()
    self.assertIn(result, [["a", "an"], ["a", "ant"]],
                  msg="incorrect result from autocomplete.")

    trie = lab.make_word_trie(
        "man mat mattress map me met a man a a a map man met")
    result = lab.autocomplete(trie, 'm', 3)
    check_is_list_of_str(result)
    result.sort()
    self.assertEqual(result, ["man", "map", "met"],
                     msg="incorrect result from autocomplete.")

    # No word in this corpus starts with the prefix.
    trie = lab.make_word_trie("hello hell history")
    result = lab.autocomplete(trie, 'help', 3)
    check_is_list_of_str(result)
    self.assertEqual(result, [],
                     msg="incorrect result from autocomplete.")

    with self.assertRaises(TypeError):
        lab.autocomplete(trie, ('tuple', ), None)
def run_test(input_data):
    """Build a trie from ``input_data["words"]`` and run the requested check.

    ``input_data["f"]`` selects what is returned:
      * ``"autocorrect"``  -- ``lab.autocorrect(trie, prefix, N)``
      * ``"autocomplete"`` -- ``lab.autocomplete(trie, prefix, N)``
      * anything else      -- the trie itself

    ``prefix`` and ``N`` are read from ``input_data["prefix"]`` and
    ``input_data["N"]`` only for the first two cases.
    """
    trie = lab.generate_trie(input_data["words"])
    requested = input_data["f"]

    if requested == "autocorrect":
        # Tests for correct autocorrect
        return lab.autocorrect(trie, input_data["prefix"], input_data["N"])
    if requested == "autocomplete":
        # Tests for correct autocomplete
        return lab.autocomplete(trie, input_data["prefix"], input_data["N"])

    # Tests just for producing the trie
    return trie
def run_test(input_data):
    """Dispatch one test case described by the ``input_data`` mapping.

    A trie is always generated from ``input_data["words"]``.  Depending on
    ``input_data["f"]`` the function returns the outcome of
    ``lab.autocorrect`` or ``lab.autocomplete`` (both called with the
    trie, ``input_data["prefix"]`` and ``input_data["N"]``), or, for any
    other value, the generated trie.
    """
    trie = lab.generate_trie(input_data["words"])
    which = input_data["f"]

    if which == "autocorrect":
        # Tests for correct autocorrect
        outcome = lab.autocorrect(trie, input_data["prefix"], input_data["N"])
    elif which == "autocomplete":
        # Tests for correct autocomplete
        outcome = lab.autocomplete(trie, input_data["prefix"], input_data["N"])
    else:
        # Tests just for producing the trie
        outcome = trie
    return outcome
def autocomplete(input_data):
    """Autocomplete ``input_data["prefix"]`` against a lazily loaded corpus trie.

    The module-level ``trie`` is built on the first call (while it is still
    ``None``) from the word list stored in ``resources/words.json`` and is
    reused by later calls.

    Args:
        input_data: mapping providing ``"prefix"`` and ``"N"``, which are
            forwarded unchanged to ``lab.autocomplete``.

    Returns:
        Whatever ``lab.autocomplete`` returns for the cached trie.
    """
    global trie
    if trie is None:
        # Call form of print: valid on Python 3 and prints the same text on
        # Python 2, unlike the former print statement.
        print("LOADING CORPUS")
        with open("resources/words.json", "r") as f:
            words = json.load(f)
        trie = lab.generate_trie(words)
    return lab.autocomplete(trie, input_data["prefix"], input_data["N"])
def test_tiny1(self):
    """Autocomplete on the four-word corpus "bat bat bark bar".

    Checks, in order:
      * prefix 'ba', maxcount 1 -> ``['bat']``
      * prefix 'ba', maxcount 2 -> bat and bar, compared after sorting
      * prefix 'c',  maxcount 2 -> no matches
      * prefix 'b',  no limit   -> ``['bat', 'bar', 'bark']`` in that order

    NOTE(review): "bar" and "bark" each occur once, so the second and last
    checks depend on how ``lab.autocomplete`` orders or picks tied words --
    confirm that is intended.
    """
    trie = lab.make_word_trie("bat bat bark bar")

    top_one = lab.autocomplete(trie, 'ba', 1)
    self.assertEqual(top_one, ['bat'])

    top_two = sorted(lab.autocomplete(trie, 'ba', 2))
    self.assertEqual(top_two, sorted(['bat', 'bar']))

    no_match = lab.autocomplete(trie, 'c', 2)
    self.assertEqual(no_match, [])

    everything = lab.autocomplete(trie, 'b', None)
    self.assertEqual(everything, ['bat', 'bar', 'bark'])
def autocomplete(input_data):
    """Return completions for a prefix, loading the corpus trie on first use.

    While the module-level ``trie`` is ``None`` the word list in
    ``resources/words.json`` is read and turned into a trie with
    ``lab.generate_trie``; the result is stored in ``trie`` so later calls
    skip the load.

    Args:
        input_data: mapping providing ``"prefix"`` and ``"N"``, which are
            forwarded unchanged to ``lab.autocomplete``.

    Returns:
        Whatever ``lab.autocomplete`` returns for the cached trie.
    """
    global trie
    if trie is None:
        # The print statement form is a SyntaxError on Python 3; the call
        # form emits the same line on both Python 2 and Python 3.
        print("LOADING CORPUS")
        with open("resources/words.json", "r") as f:
            words = json.load(f)
        trie = lab.generate_trie(words)
    return lab.autocomplete(trie, input_data["prefix"], input_data["N"])
def test_autocomplete_small():
    """Autocomplete on small word tries, including fewer-than-N matches.

    Scenarios, each on a freshly built trie:
      * 'car' in "cat car carpet", N=3 -> {car, carpet}
      * 'a' in the a/an/ant corpus, N=2 -> a plus either an or ant
      * 'm' in the man/map/met corpus, N=3 -> {man, map, met}
      * 'help' in "hello hell history" -> empty list

    Finally, a tuple prefix on the word trie is expected to raise
    ``TypeError``.
    """
    # Autocomplete on simple tries with less than N valid words
    trie = lab.make_word_trie("cat car carpet")
    completions = lab.autocomplete(trie, 'car', 3)
    assert set(completions) == {"car", "carpet"}

    # "an" and "ant" occur equally often, so either may accompany "a".
    trie = lab.make_word_trie("a an ant anteater a an ant a")
    completions = lab.autocomplete(trie, 'a', 2)
    acceptable = [{"a", "an"}, {"a", "ant"}]
    assert set(completions) in acceptable

    trie = lab.make_word_trie(
        "man mat mattress map me met a man a a a map man met")
    completions = lab.autocomplete(trie, 'm', 3)
    assert set(completions) == {"man", "map", "met"}

    # No word in this corpus starts with the prefix.
    trie = lab.make_word_trie("hello hell history")
    completions = lab.autocomplete(trie, 'help', 3)
    assert completions == []

    with pytest.raises(TypeError):
        lab.autocomplete(trie, ('tuple', ), None)
def test_autocomplete_big_1():
    """Autocomplete 'ap' in a trie dominated by unrelated 'aa....' words.

    The corpus holds every six-letter word of the form ``aa`` + four
    lowercase letters, plus "apple" (three times), "apricot" (twice),
    "application" (once) and "bruteforceisbad".  The same four queries are
    repeated 1000 times and checked on every pass:

      * maxcount 1 -> {apple}
      * maxcount 2 -> {apple, apricot}
      * maxcount 3 and no maxcount -> all three 'ap' words
    """
    letters = "abcdefghijklmnopqrstuvwxyz"
    corpus_words = [
        "aa" + c1 + c2 + c3 + c4
        for c1 in letters
        for c2 in letters
        for c3 in letters
        for c4 in letters
    ]
    corpus_words += [
        "apple", "application", "apple", "apricot", "apricot", "apple"
    ]
    corpus_words.append("bruteforceisbad")

    trie = lab.make_word_trie(' '.join(corpus_words))

    for _ in range(1000):
        top1 = lab.autocomplete(trie, 'ap', 1)
        top2 = lab.autocomplete(trie, 'ap', 2)
        top3 = lab.autocomplete(trie, 'ap', 3)
        unlimited = lab.autocomplete(trie, 'ap')

        assert set(top1) == {'apple'}
        assert set(top2) == {'apple', 'apricot'}
        assert set(unlimited) == set(top3) == {
            'apple', 'apricot', 'application'
        }