-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_counter.py
98 lines (84 loc) · 4.26 KB
/
test_counter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import unittest
import counter
# NOTE: besides these, I did "integration" testing on the command line with
# large text files. I didn't bother to include those tests here for a few
# reasons, including speed and not being 100% sure what the true trigram
# counts were for the texts. I didn't want to assume assertion values I
# couldn't prove.
class Case(unittest.TestCase):
    """Unit tests for ``counter.count``.

    Every test passes ``counter.count`` a one-element list holding the input
    text and inspects the returned mapping, whose keys are space-joined
    three-word strings (trigrams) and whose values are occurrence counts.
    """

    def test_empty_string_should_return_empty_dict(self):
        """An empty input string yields no trigrams at all."""
        output = counter.count([""])
        self.assertEqual({}, output)

    def test_super_simple_three_word_input(self):
        """Exactly three words produce exactly one trigram, counted once."""
        test_input = "super simple test"
        output = counter.count([test_input])
        self.assertIn("super simple test", output)
        self.assertEqual(output["super simple test"], 1)
        self.assertEqual(len(output), 1)  # total trigrams expected

    def test_super_simple_mixed_case(self):
        """Upper- and lower-case spellings are counted as the same trigram."""
        test_input = "super simple test SUPER SIMPLE TEST"
        output = counter.count([test_input])
        self.assertIn("super simple test", output)
        self.assertEqual(output["super simple test"], 2)
        # The repeated trigram plus the two that straddle the repetition.
        self.assertEqual(len(output), 3)

    def test_two_trigrams(self):
        """Four words produce two overlapping trigrams and nothing shorter."""
        test_input = "this has two trigrams"
        output = counter.count([test_input])
        self.assertIn("this has two", output)
        self.assertIn("has two trigrams", output)
        self.assertEqual(output["this has two"], 1)
        self.assertEqual(output["has two trigrams"], 1)
        # negative tests to make sure our gram "window" isn't sliding too far
        self.assertNotIn("two trigrams", output)
        self.assertNotIn("trigrams", output)
        self.assertEqual(len(output), 2)

    def test_three_trigrams(self):
        """Five words produce three overlapping trigrams and nothing shorter."""
        test_input = "this has three trigrams now"
        output = counter.count([test_input])
        self.assertIn("this has three", output)
        self.assertIn("has three trigrams", output)
        self.assertIn("three trigrams now", output)
        self.assertEqual(output["this has three"], 1)
        self.assertEqual(output["has three trigrams"], 1)
        self.assertEqual(output["three trigrams now"], 1)
        # negative tests to make sure our gram "window" isn't sliding too far
        self.assertNotIn("trigrams now", output)
        self.assertNotIn("now", output)
        self.assertEqual(len(output), 3)

    def test_simple_repeated_trigram(self):
        """A trigram that appears twice is stored once with a count of 2."""
        test_input = "apples are tasty and also apples are tasty"
        output = counter.count([test_input])
        self.assertIn("apples are tasty", output)
        self.assertEqual(output["apples are tasty"], 2)
        # Six trigram windows in total, one of them a duplicate.
        self.assertEqual(len(output), 5)

    def test_strip_special_chars(self):
        """Punctuation between words is treated as a word separator."""
        # Raw string: the \" sequence keeps BOTH the backslash and the quote,
        # so the input exercises each of those characters as a separator.
        test_input = r"super,simple.test:a;few!more&words\"here'and?there"
        output = counter.count([test_input])
        expected_trigrams = (
            "super simple test",
            "simple test a",
            "test a few",
            "a few more",
            "few more words",
            "more words here",
            "words here and",
            "here and there",
        )
        for trigram in expected_trigrams:
            self.assertIn(trigram, output)
            self.assertEqual(output[trigram], 1)

    @unittest.skip("Set to ignore because this takes upwards of 5 minutes to run")
    def test_extremely_large_input(self):
        """In-memory stress test that avoids having to gather real files.

        Skipped by default because of its run time.
        """
        # This will produce a string the size of just over 54 copies of Origin of Species
        total_occurrences = 5000000
        # String repetition builds the same text as appending in a loop, in a
        # single allocation instead of five million concatenations.
        test_input = "big text here " * total_occurrences
        output = counter.count([test_input])
        self.assertIn("big text here", output)
        self.assertEqual(output["big text here"], total_occurrences)