コード例 #1
0
 def test_characters(self):
     """Check character tokenization against a short prose corpus.

     The first generated token must be a ``str``. "worm" does not occur
     verbatim in the corpus, but "wor" (work/world) and "orm" (form) do,
     so it is expected in the output. "mals " never occurs in the corpus
     (it ends with "animals!") and must not be generated.
     """
     corpus = (
         "What a piece of work is man! how noble in reason! how infinite in faculty! in form and moving how express and admirable! "
         "in action how like an angel! in apprehension how like a god! the beauty of the world, the paragon of animals!"
     )
     writer = final.RandomWriter(2, final.Tokenization.character)
     writer.train_iterable(corpus)

     first_token = next(iter(writer.generate()))
     self.assertIsInstance(first_token, str)

     # Each assertion draws from its own fresh generator.
     self.assertContainsSequence(writer.generate(), "worm")
     self.assertNotContainsSequence(writer.generate(), "mals ")
コード例 #2
0
 def test_numeric_sequence_notin(self):
     """Check that sequences absent from the integer training data are not generated.

     The writer is built with no explicit tokenization and trained on a
     tuple of ints. The last forbidden entry is the *string* "5", which
     must not appear because only ints were supplied.
     """
     writer = final.RandomWriter(2)
     writer.train_iterable((1, 2, 3, 4, 5, 5, 5, 4, 3, 2, 1, 2, 4, 5))

     forbidden_sequences = ([5, 5, 3], [1, 2, 5], [4, 2], ["5"])
     for forbidden in forbidden_sequences:
         # A fresh generator per check, as each assertion consumes its input.
         self.assertNotContainsSequence(writer.generate(), forbidden)
コード例 #3
0
 def test_words(self):
     """Check word tokenization on a one-sentence corpus.

     Word pairs that never occur adjacently in the sentence ("the the",
     "the iterable") must not be generated, while runs that do occur
     must show up. ``times`` is forwarded to the helper assertion, which
     is defined outside this block.
     """
     writer = final.RandomWriter(1, final.Tokenization.word)
     writer.train_iterable("the given iterable must contain the sequence the")

     # Word pairs that are never adjacent in the training sentence.
     self.assertNotContainsSequence(writer.generate(), ["the", "the"])
     self.assertNotContainsSequence(writer.generate(), ["the", "iterable"])

     # Word runs taken directly from the training sentence.
     self.assertContainsSequence(
         writer.generate(), ["iterable", "must", "contain"], times=10
     )
     self.assertContainsSequence(
         writer.generate(), ["the", "sequence"], times=200
     )
コード例 #4
0
 def test_numeric_sequence(self):
     """Check generation from a palindromic run of integers.

     Triples and pairs that never occur in the training tuple must not
     be generated; the central run 3,4,5,5,4,3,2 must be. ``times`` is
     forwarded to the helper assertion defined outside this block.
     """
     writer = final.RandomWriter(2)
     writer.train_iterable((1, 2, 3, 4, 5, 5, 4, 3, 2, 1))

     for forbidden in ([5, 5, 3], [1, 2, 5], [2, 4]):
         self.assertNotContainsSequence(writer.generate(), forbidden)

     expected_run = [3, 4, 5, 5, 4, 3, 2]
     self.assertContainsSequence(writer.generate(), expected_run, times=10)
コード例 #5
0
 def test_bytes_nonutf8(self):
     """Check byte tokenization on data containing bytes 0xFF 0xFE.

     The corpus is the prose passage used by the character tests with the
     two bytes 0xFF 0xFE inserted after "god!". Generated tokens may be
     either ``int`` or ``bytes``; both are accepted.
     """
     corpus = (
         b"What a piece of work is man! how noble in reason! how infinite in faculty! in form and moving how express and admirable! "
         b"in action how like an angel! in apprehension how like a god!\xff\xfe the beauty of the world, the paragon of animals!"
     )
     writer = final.RandomWriter(2, final.Tokenization.byte)
     writer.train_iterable(corpus)

     # assertIsInstance reports the offending object on failure and matches
     # the assertion style used by the sibling tests.
     self.assertIsInstance(next(iter(writer.generate())), (int, bytes))

     self.assertNotContainsSequence(writer.generate(), b"mals ")
     self.assertContainsSequence(writer.generate(), b"worm")
     # The inserted bytes must survive training and generation intact.
     self.assertContainsSequence(writer.generate(), b"!\xff\xfe")
コード例 #6
0
 def test_generate_count(self):
     """Check that ``generate()`` can supply at least 10000 tokens.

     Exactly 10000 items are requested through ``itertools.islice``; the
     test fails if the generator is exhausted before that.
     """
     corpus = (
         "What a piece of work is man! how noble in reason! how infinite in faculty! in form and moving how express and admirable! "
         "in action how like an angel! in apprehension how like a god! the beauty of the world, the paragon of animals!"
     )
     writer = final.RandomWriter(2, final.Tokenization.character)
     writer.train_iterable(corpus)

     wanted = 10000
     produced = sum(1 for _ in itertools.islice(writer.generate(), wanted))
     self.assertEqual(produced, wanted)
コード例 #7
0
 def test_train_iterator(self):
     """Check training from a one-shot iterator rather than a reusable sequence.

     The training data is wrapped in ``iter(...)`` so it can be consumed
     only once. Generated tokens must be ``int`` and several runs must
     appear in the output.
     """
     training_values = (1, 2, 3, 4, 5, 5, 5, 4, 3, 2, 1, 2, 4, 5)
     writer = final.RandomWriter(1)
     writer.train_iterable(iter(training_values))

     self.assertIsInstance(next(iter(writer.generate())), int)

     # ``times`` is forwarded to the helper assertion defined elsewhere.
     self.assertContainsSequence(
         writer.generate(), [3, 4, 5, 5, 4, 3, 2], times=10
     )

     expected_runs = (
         [3, 4, 5, 5, 5, 5, 4, 3, 2],
         [5, 5, 5, 5, 5],
         [3, 2, 1, 2, 4, 5, 5, 4],
         [3, 2, 1, 2, 3, 4, 5, 5, 4],
     )
     for run in expected_runs:
         self.assertContainsSequence(writer.generate(), run)
コード例 #8
0
 def test_generate_file_size(self):
     """Check that ``generate_file`` writes about ``DEFAULT_LENGTH`` characters.

     The text read back must be at least ``DEFAULT_LENGTH`` characters
     long and at most two characters longer.
     """
     writer = final.RandomWriter(1, final.Tokenization.character)
     writer.train_iterable("abcaea")

     with nonexistant_filename() as path:
         writer.generate_file(path, self.DEFAULT_LENGTH)
         with open(path, "rt") as handle:
             text = handle.read()

         size = len(text)
         lower_bound = self.DEFAULT_LENGTH
         self.assertGreaterEqual(size, lower_bound)
         self.assertLessEqual(size, lower_bound + 2)
コード例 #9
0
 def test_generate_file2(self):
     """Check the file output of a word-tokenized writer.

     The file is read back as text and checked as a string: "the a" must
     not appear and "the word" must. ``times`` is forwarded to the helper
     assertion defined outside this block.
     """
     writer = final.RandomWriter(1, final.Tokenization.word)
     writer.train_iterable("a the word the")

     with nonexistant_filename() as path:
         writer.generate_file(path, self.DEFAULT_LENGTH)
         with open(path, "rt") as handle:
             text = handle.read()

         # "the" is never followed by "a" in the training sentence.
         self.assertNotContainsSequence(text, "the a")
         self.assertContainsSequence(text, "the word", times=100)
コード例 #10
0
 def test_generate_file3(self):
     """Check the file output of a writer using ``Tokenization.none`` on ints.

     The file is read back as text. The expected strings below imply the
     tokens are written separated by single spaces -- NOTE(review):
     confirm against ``generate_file``, which is not visible here.
     """
     writer = final.RandomWriter(2, final.Tokenization.none)
     writer.train_iterable((1, 2, 3, 4, 5, 5, 4, 3, 2, 1))

     with nonexistant_filename() as path:
         writer.generate_file(path, self.DEFAULT_LENGTH)
         with open(path, "rt") as handle:
             text = handle.read()

         for forbidden in ("5 5 3", "1 2 5"):
             self.assertNotContainsSequence(text, forbidden)
         self.assertContainsSequence(text, "3 4 5 5 4 3 2", times=100)
コード例 #11
0
 def test_multiple_generators(self):
     """Check that two generators from one writer can be advanced in lockstep.

     Tokens are pulled alternately from two ``generate()`` iterators for
     ``DEFAULT_LENGTH`` rounds, then each generator's token stream is
     checked on its own.
     """
     corpus = (
         "What a piece of work is man! how noble in reason! how infinite in faculty! in form and moving how express and admirable! "
         "in action how like an angel! in apprehension how like a god! the beauty of the world, the paragon of animals!"
     )
     writer = final.RandomWriter(2, final.Tokenization.character)
     writer.train_iterable(corpus)
     self.assertIsInstance(next(iter(writer.generate())), str)

     first_gen = writer.generate()
     second_gen = writer.generate()

     # One (first, second) pair per round; the generators are interleaved.
     interleaved = [
         (next(first_gen), next(second_gen))
         for _ in range(self.DEFAULT_LENGTH)
     ]

     # Transpose the pairs back into one token stream per generator.
     for stream in zip(*interleaved):
         self.assertContainsSequence(stream, "worm")
         self.assertNotContainsSequence(stream, "mals ")
コード例 #12
0
 def test_save_load_pickle(self):
     """Check that a writer restored with ``load_pickle`` generates like the original.

     A character writer trained on "abcaea" is saved and loaded back. All
     assertions run against the *restored* writer, so the test exercises
     the save/load round trip rather than the original in-memory object.
     """
     original = final.RandomWriter(1, final.Tokenization.character)
     original.train_iterable("abcaea")

     with nonexistant_filename() as path:
         original.save_pickle(path)
         restored = final.RandomWriter.load_pickle(path)

         # Character pairs that never occur adjacently in "abcaea".
         for forbidden in ("ac", "aa", "ce"):
             self.assertNotContainsSequence(restored.generate(), forbidden)

         # ``times`` is forwarded to the helper assertion defined elsewhere.
         for expected in ("abc", "aeaeab"):
             self.assertContainsSequence(
                 restored.generate(), expected, times=100
             )
コード例 #13
0
 def test_generate_file1(self):
     """Check the file output of a character writer trained on "abcaea".

     The file is read back as text. Character pairs that never occur
     adjacently in the training string must be absent; runs built from
     its transitions must be present.
     """
     writer = final.RandomWriter(1, final.Tokenization.character)
     writer.train_iterable("abcaea")

     with nonexistant_filename() as path:
         writer.generate_file(path, self.DEFAULT_LENGTH)
         with open(path, "rt") as handle:
             text = handle.read()

         for forbidden in ("ac", "aa", "ce"):
             self.assertNotContainsSequence(text, forbidden)

         # ``times`` is forwarded to the helper assertion defined elsewhere.
         for expected in ("abc", "aeaeab"):
             self.assertContainsSequence(text, expected, times=100)
コード例 #14
0
 def test_generate_file4(self):
     """Check the file output of a byte writer trained on non-ASCII bytes.

     The training bytes follow the same pattern as the string "abcaea"
     used by the character tests, with a=0xFE, b=0xFF, c=0x02, e=0x03.
     The file is read back in binary mode.
     """
     writer = final.RandomWriter(1, final.Tokenization.byte)
     # Byte-for-letter:      a   b   c   a   e   a
     writer.train_iterable(b"\xfe\xff\x02\xfe\x03\xfe")

     with nonexistant_filename() as path:
         writer.generate_file(path, self.DEFAULT_LENGTH)
         with open(path, "rb") as handle:
             payload = handle.read()

         # Byte equivalents of "ac", "aa" and "ce".
         for forbidden in (b"\xfe\x02", b"\xfe\xfe", b"\x02\x03"):
             self.assertNotContainsSequence(payload, forbidden)

         # Byte equivalents of "abc" and "aeaeab".
         for expected in (b"\xfe\xff\x02", b"\xfe\x03\xfe\x03\xfe\xff"):
             self.assertContainsSequence(payload, expected, times=100)
コード例 #15
0
 def test_train_url_utf8(self):
     """Check character training from a URL whose text contains non-ASCII characters.

     Needs network access to the URL below. ``length`` is forwarded to
     the helper assertion defined outside this block.
     """
     source_url = "http://www.singingwizard.org/stuff/utf8test.txt"
     writer = final.RandomWriter(5, final.Tokenization.character)
     writer.train_url(source_url)

     generated = writer.generate()
     self.assertContainsSequence(generated, "ajtób", length=100000)
コード例 #16
0
 def test_train_url_word(self):
     """Check word training from a URL.

     Needs network access to the URL below. The expected word pair is
     "she had"; ``length`` is forwarded to the helper assertion defined
     outside this block.
     """
     source_url = "http://www.singingwizard.org/stuff/pg24132.txt"
     writer = final.RandomWriter(1, final.Tokenization.word)
     writer.train_url(source_url)

     expected_words = ["she", "had"]
     self.assertContainsSequence(
         writer.generate(), expected_words, length=100000
     )
コード例 #17
0
 def test_train_url_bytes(self):
     """Check byte training from a URL.

     Needs network access to the URL below. ``length`` is forwarded to
     the helper assertion defined outside this block.
     """
     source_url = "http://www.singingwizard.org/stuff/pg24132.txt"
     writer = final.RandomWriter(4, final.Tokenization.byte)
     writer.train_url(source_url)

     generated = writer.generate()
     self.assertContainsSequence(generated, b"ad di", length=300000)
コード例 #18
0
 def test_bytes_nonutf8_file(self):
     """Check byte training from a URL whose content includes bytes 0xFE and 0xFF.

     Needs network access to the URL below. Generated tokens may be
     either ``int`` or ``bytes``; both are accepted.
     """
     source_url = "http://www.singingwizard.org/stuff/nonutf8.txt"
     writer = final.RandomWriter(1, final.Tokenization.byte)
     writer.train_url(source_url)

     # assertIsInstance reports the offending object on failure and matches
     # the assertion style used by the sibling tests.
     self.assertIsInstance(next(iter(writer.generate())), (int, bytes))

     self.assertContainsSequence(writer.generate(), b"\xfe\xff\xfe")
     self.assertNotContainsSequence(writer.generate(), b"\x02\xfe")