Beispiel #1
0
 def test_bytes(self):
     """Train a byte-tokenized writer on a bytes literal and inspect its output.

     Checks that the first generated token is an ``int`` or ``bytes`` object,
     that ``b"worm"`` shows up in the generated stream, and that ``b"mals "``
     does not.
     """
     rw = randomwriter.RandomWriter(2, randomwriter.Tokenization.byte)
     rw.train_iterable(b"What a piece of work is man! how noble in reason! how infinite in faculty! in form and moving how express and admirable! "
                       b"in action how like an angel! in apprehension how like a god! the beauty of the world, the paragon of animals!")
     # assertIsInstance (as used by the sibling tests) reports the offending
     # object and type on failure instead of a bare "False is not true".
     self.assertIsInstance(next(iter(rw.generate())), (int, bytes))
     self.assertContainsSequence(rw.generate(), b"worm")
     self.assertNotContainsSequence(rw.generate(), b"mals ")
Beispiel #2
0
 def test_words(self):
     """Train a word-tokenized writer on one sentence and check word sequences.

     Two word sequences must appear in the generated output (with the given
     ``times`` values) and two must never appear.
     """
     writer = randomwriter.RandomWriter(1, randomwriter.Tokenization.word)
     writer.train_iterable("the given iterable must contain the sequence the")

     # (expected word sequence, value forwarded as ``times``)
     present = (
         (["iterable", "must", "contain"], 10),
         (["the", "sequence"], 200),
     )
     for words, count in present:
         self.assertContainsSequence(writer.generate(), words, times=count)

     for words in (["the", "the"], ["the", "iterable"]):
         self.assertNotContainsSequence(writer.generate(), words)
Beispiel #3
0
 def test_numeric_sequence_notin(self):
     """Train on a tuple of ints and check that some sequences never appear.

     The last case uses the string ``"5"`` rather than the int ``5``.
     """
     writer = randomwriter.RandomWriter(2)
     training = (1, 2, 3, 4, 5, 5, 5, 4, 3, 2, 1, 2, 4, 5)
     writer.train_iterable(training)

     # Each check draws from a fresh generator, one sequence at a time.
     for absent in ([5, 5, 3], [1, 2, 5], [4, 2], ["5"]):
         self.assertNotContainsSequence(writer.generate(), absent)
Beispiel #4
0
 def test_characters_level3(self):
     """Train a character-tokenized writer (first argument 3) on a text passage.

     Checks that generated tokens are ``str``, that ``"n how n"`` appears in
     the output, and that ``"worm"`` and ``"mals "`` do not.
     """
     passage = (
         "What a piece of work is man! how noble in reason! how infinite in faculty! in form and moving how express and admirable! "
         "in action how like an angel! in apprehension how like a god! the beauty of the world, the paragon of animals!"
     )
     writer = randomwriter.RandomWriter(3, randomwriter.Tokenization.character)
     writer.train_iterable(passage)

     first_token = next(iter(writer.generate()))
     self.assertIsInstance(first_token, str)

     self.assertContainsSequence(writer.generate(), "n how n")
     for absent in ("worm", "mals "):
         self.assertNotContainsSequence(writer.generate(), absent)
Beispiel #5
0
 def test_generate_file_size(self):
     """Check the size of a file written by ``generate_file``.

     The file's text length must be at least ``DEFAULT_LENGTH`` and at most
     ``DEFAULT_LENGTH + 2`` characters.
     """
     writer = randomwriter.RandomWriter(1, randomwriter.Tokenization.character)
     writer.train_iterable("abcaea")
     lower_bound = self.DEFAULT_LENGTH
     with nonexistant_filename() as fn:
         writer.generate_file(fn, lower_bound)
         with open(fn, "rt") as handle:
             size = len(handle.read())
         self.assertGreaterEqual(size, lower_bound)
         self.assertLessEqual(size, lower_bound + 2)
Beispiel #6
0
 def test_numeric_sequence_in(self):
     """Train on a tuple of ints and check that several sequences are generated.

     Also checks that the first generated token is an ``int``.
     """
     writer = randomwriter.RandomWriter(2)
     training = (1, 2, 3, 4, 5, 5, 5, 4, 3, 2, 1, 2, 4, 5)
     writer.train_iterable(training)

     first_token = next(iter(writer.generate()))
     self.assertIsInstance(first_token, int)

     # Only the first check passes an explicit ``times`` value.
     self.assertContainsSequence(writer.generate(), [3, 4, 5, 5, 4, 3, 2], times=10)
     remaining = (
         [3, 4, 5, 5, 5, 5, 4, 3, 2],
         [5, 5, 5, 5, 5],
         [3, 2, 1, 2, 4, 5, 5, 4],
         [3, 2, 1, 2, 3, 4, 5, 5, 4],
     )
     for expected in remaining:
         self.assertContainsSequence(writer.generate(), expected)
Beispiel #7
0
 def test_generate_file_bytes(self):
     """Write a file from a byte-tokenized writer and look for ``"worm"`` in it."""
     passage = (
         b"What a piece of work is man! how noble in reason! how infinite in faculty! in form and moving how express and admirable! "
         b"in action how like an angel! in apprehension how like a god! the beauty of the world, the paragon of animals!"
     )
     writer = randomwriter.RandomWriter(2, randomwriter.Tokenization.byte)
     writer.train_iterable(passage)
     with nonexistant_filename() as fn:
         writer.generate_file(fn, self.DEFAULT_LENGTH)
         # NOTE(review): the file is read back in text mode and compared with a
         # str even though the writer is byte-tokenized -- confirm intended.
         with open(fn, "rt") as handle:
             text = handle.read()
         self.assertContainsSequence(text, "worm")
Beispiel #8
0
 def test_generate_file2(self):
     """Write a file from a word-tokenized writer and check its text.

     The text must contain ``"the word"`` (``times=100``) and must not contain
     ``"the a"``.
     """
     writer = randomwriter.RandomWriter(1, randomwriter.Tokenization.word)
     writer.train_iterable("a the word the")
     with nonexistant_filename() as fn:
         writer.generate_file(fn, self.DEFAULT_LENGTH)
         with open(fn, "rt") as handle:
             text = handle.read()
         self.assertContainsSequence(text, "the word", times=100)
         self.assertNotContainsSequence(text, "the a")
Beispiel #9
0
 def test_generate_file3(self):
     """Write a file from a writer using ``Tokenization.none`` and int training data.

     The file text is compared against space-separated digit strings.
     """
     writer = randomwriter.RandomWriter(2, randomwriter.Tokenization.none)
     training = (1, 2, 3, 4, 5, 5, 4, 3, 2, 1)
     writer.train_iterable(training)
     with nonexistant_filename() as fn:
         writer.generate_file(fn, self.DEFAULT_LENGTH)
         with open(fn, "rt") as handle:
             text = handle.read()
         self.assertContainsSequence(text, "3 4 5 5 4 3 2", times=100)
         for absent in ("5 5 3", "1 2 5"):
             self.assertNotContainsSequence(text, absent)
Beispiel #10
0
 def test_words2(self):
     """Train a word-tokenized writer (first argument 2) on a text passage.

     Checks that generated tokens are ``str`` and that specific word sequences
     are present in or absent from the generated output.
     """
     passage = (
         "What a piece of work is man! how noble in reason! how infinite in faculty! in form and moving how express and admirable! "
         "in action how like an angel! in apprehension how like a god! the beauty of the world, the paragon of animals!"
     )
     writer = randomwriter.RandomWriter(2, randomwriter.Tokenization.word)
     writer.train_iterable(passage)

     first_token = next(iter(writer.generate()))
     self.assertIsInstance(first_token, str)

     span = 50000  # forwarded as ``length`` to the sequence assertions below
     for present in (
         ["action", "how", "like", "a", "god!"],
         ["infinite", "in", "faculty!"],
     ):
         self.assertContainsSequence(writer.generate(), present, length=span)
     for absent in (["man", "angel"], ["infinite", "in", "reason"]):
         self.assertNotContainsSequence(writer.generate(), absent, length=span)

     # The final check passes a one-element tuple and no ``length``.
     self.assertNotContainsSequence(writer.generate(), ("worm",))
Beispiel #11
0
 def test_generate_file1(self):
     """Write a file from a character-tokenized writer trained on ``"abcaea"``.

     The text must contain ``"abc"`` and ``"aeaeab"`` (``times=100`` each) and
     must not contain ``"ac"``, ``"aa"`` or ``"ce"``.
     """
     writer = randomwriter.RandomWriter(1, randomwriter.Tokenization.character)
     writer.train_iterable("abcaea")
     with nonexistant_filename() as fn:
         writer.generate_file(fn, self.DEFAULT_LENGTH)
         with open(fn, "rt") as handle:
             text = handle.read()
         for present in ("abc", "aeaeab"):
             self.assertContainsSequence(text, present, times=100)
         for absent in ("ac", "aa", "ce"):
             self.assertNotContainsSequence(text, absent)
Beispiel #12
0
 def test_save_load_pickle(self):
     """Save a trained writer with ``save_pickle`` and reload it with ``load_pickle``.

     Besides checking that neither call raises, verify that the reloaded
     object is a ``RandomWriter``.
     """
     rw = randomwriter.RandomWriter(1, randomwriter.Tokenization.character)
     rw.train_iterable("abcaea")
     with nonexistant_filename() as fn:
         rw.save_pickle(fn)
         rw2 = randomwriter.RandomWriter.load_pickle(fn)
         # Previously the loaded object was bound but never inspected, so a
         # load_pickle that returned the wrong thing would still pass.
         self.assertIsInstance(rw2, randomwriter.RandomWriter)
Beispiel #13
0
 def test_generate_count(self):
     """Check that ``generate()`` can supply 10000 tokens.

     Takes the first 10000 items of the generated stream with
     ``itertools.islice`` and asserts that exactly that many were produced.
     """
     passage = (
         "What a piece of work is man! how noble in reason! how infinite in faculty! in form and moving how express and admirable! "
         "in action how like an angel! in apprehension how like a god! the beauty of the world, the paragon of animals!"
     )
     writer = randomwriter.RandomWriter(2, randomwriter.Tokenization.character)
     writer.train_iterable(passage)

     wanted = 10000
     sample = list(itertools.islice(writer.generate(), wanted))
     self.assertEqual(len(sample), wanted)
Beispiel #14
0
 def test_numeric_sequence(self):
     """Train on a tuple of ints and check one present and two absent sequences."""
     writer = randomwriter.RandomWriter(2)
     training = (1, 2, 3, 4, 5, 5, 4, 3, 2, 1)
     writer.train_iterable(training)

     self.assertContainsSequence(writer.generate(), [3, 4, 5, 5, 4, 3, 2], times=10)
     for absent in ([5, 5, 3], [1, 2, 5]):
         self.assertNotContainsSequence(writer.generate(), absent)
Beispiel #15
0
 def test_train_url_utf8(self):
     """Train a character-tokenized writer from a URL and look for ``"ajtób"``.

     Calls ``train_url`` with a remote address, so this presumably needs
     network access -- confirm before running offline.
     """
     source_url = "http://www.singingwizard.org/stuff/utf8test.txt"
     writer = randomwriter.RandomWriter(5, randomwriter.Tokenization.character)
     writer.train_url(source_url)
     self.assertContainsSequence(writer.generate(), "ajtób", length=100000)
Beispiel #16
0
 def test_train_url_word(self):
     """Train a word-tokenized writer from a URL and look for ``["she", "had"]``.

     Calls ``train_url`` with a remote address, so this presumably needs
     network access -- confirm before running offline.
     """
     source_url = "https://www.gutenberg.org/cache/epub/24132/pg24132.txt"
     writer = randomwriter.RandomWriter(1, randomwriter.Tokenization.word)
     writer.train_url(source_url)
     expected_words = ["she", "had"]
     self.assertContainsSequence(writer.generate(), expected_words, length=100000)
Beispiel #17
0
 def test_train_url_bytes(self):
     """Train a byte-tokenized writer from a URL and look for ``b"ad di"``.

     Calls ``train_url`` with a remote address, so this presumably needs
     network access -- confirm before running offline.
     """
     source_url = "https://www.gutenberg.org/cache/epub/24132/pg24132.txt"
     writer = randomwriter.RandomWriter(4, randomwriter.Tokenization.byte)
     writer.train_url(source_url)
     self.assertContainsSequence(writer.generate(), b"ad di", length=300000)