Example #1
0
 def test_write_freq(self):
     """Round-trip a small Counter through ``Worker.write_freq``.

     The counter is written to ``write_freq_test.json`` under
     ``Config.output_path``; the file is then read back, parsed with
     ``ast.literal_eval`` and compared against the original pairs.
     """
     target = Config.output_path + "/write_freq_test.json"
     word_counts = Counter(one=1, two=2, three=3)
     Worker().write_freq(target, word_counts)

     # The file content is parsed as a Python literal, not via ``json``.
     with open(target) as handle:
         parsed = ast.literal_eval(handle.read())

     self.assertDictEqual(
         parsed,
         {"one": 1, "two": 2, "three": 3},
         'counter written to disk did not have right key/values')
Example #2
0
 def test_word_freq(self):
     """Check per-word counts obtained for ``data/test/pledge.txt``.

     The result of ``Worker.word_freq`` is handed to ``Master.read_freq``
     and the returned object is indexed by word.  Lower-cased words are
     expected to be counted once; the capitalised key ``'United'`` is
     expected to have a count of zero.
     """
     pledge_path = "data/test/pledge.txt"
     self.assertTrue(isfile(pledge_path),
                     'data/test/pledge.txt should exist')

     master = Master([Config.export_path])
     counter = master.read_freq(Worker().word_freq(pledge_path))

     # (word, expected count, failure message) -- checked in this order.
     expectations = (
         ('pledge', 1,
          'the pledge should contain the word pledge once'),
         ('allegiance', 1,
          'the pledge should contain the word allegiance once'),
         ('united', 1,
          'the pledge should contain the word United once'),
         ('United', 0,
          'word_freq should count the word United as lowercase'),
         ('god', 1,
          'the pledge should contain the word God once'),
     )
     for word, expected_count, message in expectations:
         self.assertEqual(counter[word], expected_count, message)
Example #3
0
    def test_local_end_to_end(self):

        os.system("cp data/test/*.txt %s" % Config.input_path)
        worker = Worker()
        worker.process_input()
        master = Master(['.'])
        master.synch_all_workers()
        master.tally()
        output1 = master.output()
        expected_output1 = '''the: 56510
and: 37915
to: 27984
of: 27884
a: 22899
i: 22159
in: 17366
it: 15182
that: 14578
was: 13184
'''
        self.assertEqual(output1, expected_output1,
                         'End to End output1 not as expected')

        uri = 'http://www.constitution.org/usdeclar.txt'
        worker.injest(uri)
        worker.process_input()
        master.synch_all_workers()
        master.tally()
        output2 = master.output()
        expected_output2 = '''the: 56588
and: 37972
to: 28049
of: 27964
a: 22914
i: 22159
in: 17387
it: 15188
that: 14591
was: 13184
'''
        self.assertEqual(output2, expected_output2,
                         'End to End output2 not as expected')
Example #4
0
    def test_injest(self):
        """Ingest one URL and one local path and verify the stored files.

        For each source, the expected destination is ``Config.input_path``
        joined with the source's basename; after ``Worker.injest`` runs, the
        file at that destination is checked against a known SHA-1 digest.
        """
        worker = Worker()

        # (source, expected SHA-1 of the ingested file), handled in order.
        sources = (
            ('http://www.constitution.org/usdeclar.txt',
             '15684690e8132044f378b4d4af8a7331c8da17b1'),
            ("data/test/pledge.txt",
             'b253badebab8945669ecb7e2181bc22e0c9998b5'),
        )
        for source, digest in sources:
            destination = Config.input_path + '/' + basename(source)
            worker.injest(source)
            self.assertFileHasSha1(destination, digest)
Example #5
0
 def test_process_input(self):
     """Check that processing yields one export entry per input entry.

     Copies the ``data/test/*.txt`` fixtures into ``Config.input_path``,
     runs ``Worker.process_input`` and compares the lengths of
     ``ls(Config.export_path)`` and ``ls(Config.input_path)``.

     NOTE(review): ``ls`` is defined outside this snippet; presumably it
     returns a directory listing -- confirm.
     """
     # Function-scope imports keep this method self-contained.
     import glob
     import shutil

     # Copy the fixtures with the standard library rather than a shell
     # ``cp`` command: a failed copy raises instead of being silently
     # ignored, and the destination path is never interpolated into a
     # shell command line.
     for fixture in glob.glob("data/test/*.txt"):
         shutil.copy(fixture, Config.input_path)

     Worker().process_input()
     self.assertEqual(len(ls(Config.export_path)),
                      len(ls(Config.input_path)))