def test_write_freq(self):
    """Check that a counter handed to ``Worker.write_freq`` reads back intact.

    A three-entry ``Counter`` is passed to ``write_freq`` together with a
    path under ``Config.output_path``.  The resulting file is parsed as a
    Python literal and compared with the equivalent plain dict.
    """
    target = Config.output_path + "/write_freq_test.json"
    frequencies = Counter({"one": 1, "two": 2, "three": 3})
    Worker().write_freq(target, frequencies)

    # Parse the file contents as a literal rather than evaluating them.
    with open(target) as handle:
        on_disk = ast.literal_eval(handle.read())

    self.assertDictEqual(
        on_disk,
        {"one": 1, "two": 2, "three": 3},
        'counter written to disk did not have right key/values')
def test_word_freq(self):
    """Check per-word counts produced for the pledge fixture.

    The result of ``Worker.word_freq`` for ``data/test/pledge.txt`` is
    passed to ``Master.read_freq`` and the returned counter is inspected
    word by word.  The capitalised key ``'United'`` must count zero while
    the lowercase ``'united'`` counts one.
    """
    pledge_path = "data/test/pledge.txt"
    self.assertTrue(isfile(pledge_path), 'data/test/pledge.txt should exist')

    exported = Worker().word_freq(pledge_path)
    counter = Master([Config.export_path]).read_freq(exported)

    # (word, expected count, failure message), checked in this order.
    expectations = (
        ('pledge', 1, 'the pledge should contain the word pledge once'),
        ('allegiance', 1,
         'the pledge should contain the word allegiance once'),
        ('united', 1, 'the pledge should contain the word United once'),
        ('United', 0, 'word_freq should count the word United as lowercase'),
        ('god', 1, 'the pledge should contain the word God once'),
    )
    for word, count, message in expectations:
        self.assertEqual(counter[word], count, message)
def test_local_end_to_end(self):
    """Run the worker/master pipeline locally, twice, and compare outputs.

    First pass: the ``data/test/*.txt`` fixtures are copied into
    ``Config.input_path``, processed by a ``Worker``, then synchronised and
    tallied by a ``Master`` built with ``['.']``.  Its output is compared
    with a fixed listing.

    Second pass: one more document is fetched through ``worker.injest``
    (an ``http://`` URI, so this presumably needs network access), the
    same steps are repeated, and the output is compared with a second
    fixed listing.
    """
    # Function-scope imports: only the fixture setup below needs them.
    import glob
    import shutil

    # Copy the fixtures in-process instead of shelling out to ``cp``.  The
    # shell command interpolated the destination unquoted and its exit
    # status was discarded, so a failed copy went unnoticed.
    fixtures = sorted(glob.glob("data/test/*.txt"))
    self.assertTrue(fixtures, 'no fixture files match data/test/*.txt')
    for fixture in fixtures:
        shutil.copy(
            fixture,
            os.path.join(Config.input_path, os.path.basename(fixture)))

    worker = Worker()
    worker.process_input()

    master = Master(['.'])
    master.synch_all_workers()
    master.tally()
    output1 = master.output()
    # NOTE(review): both expected literals are kept exactly as they appear
    # in the reviewed source.  The triple quotes suggest they may span
    # several lines on disk; confirm the whitespace before relying on them.
    expected_output1 = '''the: 56510 and: 37915 to: 27984 of: 27884 a: 22899 i: 22159 in: 17366 it: 15182 that: 14578 was: 13184 '''
    self.assertEqual(output1, expected_output1,
                     'End to End output1 not as expected')

    # Add one more document and rerun the whole pipeline.
    uri = 'http://www.constitution.org/usdeclar.txt'
    worker.injest(uri)
    worker.process_input()
    master.synch_all_workers()
    master.tally()
    output2 = master.output()
    expected_output2 = '''the: 56588 and: 37972 to: 28049 of: 27964 a: 22914 i: 22159 in: 17387 it: 15188 that: 14591 was: 13184 '''
    self.assertEqual(output2, expected_output2,
                     'End to End output2 not as expected')
def test_injest(self):
    """Check that ``Worker.injest`` leaves the expected file for each source.

    Two sources are passed to ``injest`` in turn: an ``http://`` URI and a
    path under ``data/test``.  After each call, the file named after the
    source's basename inside ``Config.input_path`` is checked against a
    known SHA-1 digest with ``assertFileHasSha1``.
    """
    worker = Worker()

    # (source, expected SHA-1 of the file left in the input directory)
    cases = (
        ('http://www.constitution.org/usdeclar.txt',
         '15684690e8132044f378b4d4af8a7331c8da17b1'),
        ("data/test/pledge.txt",
         'b253badebab8945669ecb7e2181bc22e0c9998b5'),
    )
    for source, digest in cases:
        landed = Config.input_path + '/' + basename(source)
        worker.injest(source)
        self.assertFileHasSha1(landed, digest)
def test_process_input(self):
    """Check that ``process_input`` yields one export entry per input entry.

    The ``data/test/*.txt`` fixtures are copied into ``Config.input_path``
    and ``Worker.process_input`` is run.  The number of entries that ``ls``
    reports for ``Config.export_path`` must then equal the number it
    reports for ``Config.input_path``.
    """
    # Function-scope imports: only the fixture setup below needs them.
    import glob
    import shutil

    # Copy the fixtures in-process instead of shelling out to ``cp``.  The
    # shell command's exit status was discarded, so a failed copy could
    # let the count comparison below pass without testing anything.
    fixtures = sorted(glob.glob("data/test/*.txt"))
    self.assertTrue(fixtures, 'no fixture files match data/test/*.txt')
    for fixture in fixtures:
        shutil.copy(
            fixture,
            os.path.join(Config.input_path, os.path.basename(fixture)))

    Worker().process_input()

    self.assertEqual(len(ls(Config.export_path)),
                     len(ls(Config.input_path)))