def test_scrape_june__13__2012(self):
    """Scrape the ucm308307 HTML fixture and compare it to the stored JSON.

    The expected file holds one JSON document per line, so the scraped
    items are serialized with ``json.dumps`` and joined with newlines
    before the comparison.
    """
    # os.path.join keeps the fixture paths relative when dirname(__file__)
    # is '' (test run from its own directory); plain string concatenation
    # would turn them into the absolute path '/data/...'.
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    html_path = os.path.join(data_dir, 'ucm308307.htm')
    json_path = os.path.join(data_dir, 'ucm308307.json')

    # Context managers close both fixtures even if reading fails.
    with open(html_path) as html_file:
        html = html_file.read()
    with open(json_path) as json_file:
        expected_json = json_file.read()

    scraped_list = scrape_historic.scrape_report(html)
    actual_json = '\n'.join(json.dumps(s) for s in scraped_list)

    # The fixture path is passed as the failure message to identify
    # which expected file mismatched.
    self.assertEqual(expected_json, actual_json, json_path)
def run(self):
    """Scrape every ``*.htm`` file under the input directory into one file.

    Reads the input directory from ``self.input().path`` and the output
    directory from ``self.output().path``. Each file matching
    ``<input_dir>/*/*.htm`` is passed to ``scrape_historic.scrape_report``
    and every report it yields is written as one JSON document per line
    to ``<output_dir>/res_historic.json``.

    Raises:
        OSError: if the output directory cannot be created.
    """
    input_dir = self.input().path
    output_dir = self.output().path

    # Create the directory without shelling out: works for paths that
    # contain quotes, is portable, and surfaces real failures instead of
    # silently ignoring a non-zero exit status.
    try:
        os.makedirs(output_dir)
    except OSError:
        # An already-existing directory is fine; anything else is an error.
        if not os.path.isdir(output_dir):
            raise

    target_filename = join(output_dir, 'res_historic.json')
    # Context managers guarantee the output is flushed and closed, and that
    # each input handle is released before the next file is opened.
    with open(target_filename, 'w') as json_file:
        for html_filename in glob.glob(input_dir + '/*/*.htm'):
            with open(html_filename, 'r') as html_file:
                # The open file object (not its text) is handed to the
                # scraper. Results are consumed while the file is still
                # open in case scrape_report reads lazily.
                reports = scrape_historic.scrape_report(html_file)
                for report in reports:
                    json_file.write(json.dumps(report) + '\n')