Example #1
 def test_scrape_june__13__2012(self):
   # Scrape the June 13, 2012 report fixture and compare it against the
   # expected newline-delimited JSON stored alongside it.
   mydir = os.path.dirname(__file__)
   html = open(mydir + '/data/ucm308307.htm')
   expected_json = open(mydir + '/data/ucm308307.json').read()
   scraped_list = scrape_historic.scrape_report(html.read())
   # Serialize each scraped record as one JSON object per line.
   actual_json = '\n'.join([json.dumps(s) for s in scraped_list])
   self.assertEqual(expected_json, actual_json, mydir + '/data/ucm308307.json')
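The expected-JSON fixture here is newline-delimited JSON: scrape_report() returns a list of records, and the test serializes each record with json.dumps and joins them with '\n'. A minimal sketch of that serialization, using made-up records purely for illustration (the field names are assumptions, not taken from the real fixture):

 import json

 # Hypothetical records standing in for the output of scrape_historic.scrape_report().
 scraped_list = [
     {'recall-number': 'D-0001-2012', 'status': 'Ongoing'},
     {'recall-number': 'D-0002-2012', 'status': 'Terminated'},
 ]

 # One JSON object per line, exactly as the test builds actual_json above.
 actual_json = '\n'.join([json.dumps(s) for s in scraped_list])
 print(actual_json)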
Example #2
 def test_scrape_june__13__2012(self):
     mydir = os.path.dirname(__file__)
     html = open(mydir + '/data/ucm308307.htm')
     expected_json = open(mydir + '/data/ucm308307.json').read()
     scraped_list = scrape_historic.scrape_report(html.read())
     actual_json = '\n'.join([json.dumps(s) for s in scraped_list])
     self.assertEqual(expected_json, actual_json,
                      mydir + '/data/ucm308307.json')
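Both test listings assume surrounding module scaffolding that the excerpt does not show. A minimal sketch of that scaffolding, assuming the method lives in a unittest.TestCase subclass; the class name and the scrape_historic import path are assumptions for illustration:

 # Sketch of the surrounding test module; class name and import path assumed.
 import json  # used by the test body shown above
 import os    # used by the test body shown above
 import unittest

 from openfda.res import scrape_historic  # assumed import path


 class ScrapeHistoricTest(unittest.TestCase):  # hypothetical class name
     def test_scrape_june__13__2012(self):
         # Body as shown in Example #1 / Example #2 above.
         pass


 if __name__ == '__main__':
     unittest.main()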
Example #3
 def run(self):
     input_dir = self.input().path
     output_dir = self.output().path
     os.system('mkdir -p "%s"' % output_dir)
     target_filename = join(output_dir, 'res_historic.json')
     json_file = open(target_filename, 'w')
     # Scrape every downloaded report page and emit one JSON object per line.
     for html_path in glob.glob(input_dir + '/*/*.htm'):
         html_file = open(html_path, 'r')
         scraped_reports = scrape_historic.scrape_report(html_file.read())
         for report in scraped_reports:
             json_file.write(json.dumps(report) + '\n')
         html_file.close()
     json_file.close()
Example #4
File: pipeline.py  Project: gforz/openfda
 def run(self):
   input_dir = self.input().path
   output_dir = self.output().path
   os.system('mkdir -p "%s"' % output_dir)
   target_filename = join(output_dir, 'res_historic.json')  # join = os.path.join
   json_file = open(target_filename, 'w')
   # Scrape each downloaded page and append one JSON object per line.
   for html_path in glob.glob(input_dir + '/*/*.htm'):
     html_file = open(html_path, 'r')
     scraped_reports = scrape_historic.scrape_report(html_file.read())
     for report in scraped_reports:
       json_file.write(json.dumps(report) + '\n')
     html_file.close()
   json_file.close()
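The self.input().path / self.output().path pattern in Examples #3 and #4 matches the luigi Task API, which the openfda pipeline.py appears to use. A minimal sketch of how this run() method could be wired into such a task; the task names, the upstream dependency, and the target paths below are assumptions for illustration:

 # Sketch of a luigi task around the run() method above; names and paths are assumed.
 import luigi


 class HistoricReportPages(luigi.ExternalTask):
     # Hypothetical upstream dependency: a directory of downloaded .htm report pages.
     def output(self):
         return luigi.LocalTarget('./data/res/historic_html')  # assumed path


 class ScrapeHistoricReports(luigi.Task):
     # Hypothetical task name wrapping the run() method shown in Example #4.
     def requires(self):
         return HistoricReportPages()

     def output(self):
         return luigi.LocalTarget('./data/res/res_historic')  # assumed path

     def run(self):
         # Body as in Example #4: self.input().path is the HTML directory and
         # self.output().path is the directory that receives res_historic.json.
         pass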