def test_parser_correctness(self):
    """Check the parser's output against the known metadata of every sample.

    For each ``(id, metadata)`` pair in ``known_metadata_mappings`` the page
    HTML is fetched from the live permalink when ``self.use_live_page`` is
    truthy, otherwise it is read from ``samples/<id>.html``. The HTML is
    passed to ``parser.parse_img_html_page`` and the resulting dict must
    equal the known metadata.
    """
    for sample_id, expected_output in known_metadata_mappings.items():
        if self.use_live_page:
            url_to_live_page = scraper.id_to_page_permalink(sample_id)
            response = urllib.urlopen(url_to_live_page)
            # Close the connection explicitly instead of leaving it to
            # garbage collection; try/finally is used because the response
            # object is not guaranteed to be a context manager.
            try:
                html = response.read()
            finally:
                response.close()
        else:
            path_to_local_copy = "samples/" + str(sample_id) + ".html"
            # The context manager closes the sample file even if read() fails.
            with open(path_to_local_copy, 'r') as fp:
                html = fp.read()

        actual_output = parser.parse_img_html_page(html)
        self.assertDictEqual(actual_output, expected_output)


# NOTE(review): the triple-quoted text below appears to be a disabled earlier
# copy of the test above -- confirm it is dead and delete it.
'''
def test_parser_correctness(self):
    for id, known_metadata in known_metadata_mappings.items():
        url_to_live_page = scraper.id_to_page_permalink(id)
        path_to_local_copy = "samples/" + str(id) + ".html"
        expected_output = known_metadata
        if self.use_live_page:
            html = urllib.urlopen(url_to_live_page).read()
        else:
            fp = open(path_to_local_copy, 'r')
            html = fp.read()
        actual_output = parser.parse_img_html_page(html)
        self.assertDictEqual(actual_output, expected_output)
'''
def post_processing(data_dict, page_permalink=None):
    """Attach the page permalink to a metadata dict and hand the dict back.

    The permalink is obtained by passing ``data_dict['id']`` to
    ``scraper.id_to_page_permalink`` and is stored in place under the
    ``'page_permalink'`` key; the same dict object is then returned.

    NOTE(review): the ``page_permalink`` argument is accepted but never
    read -- the stored value always comes from the id lookup. Confirm
    whether callers expect the argument to take precedence.
    """
    to_permalink = scraper.id_to_page_permalink
    permalink = to_permalink(data_dict['id'])
    data_dict['page_permalink'] = permalink
    return data_dict