def test__make_dump(self):
    """Write a dump for one article title and verify the titles read back.

    Calls ``wiki_knowledge.make_dump`` with ``self.tmp_dump_file`` and the
    requested titles, then collects the ``title`` of every page yielded by
    ``parse_tools.iterate_wiki_pages`` for that same file and compares the
    resulting set with the requested one.
    """
    requested_titles = ["Rain"]
    # Snapshot the expectation before the dump call, as a set so that
    # the comparison ignores ordering.
    wanted = set(requested_titles)

    # Create the dump file.
    wiki_knowledge.make_dump(self.tmp_dump_file, requested_titles)

    # Gather the titles found when iterating over the dump file.
    found = set()
    for page in parse_tools.iterate_wiki_pages(self.tmp_dump_file):
        found.add(page.title)

    self.assertEqual(found, wanted, "title mismatch")
def test__make_dump(self):
    """Check that the titles in a freshly made dump match those requested.

    ``wiki_knowledge.make_dump`` is called with ``self.tmp_dump_file`` and a
    one-element title list; the pages are then iterated with
    ``parse_tools.iterate_wiki_pages`` and their titles compared, as sets,
    with the requested titles.
    """
    titles_to_dump = ["Rain"]
    expected_titles = set(titles_to_dump)

    # Create the dump file.
    wiki_knowledge.make_dump(self.tmp_dump_file, titles_to_dump)

    # Read the dump back and keep only the page titles.
    dumped_pages = parse_tools.iterate_wiki_pages(self.tmp_dump_file)
    dumped_titles = set(page.title for page in dumped_pages)

    self.assertEqual(dumped_titles, expected_titles, "title mismatch")
def test_extract_pages(self):
    """Regression check of the pages produced by ``pt.iterate_wiki_pages``.

    Iterates over the input file resolved by
    ``getInputFile(FilesList.test__parse_tools)`` and compares, in order,
    each page's ``(id, title, len(wiki_text))`` with the recorded values.
    """
    # Recorded (id, title, wiki_text length) triples, in iteration order.
    expected = [
        (243478, 'Ross Ice Shelf', 13734),
        (18798090, 'Southern Cross Expedition', 39110),
        (343246, 'Ice shelf', 8262),
    ]
    test__parse_tools_xml = getInputFile(FilesList.test__parse_tools)
    actual = [
        (wdoc.id, wdoc.title, len(wdoc.wiki_text))
        for wdoc in pt.iterate_wiki_pages(test__parse_tools_xml)
    ]
    # The message repeats both sequences so a failure shows them in full.
    self.assertSequenceEqual(
        actual,
        expected,
        "Assertion failure: \nActual={}\nExpected={}".format(actual, expected),
    )
def test_extract_pages(self):
    """Regression check of the pages produced by ``pt.iterate_wiki_pages``.

    Builds ``(id, title, len(wiki_text))`` for every page yielded from the
    input file resolved by ``getInputFile(FilesList.test__parse_tools)`` and
    asserts that the sequence equals the recorded values, order included.
    """
    # Recorded (id, title, wiki_text length) triples, in iteration order.
    expected = [
        (243478, 'Ross Ice Shelf', 13734),
        (18798090, 'Southern Cross Expedition', 39110),
        (343246, 'Ice shelf', 8262),
    ]
    test__parse_tools_xml = getInputFile(FilesList.test__parse_tools)
    actual = [
        (wdoc.id, wdoc.title, len(wdoc.wiki_text))
        for wdoc in pt.iterate_wiki_pages(test__parse_tools_xml)
    ]
    # The message repeats both sequences so a failure shows them in full.
    failure_message = "Assertion failure: \nActual={}\nExpected={}".format(
        actual, expected)
    self.assertSequenceEqual(actual, expected, failure_message)