Ejemplo n.º 1
0
 def test__make_dump(self):
     """Check that the titles read back from the dump match the requested ones.

     Calls ``wiki_knowledge.make_dump`` with the temporary dump path and a
     single title, then collects the titles yielded by
     ``parse_tools.iterate_wiki_pages`` for that same path.
     """
     requested = ["Rain"]
     # Snapshot the expectation before the dump call touches anything.
     wanted = set(requested)
     wiki_knowledge.make_dump(self.tmp_dump_file, requested)

     pages = parse_tools.iterate_wiki_pages(self.tmp_dump_file)
     found = set(page.title for page in pages)
     self.assertEqual(found, wanted, "title mismatch")
Ejemplo n.º 2
0
    def test__make_dump(self):
        """Verify the dump contains exactly the requested article titles.

        The dump is produced by ``wiki_knowledge.make_dump`` at
        ``self.tmp_dump_file`` and read back through
        ``parse_tools.iterate_wiki_pages``.
        """
        requested_titles = ["Rain"]
        expected_titles = set(requested_titles)

        # Produce the dump for the requested titles.
        wiki_knowledge.make_dump(self.tmp_dump_file, requested_titles)

        # Gather every title found when iterating over the dump.
        dumped_titles = set()
        for page in parse_tools.iterate_wiki_pages(self.tmp_dump_file):
            dumped_titles.add(page.title)

        self.assertEqual(dumped_titles, expected_titles, "title mismatch")
Ejemplo n.º 3
0
    def test_extract_pages(self):
        """Regression check on the pages yielded for the parse-tools fixture.

        Every page is summarised as ``(id, title, len(wiki_text))`` and the
        resulting sequence is compared, in order, with the known values.
        """
        # Rows are (id, title, length of wiki_text).
        expected = [
            (243478, 'Ross Ice Shelf', 13734),
            (18798090, 'Southern Cross Expedition', 39110),
            (343246, 'Ice shelf', 8262),
        ]

        fixture_path = getInputFile(FilesList.test__parse_tools)

        actual = []
        for page in pt.iterate_wiki_pages(fixture_path):
            summary = (page.id, page.title, len(page.wiki_text))
            actual.append(summary)

        failure_msg = "Assertion failure: \nActual={}\nExpected={}".format(
            actual, expected)
        self.assertSequenceEqual(actual, expected, failure_msg)
Ejemplo n.º 4
0
    def test_extract_pages(self):
        """Regression check: the fixture must yield the known page summaries.

        A summary is the tuple ``(id, title, len(wiki_text))``; order matters
        because the comparison is a sequence comparison.
        """
        # Each entry: (id, title, length of wiki_text).
        ross_ice_shelf = (243478, 'Ross Ice Shelf', 13734)
        southern_cross = (18798090, 'Southern Cross Expedition', 39110)
        ice_shelf = (343246, 'Ice shelf', 8262)
        expected = [ross_ice_shelf, southern_cross, ice_shelf]

        xml_input = getInputFile(FilesList.test__parse_tools)
        pages = pt.iterate_wiki_pages(xml_input)
        actual = list(
            (page.id, page.title, len(page.wiki_text)) for page in pages
        )

        message = "Assertion failure: \nActual={}\nExpected={}".format(
            actual, expected)
        self.assertSequenceEqual(actual, expected, message)