def test__articles_dump_to_file(self):
    """Dump three known articles into a temp XML file and check the page count.

    NOTE(review): performs live downloads via webtools — requires network.
    """
    article_names = ['Ross_Ice_Shelf', 'Southern_Cross_Expedition', 'Ice_shelf']
    dump_path = 'test.xml.tmp'

    webtools.articles_dump_to_file(article_names, dump_path)

    # One <page> element is expected per requested article title.
    tree_root = parse_tools.parse_xml(dump_path)
    found_pages = tree_root.findall(parse_tools.PAGE_TAG)
    self.assertEqual(len(found_pages), len(article_names))
def make_dump(wiki_dump, articles_titles, compress=False):
    """Download specified articles from Wikipedia site, merges them into
    one file, compresses it as Wikipedia dump file

    @param wiki_dump: output filename (if not specified default is used)
    @param articles_titles: article's canonic name on Wikipedia web page
    @param compress: when True the resulting dump file is compressed
    """
    separator = "-" * 80
    _log.debug(separator)
    _log.info(
        'Executing makedump process on articles: {}'.format(articles_titles))
    _log.info('Dump path: {}'.format(wiki_dump))
    web_tools.articles_dump_to_file(articles_titles, wiki_dump, compress)
def make_dump(wiki_dump, articles_titles, compress=False):
    """Download specified articles from Wikipedia site, merges them into
    one file, compresses it as Wikipedia dump file

    NOTE(review): this definition appears to duplicate an identical
    make_dump elsewhere in this source; if both live in the same module,
    the later definition silently shadows the earlier one — confirm and
    remove one copy.

    @param wiki_dump: output filename (if not specified default is used)
    @param articles_titles: article's canonic name on Wikipedia web page
    @param compress: when True the resulting dump file is compressed
    """
    _log.debug("-" * 80)
    _log.info('Executing makedump process on articles: {}'.format(
        articles_titles))
    _log.info('Dump path: {}'.format(wiki_dump))
    web_tools.articles_dump_to_file(articles_titles, wiki_dump, compress)