Beispiel #1
0
 def test__articles_dump_to_file(self):
     """Dump three articles to a file and check one page element per title.

     Calls ``webtools.articles_dump_to_file`` to write the dump, parses the
     result with ``parse_tools.parse_xml`` and asserts that the number of
     ``parse_tools.PAGE_TAG`` elements equals the number of requested titles.
     The dump file is removed afterwards so the test leaves no artifacts in
     the working directory.
     """
     import os  # local import: the file-level import block is not visible here

     titles = ['Ross_Ice_Shelf', 'Southern_Cross_Expedition', 'Ice_shelf']
     dump_path = 'test.xml.tmp'
     try:
         webtools.articles_dump_to_file(titles, dump_path)
         root = parse_tools.parse_xml(dump_path)
         pages = root.findall(parse_tools.PAGE_TAG)
         self.assertEqual(len(pages), len(titles))
     finally:
         # Best-effort cleanup: the file may not exist if the dump step
         # failed, and a cleanup problem must not mask the real test result.
         try:
             os.remove(dump_path)
         except OSError:
             pass
Beispiel #2
0
def make_dump(wiki_dump, articles_titles, compress=False):
    """ Log the request, then write the given articles into a dump file.

        All of the work is delegated to web_tools.articles_dump_to_file;
        according to the original notes it downloads the articles from the
        Wikipedia site and merges them into a single dump file.
        @param wiki_dump: output filename of the dump
        @param articles_titles: article's canonic name on Wikipedia web page
        @param compress: forwarded unchanged to
            web_tools.articles_dump_to_file (presumably toggles compression
            of the output -- confirm against that function)
    """
    _log.debug("-" * 80)

    # Each message is formatted right before it is logged, in this order.
    info_lines = (
        ('Executing makedump process on articles: {}', articles_titles),
        ('Dump path: {}', wiki_dump),
    )
    for template, value in info_lines:
        _log.info(template.format(value))

    web_tools.articles_dump_to_file(articles_titles, wiki_dump, compress)
Beispiel #3
0
def make_dump(wiki_dump, articles_titles, compress=False):
    """ Announce the dump request in the log and produce the dump file.

        The dump itself is created by web_tools.articles_dump_to_file, which
        (per the original notes) fetches the articles from the Wikipedia
        site and merges them into one file.
        @param wiki_dump: output filename of the dump
        @param articles_titles: article's canonic name on Wikipedia web page
        @param compress: passed through as-is to
            web_tools.articles_dump_to_file (presumably enables compression
            of the result -- confirm against that function)
    """
    rule = "-" * 80
    _log.debug(rule)

    articles_message = 'Executing makedump process on articles: {}'.format(
        articles_titles)
    _log.info(articles_message)

    path_message = 'Dump path: {}'.format(wiki_dump)
    _log.info(path_message)

    web_tools.articles_dump_to_file(articles_titles, wiki_dump, compress)