def Main():
    """Rebuild ``db.sections`` from the English Wiktionary dump.

    Clears ``db.sections`` by calling ``remove`` with an empty filter,
    asks ``scraper`` for the path of the 'en' dump, and then hands that
    path to ``scraper.parse_pages`` with ``process_text`` as the per-page
    text callback.
    """
    # Start from a clean slate: an empty filter is passed to remove().
    match_everything = {}
    db.sections.remove(match_everything)

    # The dump location is resolved by the scraper module for language 'en'.
    dump_path = scraper.get_wiktionary_dump_path('en')

    # No phrase whitelist and no page cap are supplied; progress display
    # is switched on.
    scraper.parse_pages(
        db,
        xml_file=dump_path,
        valid_phrases=None,
        process_text_f=process_text,
        show_progress=True,
        max_pages=None,
    )
def Main():
    """Recount phrases for ``args.lang`` and print the total.

    Removes the ``db.phrase_counts`` entries whose ``'lang'`` equals
    ``args.lang``, runs ``scraper.parse_pages`` over the file at
    ``args.path`` (capped at 100000 pages, progress display off) with
    ``process_text`` as the per-page callback, and finally prints the
    module-level ``total_count``.
    """
    global total_count

    # Drop only the entries belonging to the language being processed.
    lang_filter = {'lang': args.lang}
    db.phrase_counts.remove(lang_filter)

    # The path itself is handed to the parser; no file object is opened here.
    dump_path = args.path

    # Single-argument parenthesised print emits the same output as the
    # Python 2 print statement (here: one empty line).
    print("")

    scraper.parse_pages(
        db,
        xml_file=dump_path,
        max_pages=100000,
        process_text_f=process_text,
        show_progress=False,
    )

    print("Total count: %s" % total_count)