Example #1
0
def Main():
	"""Clear ``db.sections`` and re-parse the 'en' Wiktionary dump.

	The dump location is obtained from
	``scraper.get_wiktionary_dump_path('en')`` and handed to
	``scraper.parse_pages`` together with ``process_text`` (as
	``process_text_f``). Progress display is switched on, and neither a
	phrase filter nor a page limit is supplied (both are ``None``).
	"""
	# Empty filter: drop whatever is currently stored in the collection.
	db.sections.remove({})

	dump_path = scraper.get_wiktionary_dump_path('en')

	# Run the parser over the dump located above.
	parse_pages = scraper.parse_pages
	parse_pages(
		db,
		xml_file=dump_path,
		valid_phrases=None,
		process_text_f=process_text,
		show_progress=True,
		max_pages=None,
	)
def Main():
	"""Reset the stored counts for ``args.lang`` and parse ``args.path``.

	Steps:
	  1. Remove the documents in ``db.phrase_counts`` whose ``'lang'``
	     field equals ``args.lang``.
	  2. Run ``scraper.parse_pages`` on the file at ``args.path`` with
	     ``process_text`` as ``process_text_f``, a cap of 100000 pages and
	     progress display turned off.
	  3. Print the module-level ``total_count``.

	NOTE(review): ``total_count`` is presumably accumulated by
	``process_text`` while parsing -- confirm against its definition.
	"""
	# Clear this language's earlier results before the new run.
	db.phrase_counts.remove({
		'lang': args.lang,
	})

	xml_file = args.path

	# Emit an empty line ahead of the run's output. The parenthesised
	# single-argument form prints the same bytes on Python 2 and is also
	# valid syntax on Python 3.
	print("")

	scraper.parse_pages(db,
		xml_file       = xml_file,
		max_pages      = 100000,
		process_text_f = process_text,
		show_progress  = False)

	# total_count is only read here, so no ``global`` declaration is needed.
	print("Total count: %s" % total_count)