예제 #1
0
def main(lang_codes=('is',)):
	"""Run ``process_main`` for the selected languages, in ISO-code order.

	Fetches the ISO-code table from ``scraper.get_iso_codes_hash()``, tags
	every entry with its own code under the ``'isoCode'`` key, sorts the
	entries by that code, and passes each entry whose code is listed in
	*lang_codes* to ``process_main``, one after another.

	Args:
		lang_codes: Iterable of ISO codes to process. Defaults to
			``('is',)``, the single code the previously hard-coded filter
			let through, so ``main()`` behaves as before.
	"""
	# Get ISO codes
	iso_codes_hash = scraper.get_iso_codes_hash()

	# .items() works on both Python 2 and 3; .iteritems() exists only on 2.
	# Each entry is mutated in place, so it is not stored back under its key
	# (assumes the table is a plain dict whose values are dicts).
	for code, info in iso_codes_hash.items():
		info['isoCode'] = code

	iso_language_hashes = sorted(
		iso_codes_hash.values(),
		key=lambda entry: entry['isoCode'],
	)

	wanted = set(lang_codes)
	selected = [
		entry for entry in iso_language_hashes
		if entry['isoCode'] in wanted
	]

	# NOTE: jobs run sequentially. A parallel variant was sketched here but
	# left disabled:
	#pool = multiprocessing.Pool(parallel_level)
	#pool.map(process_main, ['fr'])
	for lang in selected:
		process_main(lang)
예제 #2
0
import scraper

# Print every ISO code together with its display name, one "code: name" line
# per entry of the table returned by scraper.get_iso_codes_hash().
h = scraper.get_iso_codes_hash()

# Not used in the visible part of this script; kept in case later code reads it.
things = []

# .items() works on both Python 2 and 3; .iteritems() exists only on 2.
for code, subh in h.items():
	# Prefer the Wiktionary-specific name when the entry has one. 'name' is
	# still looked up for every entry, exactly as before, so it must exist.
	name = subh.get('wiktionaryName', subh['name'])
	# A single parenthesised argument prints the same text under the
	# Python 2 print statement and the Python 3 print function.
	print("%s: %s" % (code, name))
예제 #3
0
# Regex alternation matching any one of: double quote, parentheses, square
# brackets, braces, angle brackets, ! & ? % + : ; guillemets (« »), = * #,
# newline, period, @ $ backslash ~ and underscore.
# NOTE(review): the name suggests these characters are rejected or stripped
# from phrases; the code that applies the pattern is not visible here.
invalidcharpattern = r"\"|\(|\)|\[|\]|\{|\}|\<|\>|!|&|\?|%|\+|:|;|«|»|=|\*|#|\n|\.|@|\$|\\|~|\_"

# Set up an arg parser with two required positional arguments:
#   lang - key looked up in the ISO-code table below
#   path - stored as args.path for later use
parser = argparse.ArgumentParser()
parser.add_argument("lang")
parser.add_argument("path")

# Parse the arguments (this runs at module level, i.e. at import time)
args = parser.parse_args()

# Connect to the DB
db = dbutils.DBConnect()

# Get language hash: the ISO-code table entry for the requested language.
# NOTE(review): a code missing from the table fails right here (KeyError if
# the table is a plain dict) - confirm whether a friendlier error is wanted.
language_info_hash = scraper.get_iso_codes_hash()[args.lang]

# Module-level counter, initialised to zero; where it is updated is not
# visible in this part of the file.
total_count = 0


################################################################################
# Main
#
################################################################################
def Main():
	db.phrase_counts.remove({
		'lang': args.lang,
	})

	xml_file = args.path #io.open(args.path, encoding='utf-8', errors='replace')