Esempio n. 1
0
def main():
	if args.lang == 'zh':
		coll.update(
			{ },
			{
				'$unset': {'trad': 1},
			},
			multi=True,
		)

	# Choose a function
	f_map = {
		'he': get_pronunciation_he,
		'zh': get_pronunciation_zh,
	}
	get_pronunciation_f = f_map[args.lang]

	count = 0
	cursor = coll.find({ 'txs': {'$exists': 1 } })
	total_count = cursor.count()
	for phrase in cursor:
		updates = None

		# Special dictionary-based pronunciation
		if args.lang == 'zh':
			entry = db.translations_zh.find_one({ 'base': phrase['base'] })
			if entry:
				updates = { 'pron': entry['pron'] }
		
		# Fall back onto generic wiktionary-based pronunciation
		if not updates:
			section = dbutils.get_section_for_phrase(db, phrase)
			if section != None:
				updates = get_pronunciation_f(phrase, section['text'])

		# Perform the updates, if they exist
		if updates:
			coll.update(
				{
					'base': phrase['base']
				},
				{
					'$set': updates,
				},
				multi=True,
				upsert=True,
			)

		# Increase the ticker
		count += 1
		print "\r", "{0:.2f}".format(100. * float(count) / float(total_count)), '%',
Esempio n. 2
0
def phrase_page(lang, base):
	"""Render the edit page for one phrase, applying submitted edits on POST.

	Parameters:
		lang: value matched against the 'lang' field in mongo.db.phrases.
		base: value matched against the 'base' field in mongo.db.phrases.

	Returns:
		A redirect to 'unauthorized_page' when there is no verified user
		profile or it lacks the 'MANAGE_DICTIONARY' permission; otherwise
		the rendered 'phrase.html' template.

	On POST, every translation of the phrase is updated from the form fields
	"tx-deleted-<key>-<rank>", "tx-<key>-<rank>" and "tx-rank-<key>-<rank>",
	each translation list is re-sorted by rank, and the stored document is
	replaced with the modified one.
	"""

	# Authentication
	user_profile = auth.verify_auth_token()
	if user_profile is None or not auth.has_permission(user_profile, 'MANAGE_DICTIONARY'):
		return flask.redirect(flask.url_for('unauthorized_page'))

	# Get the phrase from the database
	phrase = mongo.db.phrases.find_one({
		'lang': lang,
		'base': base
	})

	# NOTE(review): phrase is None when nothing matches; presumably
	# get_section_for_phrase copes with that -- confirm.
	section = dbutils.get_section_for_phrase(mongo, phrase)

	if phrase and flask.request.method == 'POST':
		form = flask.request.form

		# A phrase document is not guaranteed to carry 'txs'; with none
		# there is simply nothing to edit (indexing it used to raise KeyError).
		translations = phrase.get('txs', {})

		# Apply changes from the form
		for k, txs in translations.items():
			for tx in txs:
				# Form fields are named after the rank the entry had when
				# the page was rendered, so read it before overwriting it.
				orig_rank = int(tx['rank'])
				tx['deleted'] = form["tx-deleted-%s-%d" % (k, orig_rank)] == '1'
				tx['text'] = form["tx-%s-%d" % (k, orig_rank)]
				tx['rank'] = int(form["tx-rank-%s-%d" % (k, orig_rank)])
			translations[k] = sorted(txs, key=lambda tx: tx['rank'])

		# Perform the update (replaces the stored document with the edited one)
		mongo.db.phrases.update(
			{
				'lang': lang,
				'base': base
			},
			phrase,
		)

	# Parse parameters; type=int makes a non-numeric value fall back to the
	# default instead of raising ValueError out of the view.
	show_deleted = flask.request.args.get('show-deleted', 0, type=int)

	# Pass the translation to the template
	return flask.render_template('phrase.html',
		lang         = lang,
		phrase       = phrase,
		section      = section,
		show_deleted = show_deleted
	)
Esempio n. 3
0
def phrases_page(lang):
	"""Render the list of phrases of one language within a rank window.

	Parameters:
		lang: value matched against the 'lang' field in mongo.db.phrases.

	Query-string parameters:
		min-phrase, max-phrase: integer bounds of the rank window. Unless
		both are present and numeric, the window defaults to 0..99.

	Returns:
		A redirect to 'unauthorized_page' when there is no verified user
		profile or it lacks the 'manage_dictionary' permission; otherwise
		the rendered 'phrases.html' template.
	"""

	# Authentication
	user_profile = auth.verify_auth_token()
	if user_profile is None or not auth.has_permission(user_profile, 'manage_dictionary'):
		return flask.redirect(flask.url_for('unauthorized_page'))

	# type=int yields None for a missing, empty or non-numeric value, so bad
	# input selects the default window instead of raising ValueError.
	min_phrase = flask.request.args.get('min-phrase', type=int)
	max_phrase = flask.request.args.get('max-phrase', type=int)
	if min_phrase is None or max_phrase is None:
		min_phrase = 0
		max_phrase = 99

	# Phrases with min_phrase-1 < rank < max_phrase+1, in ascending rank order
	phrases = mongo.db.phrases.find({
		'lang': lang,
		'rank': { '$gt': min_phrase-1, '$lt': max_phrase+1 }
	}).sort('rank', 1)

	# Flag each phrase with what is available for it
	processed_phrases = []
	for phrase in phrases:
		if dbutils.get_section_for_phrase(mongo, phrase):
			phrase['has_section'] = 1
		if 'txs' in phrase:
			phrase['has_txs'] = 1
		processed_phrases.append(phrase)

	# Render the template; 'phrase_counts' receives the flagged phrase documents
	return flask.render_template('phrases.html',
		lang          = lang,
		min_phrase    = min_phrase,
		max_phrase    = max_phrase,
		phrase_counts = processed_phrases,
	)
def Main() :
	"""Rebuild the 'txs' translations of the phrases in `coll`.

	Reads the module-level `args`, `coll` and `db`.

	* With args.base set, only documents with that 'base' are processed and,
	  at the end, the section text and the stored 'txs' are printed.
	* Otherwise 'txs' is first removed from every document and all phrases
	  are processed in ascending rank order.

	When args.skipto is set, phrases are skipped until one whose 'base'
	equals it is reached. For 'zh', a matching db.translations_zh entry is
	converted directly into translations; every other phrase goes through
	process_text() on its section text. A running count is printed after
	every 100 phrases written.
	"""
	if args.base is not None:
		phrases = coll.find({ 'base': args.base }).sort('rank', 1)
	else:
		print("update lang")
		# No upsert here: with an empty filter, an upsert against an empty
		# collection inserts a stray document that has no 'base'.
		coll.update(
			{ },
			{ '$unset': { 'txs': 1 } },
			multi=True,
		)
		phrases = coll.find({ }).sort('rank', 1)

	print("Starting...")

	count = 0
	started = False
	last_phrase = None  # stays None when the cursor yields nothing
	for phrase in phrases:
		last_phrase = phrase

		# Skip ahead until the requested starting phrase is reached
		if args.skipto and not started:
			if phrase['base'] != args.skipto:
				continue
			started = True

		base = phrase['base']
		tx_hash = None

		# Chinese: prefer the dedicated dictionary collection
		entry = None
		if args.lang == 'zh':
			entry = db.translations_zh.find_one({ 'base': base })

		if entry:
			# One list per pronunciation; rank is the position in that list
			tx_hash = {}
			for pron, texts in entry['txs'].items():
				tx_hash[pron] = [{
					"text"   : text,
					"rank"   : rank,
					"deleted": False,
				} for rank, text in enumerate(texts)]
		else:
			# Generic path: extract translations from the section text
			section = dbutils.get_section_for_phrase(db, phrase)
			if section is not None:
				tx_hash = process_text(base, section['text'])

		# Nothing could be produced for this phrase
		if tx_hash is None:
			continue

		write_translations(base, tx_hash)
		count += 1
		if count % 100 == 0:
			print(count)

	# Single-phrase mode: show what was read and what ended up stored.
	# Guarded so that an unknown base, a missing section or a phrase left
	# without translations no longer ends in an exception.
	if args.base and last_phrase is not None:
		section = dbutils.get_section_for_phrase(db, last_phrase)
		stored = coll.find_one({ 'base': args.base })
		if section is not None:
			print(section['text'])
		if stored is not None and 'txs' in stored:
			print(stored['txs'])