def main(): if args.lang == 'zh': coll.update( { }, { '$unset': {'trad': 1}, }, multi=True, ) # Choose a function f_map = { 'he': get_pronunciation_he, 'zh': get_pronunciation_zh, } get_pronunciation_f = f_map[args.lang] count = 0 cursor = coll.find({ 'txs': {'$exists': 1 } }) total_count = cursor.count() for phrase in cursor: updates = None # Special dictionary-based pronunciation if args.lang == 'zh': entry = db.translations_zh.find_one({ 'base': phrase['base'] }) if entry: updates = { 'pron': entry['pron'] } # Fall back onto generic wiktionary-based pronunciation if not updates: section = dbutils.get_section_for_phrase(db, phrase) if section != None: updates = get_pronunciation_f(phrase, section['text']) # Perform the updates, if they exist if updates: coll.update( { 'base': phrase['base'] }, { '$set': updates, }, multi=True, upsert=True, ) # Increase the ticker count += 1 print "\r", "{0:.2f}".format(100. * float(count) / float(total_count)), '%',
def phrase_page(lang, base):
    """Render the edit page for one phrase and apply submitted edits.

    Only a verified user holding the ``MANAGE_DICTIONARY`` permission may
    use the page; anyone else is redirected to ``unauthorized_page``.

    On ``POST`` every translation stored under ``phrase['txs']`` is updated
    from the form fields ``tx-deleted-<key>-<rank>``, ``tx-<key>-<rank>`` and
    ``tx-rank-<key>-<rank>``, where ``<rank>`` is the translation's currently
    stored rank.  Each translation list is then re-sorted by its new rank
    and the document is written back.

    Args:
        lang: value matched against the ``lang`` field of the phrase.
        base: value matched against the ``base`` field of the phrase.

    Returns:
        The rendered ``phrase.html`` template, or a redirect response when
        the caller is not authorised.
    """
    # Authentication
    user_profile = auth.verify_auth_token()
    # NOTE(review): phrases_page checks 'manage_dictionary' (lower case);
    # confirm which spelling auth.has_permission expects.
    if user_profile is None or not auth.has_permission(
            user_profile, 'MANAGE_DICTIONARY'):
        return flask.redirect(flask.url_for('unauthorized_page'))

    # Get the phrase from the database
    phrase = mongo.db.phrases.find_one({'lang': lang, 'base': base})
    # NOTE(review): phrase may be None here (unknown lang/base); confirm
    # that get_section_for_phrase tolerates that.
    section = dbutils.get_section_for_phrase(mongo, phrase)

    if phrase and flask.request.method == 'POST':
        form = flask.request.form

        # A phrase may not have any translations yet; in that case there is
        # nothing to edit, so iterate over an empty mapping instead of
        # failing with a KeyError.
        txs_by_key = phrase.get('txs', {})

        # Apply changes from the form
        for k, txs in txs_by_key.iteritems():
            for tx in txs:
                # Form fields are keyed by the rank before this edit
                orig_rank = int(tx['rank'])
                tx['deleted'] = form["tx-deleted-%s-%d" % (k, orig_rank)] == '1'
                tx['text'] = form["tx-%s-%d" % (k, orig_rank)]
                tx['rank'] = int(form["tx-rank-%s-%d" % (k, orig_rank)])
            # Replacing the value of an existing key is safe while iterating
            txs_by_key[k] = sorted(txs, key=lambda item: item['rank'])

        # Perform the update (replaces the stored document)
        mongo.db.phrases.update(
            {'lang': lang, 'base': base},
            phrase,
        )

    # Parse parameters; a missing or non-numeric value falls back to 0
    show_deleted = flask.request.args.get('show-deleted', 0, type=int)

    # Pass the translation to the template
    return flask.render_template(
        'phrase.html',
        lang=lang,
        phrase=phrase,
        section=section,
        show_deleted=show_deleted,
    )
def phrases_page(lang):
    """Render the list of phrases of one language within a rank range.

    Only a verified user holding the ``manage_dictionary`` permission may
    use the page; anyone else is redirected to ``unauthorized_page``.

    The inclusive rank range comes from the ``min-phrase`` and
    ``max-phrase`` query parameters.  If either one is missing, empty or
    not an integer, the range defaults to 0..99.

    Each phrase handed to the template gets ``has_section = 1`` when
    ``dbutils.get_section_for_phrase`` returns a truthy value for it, and
    ``has_txs = 1`` when it has a ``txs`` field.

    Args:
        lang: value matched against the ``lang`` field of the phrases.

    Returns:
        The rendered ``phrases.html`` template, or a redirect response when
        the caller is not authorised.
    """
    # Authentication
    user_profile = auth.verify_auth_token()
    if user_profile is None or not auth.has_permission(
            user_profile, 'manage_dictionary'):
        return flask.redirect(flask.url_for('unauthorized_page'))

    # type=int yields None for a missing or malformed value instead of
    # raising, so bad input falls through to the default range below.
    query = flask.request.args
    min_phrase = query.get('min-phrase', type=int)
    max_phrase = query.get('max-phrase', type=int)
    # Compare against None explicitly: 0 is a valid bound.
    if min_phrase is None or max_phrase is None:
        min_phrase = 0
        max_phrase = 99

    phrases = mongo.db.phrases.find({
        'lang': lang,
        'rank': {'$gt': min_phrase - 1, '$lt': max_phrase + 1},
    }).sort('rank', 1)

    # Annotate each phrase with the flags the template reads
    processed_phrases = []
    for phrase in phrases:
        if dbutils.get_section_for_phrase(mongo, phrase):
            phrase['has_section'] = 1
        if 'txs' in phrase:
            phrase['has_txs'] = 1
        processed_phrases.append(phrase)

    # Render the template; the annotated phrases are passed under the
    # 'phrase_counts' name
    return flask.render_template(
        'phrases.html',
        lang=lang,
        min_phrase=min_phrase,
        max_phrase=max_phrase,
        phrase_counts=processed_phrases,
    )
def Main() : phrases = None if args.base != None: phrases = coll.find( { 'base': args.base, }, ).sort('rank', 1) else: print "update lang" coll.update( { }, { '$unset': { 'txs': 1 } }, multi=True, upsert=True, ) phrases = coll.find({ }).sort('rank', 1) print "Starting..." count = 0 started = False for phrase in phrases: if args.skipto and not started: if phrase['base'] == args.skipto: started = True else: continue if args.lang == 'zh': entry = db.translations_zh.find_one({ 'base': phrase['base'] }) if entry: all_txs = {} for pron in entry['txs'].keys(): txs = [{ "text" : entry['txs'][pron][rank], "rank" : rank, "deleted": False, } for rank in xrange(len(entry['txs'][pron]))] all_txs[pron] = txs write_translations(phrase['base'], all_txs) count += 1 if count % 100 == 0: print count continue #print phrase['base'] t0 = datetime.now() section = dbutils.get_section_for_phrase(db, phrase) t1 = datetime.now() #print " get section: ", (t1-t0) if section != None: base = phrase['base'] text = section['text'] # Get the translations t0 = datetime.now() tx_hash = process_text(base, text) t1 = datetime.now() #print " process: ", (t1-t0) # Write the document if it exists if tx_hash is not None: t0 = datetime.now() write_translations(base, tx_hash) t1 = datetime.now() #print " insert: ", (t1-t0) count += 1 if count % 100 == 0: print count if args.base: section = dbutils.get_section_for_phrase(db, phrase) phrase = coll.find_one( { 'base': args.base, }, ) print section['text'] print phrase['txs']