def wenyan():
    """Handle a translation request and render the result page.

    Reads ``input`` and ``lang`` from the request values.  ``lang`` may be
    ``'c2m'`` or ``'m2c'``; any other value triggers automatic detection via
    ``calctxtstat``/``checktxttype``.  When both scores are equal the
    direction stays undecided and the input is only passed through
    ``linebreak``.

    Per-IP usage is tracked in the store returned by ``get_wy_db()``.  Once
    the previous count plus the count of this request exceeds
    ``userlog.maxcnt``, a captcha is generated and passed to the template.

    Returns:
        The rendered ``translate.html`` template, or ``translate_zhtw.html``
        when ``flask.g`` carries a truthy ``accepttw``.
    """
    userlog = get_wy_db()
    tinput = flask.request.values.get('input', '')
    formgetlang = flask.request.values.get('lang')
    if formgetlang in ('c2m', 'm2c'):
        lang = formgetlang
    else:
        # Anything else is treated as "auto": decide from the text scores.
        cscore, mscore = calctxtstat(tinput)
        if cscore == mscore:
            lang = None
        elif checktxttype(cscore, mscore) == 'c':
            lang = 'c2m'
        else:
            lang = 'm2c'
    ip = flask.request.remote_addr
    accepttw = flask.g.get('accepttw')
    # L localizes UI strings: converted with zhconv to zh-tw when requested,
    # otherwise returned untouched.
    L = (lambda x: zhconv(x, 'zh-tw')) if accepttw else (lambda x: x)
    origcnt = userlog.count(ip)
    count = 0
    valid = wy_validate(ip, origcnt, userlog)
    talign = flask.Markup('[]')
    if valid == 1:
        # Validation succeeded: forget the usage recorded for this address.
        origcnt = 0
        userlog.delete(ip)
    elif valid == 0:
        # Lazy %-args: the message is only formatted if the record is emitted.
        logging.warning('Captcha failed: %s', ip)
    if not tinput:
        toutput = ''
    elif valid == 0:
        # Message text: "Wrong answer, please try again."
        toutput = flask.Markup(L('<p class="error">回答错误,请重试。</p>'))
    elif lang is None:
        toutput = linebreak(tinput)
    elif len(tinput) > MAX_CHAR * (CHAR_RATIO if lang == 'c2m' else 1):
        # Message text: "Text too long, please split it before submitting."
        toutput = flask.Markup(L('<p class="error">文本过长,请切分后提交。</p>'))
    else:
        tinput, tres, count = mosesproxy.translate(
            tinput, lang, True, True, True)
        toutput, talign = translateresult(tres, L)
        userlog.add(ip, count)
        userlog.commit()
    captcha = ''
    if origcnt + count > userlog.maxcnt:
        captcha = L(wy_gencaptcha())
    return flask.render_template(
        ('translate_zhtw.html' if accepttw else 'translate.html'),
        tinput=tinput, toutput=toutput, talign=talign,
        captcha=flask.Markup(captcha))
def wenyan():
    """Handle a translation request and render the result page.

    Reads ``input`` and ``lang`` from the request values.  ``lang`` may be
    ``'c2m'`` or ``'m2c'``; any other value triggers automatic detection via
    ``calctxtstat``/``checktxttype``.  When both scores are equal the
    direction stays undecided and the input is only passed through
    ``linebreak``.

    Per-IP usage is tracked in the store returned by ``get_wy_db()``.  Once
    the previous count plus the count of this request exceeds
    ``userlog.maxcnt``, a captcha is generated and passed to the template.

    Returns:
        The rendered ``translate.html`` template, or ``translate_zhtw.html``
        when ``flask.g`` carries a truthy ``accepttw``.
    """
    userlog = get_wy_db()
    tinput = flask.request.values.get('input', '')
    formgetlang = flask.request.values.get('lang')
    if formgetlang in ('c2m', 'm2c'):
        lang = formgetlang
    else:
        # Anything else is treated as "auto": decide from the text scores.
        cscore, mscore = calctxtstat(tinput)
        if cscore == mscore:
            lang = None
        elif checktxttype(cscore, mscore) == 'c':
            lang = 'c2m'
        else:
            lang = 'm2c'
    ip = flask.request.remote_addr
    accepttw = flask.g.get('accepttw')
    # L localizes UI strings: converted with zhconv to zh-tw when requested,
    # otherwise returned untouched.
    L = (lambda x: zhconv(x, 'zh-tw')) if accepttw else (lambda x: x)
    origcnt = userlog.count(ip)
    count = 0
    valid = wy_validate(ip, origcnt, userlog)
    talign = flask.Markup('[]')
    if valid == 1:
        # Validation succeeded: forget the usage recorded for this address.
        origcnt = 0
        userlog.delete(ip)
    elif valid == 0:
        # Lazy %-args: the message is only formatted if the record is emitted.
        logging.warning('Captcha failed: %s', ip)
    if not tinput:
        toutput = ''
    elif valid == 0:
        # The error snippets are trusted, hard-coded HTML; mark them as such
        # so template autoescaping does not render the tags literally.
        # Message text: "Wrong answer, please try again."
        toutput = flask.Markup(L('<p class="error">回答错误,请重试。</p>'))
    elif lang is None:
        toutput = linebreak(tinput)
    elif len(tinput) > MAX_CHAR * (CHAR_RATIO if lang == 'c2m' else 1):
        # Message text: "Text too long, please split it before submitting."
        toutput = flask.Markup(L('<p class="error">文本过长,请切分后提交。</p>'))
    else:
        tinput, tres, count = mosesproxy.translate(
            tinput, lang, True, True, True)
        toutput, talign = translateresult(tres, L)
        # NOTE(review): no explicit commit follows this add(); confirm that
        # the store returned by get_wy_db() persists changes on its own.
        userlog.add(ip, count)
    captcha = ''
    if origcnt + count > userlog.maxcnt:
        captcha = L(wy_gencaptcha())
    return flask.render_template(
        ('translate_zhtw.html' if accepttw else 'translate.html'),
        tinput=tinput, toutput=toutput, talign=talign,
        captcha=flask.Markup(captcha))
"CREATE TABLE sentences (id INTEGER PRIMARY KEY, sent TEXT, type INTEGER)") db.commit() cf = open(os.path.join(OS_DATA, "zhc.txt")).read().split('\n') mf = open(os.path.join(OS_DATA, "zhm.txt")).read().split('\n') count = 0 ccount = 0 mcount = 0 for c, m in zip(cf, mf): if len(c) < 50: cache.set(c, m) if 15 < len(c) < 25: txt = c.strip('“”‘’;:') cscore, mscore = calctxtstat(txt) if abs(cscore - mscore) < 20: cur.execute( "REPLACE INTO sentences (id, sent, type) VALUES (?, ?, ?)", (crc32(txt), txt, 0)) ccount += 1 if 15 < len(m) < 25: txt = m.strip('“”‘’;:') cscore, mscore = calctxtstat(txt) if abs(cscore - mscore) < 20: cur.execute( "REPLACE INTO sentences (id, sent, type) VALUES (?, ?, ?)", (crc32(txt), txt, 1)) mcount += 1 count += 1 if count % 100000 == 0:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Evaluate zhutil.calctxtstat against the labelled test sentences.

Every row of the ``sentences`` table in the ``DB_testsent`` database is
scored.  A row counts as correct when the higher of the two scores agrees
with its stored ``type`` (0: cscore > mscore, 1: cscore < mscore).  The
script prints the accuracy plus the mean and population standard deviation
of ``cscore - mscore``.
"""

import contextlib
import sqlite3

import zhutil
from config import *

count = 0
correct = 0
delta = []

# closing() guarantees the connection is released even if scoring raises.
with contextlib.closing(sqlite3.connect(DB_testsent)) as db_ts:
    cur_ts = db_ts.cursor()
    for sent, typ in cur_ts.execute("SELECT sent, type FROM sentences"):
        cscore, mscore = zhutil.calctxtstat(sent)
        delta.append(cscore - mscore)
        count += 1
        # Ties (cscore == mscore) are never counted as correct.
        if (cscore > mscore and typ == 0) or (cscore < mscore and typ == 1):
            correct += 1

print('Correct/Count:', correct, count)
if count:
    print(' :', correct / count)
    mean = sum(delta) / len(delta)
    # Population standard deviation (divides by N, not N - 1).
    stdev = (sum((x - mean)**2 for x in delta) / len(delta))**.5
    print('Mean, Stdev :', mean, stdev)
else:
    # An empty table would otherwise raise ZeroDivisionError above.
    print('No sentences found; skipping statistics.')
"CREATE TABLE sentences (id INTEGER PRIMARY KEY, sent TEXT, type INTEGER)") db.commit() cf = open(os.path.join(OS_DATA, "zhc.txt")).read().split('\n') mf = open(os.path.join(OS_DATA, "zhm.txt")).read().split('\n') count = 0 ccount = 0 mcount = 0 for c, m in zip(cf, mf): if len(c) < 50: cache.set(c, m) if 15 < len(c) < 25: txt = c.strip('“”‘’;:') cscore, mscore = calctxtstat(txt) if abs(cscore - mscore) < 20: cur.execute( "REPLACE INTO sentences (id, sent, type) VALUES (?, ?, ?)", (crc32(txt), txt, 0)) ccount += 1 if 15 < len(m) < 25: txt = m.strip('“”‘’;:') cscore, mscore = calctxtstat(txt) if abs(cscore - mscore) < 20: cur.execute( "REPLACE INTO sentences (id, sent, type) VALUES (?, ?, ?)", (crc32(txt), txt, 1)) mcount += 1 count += 1 if count % 100000 == 0: print(count)