コード例 #1
0
def wenyan():
    """Handle a translation request and render the translation page.

    Reads the ``input`` and ``lang`` request values.  ``lang`` may be
    ``'c2m'`` or ``'m2c'``; any other value triggers auto-detection based
    on the two scores returned by ``calctxtstat`` (equal scores leave the
    direction undecided, in which case the input is echoed back through
    ``linebreak`` instead of being translated).

    Per-client usage is tracked in the object returned by ``get_wy_db()``,
    keyed by the remote address.  Once the accumulated count exceeds
    ``userlog.maxcnt`` a captcha is generated and passed to the template.

    Returns:
        The rendered ``translate.html`` template, or ``translate_zhtw.html``
        when ``flask.g`` carries a truthy ``accepttw`` value.
    """
    userlog = get_wy_db()
    tinput = flask.request.values.get('input', '')
    formgetlang = flask.request.values.get('lang')
    if formgetlang in ('c2m', 'm2c'):
        lang = formgetlang
    else:  # == auto
        cscore, mscore = calctxtstat(tinput)
        if cscore == mscore:
            # No direction can be chosen from the scores.
            lang = None
        elif checktxttype(cscore, mscore) == 'c':
            lang = 'c2m'
        else:
            lang = 'm2c'

    ip = flask.request.remote_addr
    accepttw = flask.g.get('accepttw')
    # L post-processes user-visible text: zh-tw conversion when requested,
    # identity otherwise.
    L = (lambda x: zhconv(x, 'zh-tw')) if accepttw else (lambda x: x)
    origcnt = userlog.count(ip)
    count = 0
    valid = wy_validate(ip, origcnt, userlog)
    talign = flask.Markup('[]')
    if valid == 1:
        # Validation succeeded: forget this client's previous usage.
        origcnt = 0
        userlog.delete(ip)
    elif valid == 0:
        logging.warning('Captcha failed: %s', ip)

    if not tinput:
        toutput = ''
    elif valid == 0:
        toutput = flask.Markup(L('<p class="error">回答错误,请重试。</p>'))
    elif lang is None:
        toutput = linebreak(tinput)
    elif len(tinput) > MAX_CHAR * (CHAR_RATIO if lang == 'c2m' else 1):
        # The length limit is scaled by CHAR_RATIO for the c2m direction.
        toutput = flask.Markup(L('<p class="error">文本过长,请切分后提交。</p>'))
    else:
        tinput, tres, count = mosesproxy.translate(
            tinput, lang, True, True, True)
        toutput, talign = translateresult(tres, L)
        userlog.add(ip, count)
    userlog.commit()

    captcha = ''
    if origcnt + count > userlog.maxcnt:
        captcha = L(wy_gencaptcha())
    return flask.render_template(
        'translate_zhtw.html' if accepttw else 'translate.html',
        tinput=tinput,
        toutput=toutput,
        talign=talign,
        captcha=flask.Markup(captcha))
コード例 #2
0
ファイル: wenyan.py プロジェクト: gumblex/pywebapps
def wenyan():
    """Handle a translation request and render the translation page.

    Reads the ``input`` and ``lang`` request values.  ``lang`` may be
    ``'c2m'`` or ``'m2c'``; any other value triggers auto-detection based
    on the two scores returned by ``calctxtstat`` (equal scores leave the
    direction undecided, in which case the input is echoed back through
    ``linebreak`` instead of being translated).

    Per-client usage is tracked in the object returned by ``get_wy_db()``,
    keyed by the remote address.  Once the accumulated count exceeds
    ``userlog.maxcnt`` a captcha is generated and passed to the template.

    Returns:
        The rendered ``translate.html`` template, or ``translate_zhtw.html``
        when ``flask.g`` carries a truthy ``accepttw`` value.
    """
    userlog = get_wy_db()
    tinput = flask.request.values.get('input', '')
    formgetlang = flask.request.values.get('lang')
    if formgetlang in ('c2m', 'm2c'):
        lang = formgetlang
    else:  # == auto
        cscore, mscore = calctxtstat(tinput)
        if cscore == mscore:
            # No direction can be chosen from the scores.
            lang = None
        elif checktxttype(cscore, mscore) == 'c':
            lang = 'c2m'
        else:
            lang = 'm2c'

    ip = flask.request.remote_addr
    accepttw = flask.g.get('accepttw')
    # L post-processes user-visible text: zh-tw conversion when requested,
    # identity otherwise.
    L = (lambda x: zhconv(x, 'zh-tw')) if accepttw else (lambda x: x)
    origcnt = userlog.count(ip)
    count = 0
    valid = wy_validate(ip, origcnt, userlog)
    talign = flask.Markup('[]')
    if valid == 1:
        # Validation succeeded: forget this client's previous usage.
        origcnt = 0
        userlog.delete(ip)
    elif valid == 0:
        logging.warning('Captcha failed: %s', ip)

    if not tinput:
        toutput = ''
    elif valid == 0:
        toutput = L('<p class="error">回答错误,请重试。</p>')
    elif lang is None:
        toutput = linebreak(tinput)
    elif len(tinput) > MAX_CHAR * (CHAR_RATIO if lang == 'c2m' else 1):
        # The length limit is scaled by CHAR_RATIO for the c2m direction.
        toutput = L('<p class="error">文本过长,请切分后提交。</p>')
    else:
        tinput, tres, count = mosesproxy.translate(
            tinput, lang, True, True, True)
        toutput, talign = translateresult(tres, L)
        userlog.add(ip, count)

    captcha = ''
    if origcnt + count > userlog.maxcnt:
        captcha = L(wy_gencaptcha())
    return flask.render_template(
        'translate_zhtw.html' if accepttw else 'translate.html',
        tinput=tinput,
        toutput=toutput,
        talign=talign,
        captcha=flask.Markup(captcha))
コード例 #3
0
    "CREATE TABLE sentences (id INTEGER PRIMARY KEY, sent TEXT, type INTEGER)")
db.commit()

# Load both corpus files fully into memory, one list entry per line.
# The files are opened in `with` blocks so the handles are closed as soon
# as the contents have been read.  Splitting on '\n' keeps a trailing
# empty string when a file ends with a newline.
with open(os.path.join(OS_DATA, "zhc.txt")) as zhc_file:
    cf = zhc_file.read().split('\n')
with open(os.path.join(OS_DATA, "zhm.txt")) as zhm_file:
    mf = zhm_file.read().split('\n')

# count:  line pairs processed so far
# ccount: rows written with type 0 (taken from zhc.txt)
# mcount: rows written with type 1 (taken from zhm.txt)
count = 0
ccount = 0
mcount = 0

# Walk both line lists in lockstep; zip() stops at the shorter list.
for c, m in zip(cf, mf):
    # Lines from cf shorter than 50 characters are stored in the cache,
    # keyed by the cf line with the paired mf line as the value.
    if len(c) < 50:
        cache.set(c, m)
    # Lines of 16-24 characters are candidates for the `sentences` table.
    if 15 < len(c) < 25:
        # Drop surrounding quotation marks and ';' / ':' before scoring.
        txt = c.strip('“”‘’;:')
        cscore, mscore = calctxtstat(txt)
        # Only keep sentences whose two scores differ by less than 20.
        if abs(cscore - mscore) < 20:
            # The row id is crc32(txt), so REPLACE overwrites any existing
            # row with the same id.  type 0 marks a line taken from cf.
            cur.execute(
                "REPLACE INTO sentences (id, sent, type) VALUES (?, ?, ?)",
                (crc32(txt), txt, 0))
            ccount += 1
    # Same treatment for the paired line from mf, stored with type 1.
    if 15 < len(m) < 25:
        txt = m.strip('“”‘’;:')
        cscore, mscore = calctxtstat(txt)
        if abs(cscore - mscore) < 20:
            cur.execute(
                "REPLACE INTO sentences (id, sent, type) VALUES (?, ?, ?)",
                (crc32(txt), txt, 1))
            mcount += 1
    count += 1
    # Checkpoint reached once every 100000 processed pairs.
    if count % 100000 == 0:
コード例 #4
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import sqlite3
import zhutil
from config import *

# Score every labelled sentence in the test database with
# zhutil.calctxtstat and report how often the scores agree with the label.
db_ts = sqlite3.connect(DB_testsent)

count = 0
correct = 0
delta = []  # cscore - mscore for every sentence

try:
    cur_ts = db_ts.cursor()
    for sent, typ in cur_ts.execute("SELECT sent, type FROM sentences"):
        cscore, mscore = zhutil.calctxtstat(sent)
        delta.append(cscore - mscore)
        count += 1
        # Correct when the higher score matches the stored type:
        # type 0 expects cscore to win, type 1 expects mscore to win.
        # Equal scores are never counted as correct.
        if (cscore > mscore and typ == 0) or (cscore < mscore and typ == 1):
            correct += 1
finally:
    # Release the connection even if scoring raises.
    db_ts.close()

print('Correct/Count:', correct, count)

if count:
    print('             :', correct / count)

    # Population mean and standard deviation of the score differences.
    mean = sum(delta) / len(delta)
    stdev = (sum((x - mean)**2 for x in delta) / len(delta))**.5

    print('Mean, Stdev  :', mean, stdev)
else:
    # An empty table would otherwise end in ZeroDivisionError.
    print('No sentences found; accuracy and deviation not computed.')
コード例 #5
0
ファイル: buildcache.py プロジェクト: gumblex/pywebapps
    "CREATE TABLE sentences (id INTEGER PRIMARY KEY, sent TEXT, type INTEGER)")
db.commit()

# Load both corpus files fully into memory, one list entry per line.
# The files are opened in `with` blocks so the handles are closed as soon
# as the contents have been read.  Splitting on '\n' keeps a trailing
# empty string when a file ends with a newline.
with open(os.path.join(OS_DATA, "zhc.txt")) as zhc_file:
    cf = zhc_file.read().split('\n')
with open(os.path.join(OS_DATA, "zhm.txt")) as zhm_file:
    mf = zhm_file.read().split('\n')

# count:  line pairs processed so far
# ccount: rows written with type 0 (taken from zhc.txt)
# mcount: rows written with type 1 (taken from zhm.txt)
count = 0
ccount = 0
mcount = 0

# Walk both line lists in lockstep; zip() stops at the shorter list.
for c, m in zip(cf, mf):
    # Short cf lines go into the cache, keyed by the cf line.
    if len(c) < 50:
        cache.set(c, m)

    # Handle the cf line (type 0) first, then the mf line (type 1).
    for line, line_type in ((c, 0), (m, 1)):
        # Only lines of 16-24 characters are candidates.
        if not 15 < len(line) < 25:
            continue
        txt = line.strip('“”‘’;:')
        cscore, mscore = calctxtstat(txt)
        # Skip sentences whose scores differ by 20 or more.
        if not abs(cscore - mscore) < 20:
            continue
        # The row id is crc32(txt); REPLACE overwrites a row with that id.
        row = (crc32(txt), txt, line_type)
        cur.execute(
            "REPLACE INTO sentences (id, sent, type) VALUES (?, ?, ?)", row)
        if line_type == 0:
            ccount += 1
        else:
            mcount += 1

    count += 1
    # Progress output once every 100000 processed pairs.
    if not count % 100000:
        print(count)