Ejemplo n.º 1
0
def test():
    for testn in sorted(listdir('cases')):
        if not exists(join('cases', testn, 'input')):
            continue
        print
        print '-' * 40
        print
        print 'Running test', testn, '...'
        with codecs.open(join('cases', testn, 'input'), 'r', 'utf-8') as f:
            inp = f.read()
        print 'input size:', len(inp)
        parserules = True
        for var in VALIDVARIANTS:
            print
            print 'testing', var, '...',
            start = time()
            try:
                convhandler = ConverterHandler(var)
                outp = convhandler.convert(inp, parserules)
            except Exception:
                print 'error occurred.'
                print_exc()
            else:
                print '%.3f sec.' % (time() - start)
                if exists(join('cases', testn, 'output.' + var)):
                    print 'comparing converter\'s output with expected output ...'
                    with codecs.open(join('cases', testn, 'output.' + var),
                                     'r', 'utf-8') as f:
                        cp = f.read()
                    if cp == outp:
                        print 'OK'
                    else:
                        print 'FAILED'
Ejemplo n.º 2
0
def test():
    for testn in sorted(listdir('cases')):
        if not exists(join('cases', testn, 'input')):
            continue
        print
        print '-' * 40
        print
        print 'Running test', testn, '...'
        with codecs.open(join('cases', testn, 'input'), 'r', 'utf-8') as f:
            inp = f.read()
        print 'input size:', len(inp)
        parserules = True
        for var in VALIDVARIANTS:
            print
            print 'testing', var, '...',
            start = time()
            try:
                convhandler = ConverterHandler(var)
                outp = convhandler.convert(inp, parserules)
            except Exception:
                print 'error occurred.'
                print_exc()
            else:
                print '%.3f sec.' % (time() - start)
                if exists(join('cases', testn, 'output.' + var)):
                    print 'comparing converter\'s output with expected output ...'
                    with codecs.open(join('cases', testn, 'output.' + var), 'r', 'utf-8') as f:
                        cp = f.read()
                    if cp == outp:
                        print 'OK'
                    else:
                        print 'FAILED'
Ejemplo n.º 3
0
def genLoc(filePath):
    """Yield search URLs for a fixed keyword converted with 'zh-hant'.

    The keyword is converted, UTF-8 encoded and URL-quoted on each of the
    100000 iterations; an iteration whose quoted keyword is empty yields
    nothing.

    filePath -- path of a file that is opened but never read.  It is still
                opened so that an unreadable path raises exactly as before;
                the handle is now closed when the generator finishes or is
                closed, instead of being left open.
    """
    with open(filePath):
        conv = ConverterHandler('zh-hant')
        word = u'女装连衣裙'
        for _ in xrange(100000):
            keyword = urllib.quote(conv.convert(word.strip()).encode('utf-8'))
            if not keyword:
                continue
            yield 'http://example.com/?q=%s' % keyword
Ejemplo n.º 4
0
def convert_name(word):
    """Return *word* run through ``convert`` and then a 'zh-hans' conversion.

    NOTE(review): ``convert`` is defined outside this snippet; what it does
    to *word* before the ConverterHandler step is not visible here.
    """
    handler = ConverterHandler('zh-hans')
    preprocessed = convert(word)
    return handler.convert(preprocessed)
Ejemplo n.º 5
0
from langconv import ConverterHandler

DB = MySQLdb.connect(user="******",passwd="nishixian",
	db="kancolle_wiki",unix_socket="/opt/lampp/var/mysql/mysql.sock",charset="utf8")

cursor = DB.cursor()
query = "SELECT ship_id,ship_name FROM kancolle_ship_info";

conv_hack = [(u'巻',u'卷'), (u'蔵',u'藏'), (u'黒',u'黑'), (u'暁',u'晓'), (u'満',u'满'), (u'皐',u'皋'), (u'歳',u'岁')]

if cursor.execute(query) > 0:
	result = list(cursor.fetchall())
	for item in result:
		kanid = item[0]
		kanname = item[1]
		convhandler = ConverterHandler('zh-hans')
		output = convhandler.convert(kanname,False)
		for conv in conv_hack:
			output = output.replace(conv[0],conv[1])
		print kanname,'==>',output
		query = "UPDATE kancolle_ship_info SET ship_name_sim = '%s' WHERE ship_id = %s" % (output, kanid)
		try:
			n = cursor.execute(query)
		except Exception:
			errcode,errordesc = e
			print 'Error!',errordesc
			DB.rollback()
		else:
			if n>0 :
				print 'Convert Success!'
				DB.commit()