rec = rec_set[fid] result = calculator.calculate(lab, rec) acc = float(result['cor'] - result['ins']) * 100.0 / result['all'] print('acc: %4.2f %%' % acc, end = ' ') # print('acc: {acc}'.format(acc = acc*100), end = ' ') print('N: {total}'.format(total = result['all']), end = ' ') print('C: {correct}'.format(correct = result['cor']), end = ' ') print('S: {substitution}'.format(substitution = result['sub']), end = ' ') print('D: {deletion}'.format(deletion = result['del']), end = ' ') print('I: {insertion}'.format(insertion = result['ins'])) space = {} space['lab'] = [] space['rec'] = [] for idx in xrange(len(result['lab'])) : len_lab = len(result['lab'][idx]) if is_chinese(result['lab'][idx]) : len_lab = len_lab * 2 len_rec = len(result['rec'][idx]) if is_chinese(result['rec'][idx]) : len_rec = len_rec * 2 length = max(len_lab, len_rec) space['lab'].append(length-len_lab) space['rec'].append(length-len_rec) print('lab:', end = ' ') for idx, token in enumerate(result['lab']) : print('{token}'.format(token = token.encode('utf-8')), end = ' ') for n in xrange(space['lab'][idx]) : print(' ', end = '') print() print('rec:', end = ' ') for idx, token in enumerate(result['rec']) :
# -*- coding: utf-8 -*-
# (Encoding declaration written in a form the interpreter recognises: the
# marker must read "coding:" or "coding=" with no space before the separator.)
#
# Reads lines from stdin, takes the leading token of each line, and prints the
# collected tokens in two groups:
#   <English>   tokens for which is_chinese() is false, kept whole
#   <Mandarin>  the individual characters of tokens for which it is true
#
# NOTE: relies on Python 2 semantics -- stdin yields byte strings, which are
# decoded from UTF-8 on input and re-encoded to UTF-8 on output.
from Accuracy import is_chinese
import re
import sys

if __name__ == '__main__':
    # Dict keys act as the set of distinct tokens; the value 1 is a placeholder.
    cluster = {'ch': {}, 'en': {}}

    for line in sys.stdin:
        line = line.decode('utf-8')
        # NOTE(review): the slice ends one character BEFORE the first space,
        # so the character immediately preceding the space is dropped. When
        # the line contains no space, find() returns -1 and the slice becomes
        # line[0:-2], dropping the last two characters instead. Presumably
        # this matches the expected input format -- confirm against the data.
        word = line[0:line.find(' ') - 1]
        if is_chinese(word):
            # Tokens classified as Chinese are split into single characters.
            for char in word:
                cluster['ch'][char] = 1
        else:
            cluster['en'][word] = 1

    print('<English>')
    for token in cluster['en']:
        print(token.encode('utf-8'))

    print('<Mandarin>')
    for token in cluster['ch']:
        print(token.encode('utf-8'))
# -*- coding: utf-8 -*-
# (Encoding declaration written in a form the interpreter recognises: the
# marker must read "coding:" or "coding=" with no space before the separator.)
#
# Reads lines from stdin, takes the leading token of each line, and prints the
# collected tokens in two groups:
#   <English>   tokens for which is_chinese() is false, kept whole
#   <Mandarin>  the individual characters of tokens for which it is true
#
# NOTE: relies on Python 2 semantics -- stdin yields byte strings, which are
# decoded from UTF-8 on input and re-encoded to UTF-8 on output.
from Accuracy import is_chinese
import re
import sys

if __name__ == '__main__':
    # Dict keys act as the set of distinct tokens; the value 1 is a placeholder.
    cluster = {'ch': {}, 'en': {}}

    for line in sys.stdin:
        line = line.decode('utf-8')
        # NOTE(review): the slice ends one character BEFORE the first space,
        # so the character immediately preceding the space is dropped. When
        # the line contains no space, find() returns -1 and the slice becomes
        # line[0:-2], dropping the last two characters instead. Presumably
        # this matches the expected input format -- confirm against the data.
        word = line[0:line.find(' ') - 1]
        if is_chinese(word):
            # Tokens classified as Chinese are split into single characters.
            for char in word:
                cluster['ch'][char] = 1
        else:
            cluster['en'][word] = 1

    print('<English>')
    for token in cluster['en']:
        print(token.encode('utf-8'))

    print('<Mandarin>')
    for token in cluster['ch']:
        print(token.encode('utf-8'))