def main(args):
    """Print phonetic transcriptions for every line of ``args.filename``.

    For each non-blank line the script prints: the original line, the
    table-based transcription (``get_phonetic_transcription``), and the
    JUMAN-based transcription (``get_phonetic_transcription_juman``).

    :param args: parsed CLI namespace; reads ``args.filename`` (input
        path) and ``args.verbose`` (progress logging flag).
    :return: 0 on success.
    """
    global verbose
    verbose = args.verbose
    i = 0
    table_term_vowel = utils.load_csv_to_dict(
        path.join(DIR_ROOT, 'data/term_vowel_table.csv'))
    with codecs.open(args.filename, encoding='utf-8') as f:
        for line in f:
            i += 1
            if verbose and i % 10 == 0:
                logger.info(i)
            # Lines from iteration keep their trailing '\n', so the old
            # 'len(line) == 0' test never fired; skip blank lines instead.
            if not line.strip():
                continue
            # codecs.open() already decodes to unicode; the previous
            # extra .decode('utf-8') raised UnicodeDecodeError on
            # non-ASCII (Japanese) input under Python 2.
            result1 = get_phonetic_transcription(line, table_term_vowel)
            result2 = get_phonetic_transcription_juman(line)
            print('{}'.format(line.strip()))
            print(''.join(result1.split()))
            print(result2)
            print('')
    return 0
from os import path
import sys
import codecs
import rhyme
import utils

# NOTE(review): this u-string follows the imports, so it is NOT picked up
# as the module docstring — it is a no-op expression statement.
u"""Make features

DEMO: python make_features.py
"""

DIR_SCRIPT = path.dirname(path.abspath(__file__))
DIR_ROOT = DIR_SCRIPT  # TODO: move this file to ./features

# Term -> vowel-sequence lookup table, loaded once at import time.
table_term_vowel = utils.load_csv_to_dict(
    path.join(DIR_ROOT, 'data/term_vowel_table.csv'))


def calc_Jaccard_similarity(BoW1, BoW2):
    # Jaccard-style similarity over two bags of words.
    # NOTE(review): truncated in this view — the return statement (and the
    # tail of the loop) is not visible in this chunk.
    #
    # Variant that does NOT allow duplicates (kept for reference):
    # all_words = list(set(BoW1.extend(BoW2)))
    # common_words = list(set(BoW1) & set(BoW2))
    #
    # Variant that allows duplicates:
    assert isinstance(BoW1, list)
    assert isinstance(BoW2, list)
    all_words = BoW1 + BoW2
    common_words = []
    for word in BoW1:
        if word in BoW2:
            common_words.append(word)
import sys
import codecs
import rhyme
import utils

# NOTE(review): `path` is used below but `from os import path` is missing
# in this chunk — as written this raises NameError at import time.
# NOTE(review): this u-string follows the imports, so it is NOT picked up
# as the module docstring — it is a no-op expression statement.
u"""Make features

DEMO: python make_features.py
"""

DIR_SCRIPT = path.dirname(path.abspath(__file__))
DIR_ROOT = DIR_SCRIPT  # TODO: move this file to ./features

# Term -> vowel-sequence lookup table, loaded once at import time.
table_term_vowel = utils.load_csv_to_dict(
    path.join(DIR_ROOT, 'data/term_vowel_table.csv'))


def calc_Jaccard_similarity(BoW1, BoW2):
    # Jaccard-style similarity over two bags of words.
    # NOTE(review): truncated in this view — no return statement is visible
    # in this chunk.
    #
    # Variant that does NOT allow duplicates (kept for reference):
    # all_words = list(set(BoW1.extend(BoW2)))
    # common_words = list(set(BoW1) & set(BoW2))
    #
    # Variant that allows duplicates:
    assert isinstance(BoW1, list)
    assert isinstance(BoW2, list)
    all_words = BoW1 + BoW2
    common_words = []
    for word in BoW1:
        if word in BoW2:
            common_words.append(word)
            # Remove the matched word so each occurrence in BoW2 is
            # counted at most once.  NOTE(review): this mutates the
            # caller's BoW2 list in place — confirm callers expect that.
            BoW2.remove(word)
# NOTE(review): this chunk starts mid-file; `model`, `args`, `vocab`,
# `utils`, `np`, `cuda`, and `cPickle` are defined/imported elsewhere.

# id -> token mapping pickled by the training script.
# NOTE(review): the file handle from open() is never closed here — a
# `with` block would be safer; also pickle is unsafe on untrusted input.
inv_vocab = cPickle.load(open("inv_vocab.pkl", "rb"))
n_units = model.embed.W.shape[1]
# Use CuPy arrays when a GPU is requested, plain NumPy otherwise.
xp = cuda.cupy if args.gpu >= 0 else np
if args.gpu >= 0:
    cuda.check_cuda_available()
    cuda.get_device(args.gpu).use()
    model.to_gpu()
if args.gpu < 0:
    model.to_cpu()

# load mild yankee dict
mild_yankee_dict = utils.load_csv_to_dict("mild_dict.csv")


def force_mild(state, line, word_id):
    # Advance the RNN one step on `word_id`; if that word has a forced
    # continuation listed in mild_yankee_dict, append each continuation
    # word to `line` (mutated in place) and step the RNN through it too,
    # returning the final prediction.
    # NOTE(review): dict.has_key() is Python-2-only; `key in dict` works
    # on both Python 2 and 3.
    cur_word = xp.array([word_id], dtype=np.int32)
    state, predict = forward_one_step(cur_word, state, train=False)
    if mild_yankee_dict.has_key(unicode(inv_vocab[word_id])):
        for next_word in mild_yankee_dict[unicode(inv_vocab[word_id])].split():
            line.append(next_word)
            next_word_id = xp.array([vocab[next_word]], dtype=np.int32)
            state, predict = forward_one_step(next_word_id, state, train=False)
    return predict


def forward_one_step(x_data, state, train=True):
    # NOTE(review): truncated in this view — the body continues past the
    # end of this chunk.
    if args.gpu >= 0: