def eval():
    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    X, Sources, Targets = load_test_data()
    de2idx, idx2de = load_vocab('src')
    en2idx, idx2en = load_vocab('trg')

    # X, Sources, Targets = X[:33], Sources[:33], Targets[:33]

    # Start session
    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model name
            mname = open(hp.logdir + '/checkpoint', 'r').read().split('"')[1]

            # Inference
            if not os.path.exists('results'):
                os.mkdir('results')
            with codecs.open("results/" + mname, "w", "utf-8") as fout:
                list_of_refs, hypotheses = [], []
                for i in range(len(X) // hp.batch_size):
                    # Get mini-batches
                    x = X[i * hp.batch_size: (i + 1) * hp.batch_size]
                    sources = Sources[i * hp.batch_size: (i + 1) * hp.batch_size]
                    targets = Targets[i * hp.batch_size: (i + 1) * hp.batch_size]

                    # Autoregressive inference: feed back predictions one position at a time
                    preds = np.zeros((hp.batch_size, hp.maxlen), np.int32)
                    for j in range(hp.maxlen):
                        _preds = sess.run(g.preds, {g.x: x, g.y: preds})
                        preds[:, j] = _preds[:, j]

                    # Write to file
                    for source, target, pred in zip(sources, targets, preds):  # sentence-wise
                        got = " ".join(idx2en[idx] for idx in pred).split("</S>")[0].strip()
                        fout.write("- source: " + source + "\n")
                        fout.write("- expected: " + target + "\n")
                        fout.write("- got: " + got + "\n\n")
                        fout.flush()

                        # Collect references/hypotheses for the BLEU score
                        ref = target.split()
                        hypothesis = got.split()
                        if len(ref) > 3 and len(hypothesis) > 3:
                            list_of_refs.append([ref])
                            hypotheses.append(hypothesis)

                # Calculate BLEU score
                score = corpus_bleu(list_of_refs, hypotheses)
                fout.write("Bleu Score = " + str(100 * score))
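# All of the functions in this section share a `load_vocab` helper whose exact
# definition varies by repo but which consistently returns (token2idx, idx2token)
# mappings. A minimal sketch under that assumption; the vocab file path, the
# one-token-per-line format, and the file name are illustrative, not taken from
# any one of the repos above.
import codecs

def load_vocab(vocab_fpath='preprocessed/vocab.txt'):
    """Builds forward and reverse token/index mappings from a vocab file."""
    tokens = [line.strip() for line in codecs.open(vocab_fpath, 'r', 'utf-8')]
    token2idx = {token: idx for idx, token in enumerate(tokens)}
    idx2token = {idx: token for idx, token in enumerate(tokens)}
    return token2idx, idx2token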
def __init__(self, hp):
    self.hp = hp
    self.en_token2idx, self.en_idx2token = load_vocab(hp.en_vocab)
    self.ch_token2idx, self.ch_idx2token = load_vocab(hp.ch_vocab)
    self.en_embeddings, self.ch_embeddings = get_token_embeddings(
        self.hp.en_vocab_size, self.hp.ch_vocab_size, self.hp.d_model, zero_pad=True)
def load_model(model_path):
    # Load vocabularies
    source2idx, idx2source = load_vocab(params.src_vocab)
    target2idx, idx2target = load_vocab(params.tgt_vocab)
    encoder_vocab = len(source2idx)
    decoder_vocab = len(target2idx)

    # Load model weights and switch to inference mode
    model = Transformer(params, encoder_vocab, decoder_vocab)
    model.load_state_dict(torch.load(model_path))
    print('Model Loaded.')
    model.eval()
    model.cuda()
    return model, source2idx, idx2target
def load_distinct_data(mode="train"): word2idx, idx2word = load_vocab() Y = [] for line in codecs.open(hp.data, 'r', 'utf-8'): sent = line.strip().split(" ") sent = sent[1:] sent = ' '.join(sent) sent = normalize(sent) words = sent.split() if len(words) <= hp.maxlen: sent_ids = [word2idx.get(word, 0) for word in words] if 0 not in sent_ids: # We do not include a sentence if it has any unknown words. Y.append(np.array(sent_ids, np.int32).tostring()) #print("###F",Y[0]) random.shuffle(Y) #print("###S",Y[0]) ''' if mode=="train": Y = Y[:-hp.batch_size] else: # test Y = Y[-hp.batch_size:] ''' print("# Y =", len(Y)) return Y
def main():
    g = Graph(is_training=False)

    # Load vocab
    pnyn2idx, idx2pnyn, hanzi2idx, idx2hanzi = load_vocab()

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model name
            mname = open(hp.logdir + '/checkpoint', 'r').read().split('"')[1]

            while True:
                line = input("请输入测试拼音:")  # "Please enter test pinyin:"
                if len(line) > hp.maxlen:
                    print('最长拼音不能超过50')  # "Pinyin input cannot exceed 50 characters"
                    continue
                x = load_test_string(pnyn2idx, line)
                preds = sess.run(g.preds, {g.x: x})
                # Keep only as many characters as the non-padded input; drop the "_" filler
                got = "".join(idx2hanzi[idx]
                              for idx in preds[0])[:np.count_nonzero(x[0])].replace("_", "")
                print(got)
def __init__(self, hp):
    self.hp = hp
    self.token2idx, self.idx2token, self.hp.vocab_size = load_vocab(hp.vocab)
    self.embd = None
    if self.hp.preembedding:
        self.embd = loadGloVe(self.hp.vec_path)
    self.embeddings = get_token_embeddings(self.embd, self.hp.vocab_size,
                                           self.hp.d_model, zero_pad=False)

    # Placeholders for supervised, original, and augmented inputs
    self.input_sup = tf.placeholder(tf.int32, [None, self.hp.maxlen], name="input_sup")
    self.input_ori = tf.placeholder(tf.int32, [None, self.hp.maxlen], name="input_ori")
    self.input_aug = tf.placeholder(tf.int32, [None, self.hp.maxlen], name="input_aug")
    self.sup_len = tf.placeholder(tf.int32, [None])
    self.ori_len = tf.placeholder(tf.int32, [None])
    self.aug_len = tf.placeholder(tf.int32, [None])
    self.truth = tf.placeholder(tf.int32, [None, self.hp.num_class], name="truth")
    self.is_training = tf.placeholder(tf.bool, shape=None, name="is_training")
    self.model = True
    # self.logits_sup, self.logits_ori, self.logits_aug = self._logits_op()
    self.loss = self._loss_op()
    self.acc = self._acc_op()
    self.global_step = self._globalStep_op()
    self.train = self._training_op()
def __init__(self, hp):
    self.hp = hp
    self.token2idx, self.idx2token = load_vocab(hp.vocab)
    # Character (token) embeddings: map each character to be translated into the target vocabulary
    self.embeddings = get_token_embeddings(self.hp.vocab_size, self.hp.d_model,
                                           zero_pad=True)
def _net1(self):
    with tf.variable_scope('net1'):
        # Load vocabulary
        phn2idx, idx2phn = load_vocab()

        # Pre-net
        prenet_out = prenet(self.x_mfcc,
                            num_units=[hp.Train1.hidden_units, hp.Train1.hidden_units // 2],
                            dropout_rate=hp.Train1.dropout_rate,
                            is_training=self.is_training)  # (N, T, E/2)

        # CBHG
        out = cbhg(prenet_out, hp.Train1.num_banks, hp.Train1.hidden_units // 2,
                   hp.Train1.num_highway_blocks, hp.Train1.norm_type, self.is_training)

        # Final linear projection
        logits = tf.layers.dense(out, len(phn2idx))  # (N, T, V)
        ppgs = tf.nn.softmax(logits / hp.Train1.t)  # (N, T, V); t is a softmax temperature
        preds = tf.to_int32(tf.argmax(logits, axis=-1))  # (N, T)

    return ppgs, preds, logits
def eval(logdir):
    # Load graph
    model = Net1()

    # Dataflow
    df = Net1DataFlow(hp.Test1.data_path, hp.Test1.batch_size)

    ckpt = tf.train.latest_checkpoint(logdir)
    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names())
    if ckpt:
        pred_conf.session_init = SaverRestore(ckpt)
    predictor = OfflinePredictor(pred_conf)

    x_mfccs, y_ppgs = next(df().get_data())
    y_ppg_1d, pred_ppg_1d, summ_loss, summ_acc = predictor(x_mfccs, y_ppgs)

    # Plot confusion matrix
    _, idx2phn = load_vocab()
    y_ppg_1d = [idx2phn[i] for i in y_ppg_1d]
    pred_ppg_1d = [idx2phn[i] for i in pred_ppg_1d]
    summ_cm = plot_confusion_matrix(y_ppg_1d, pred_ppg_1d, phns)

    writer = tf.summary.FileWriter(logdir)
    writer.add_summary(summ_loss)
    writer.add_summary(summ_acc)
    writer.add_summary(summ_cm)
    writer.close()
def plot_alignment(alignment, epoch, eng_name, kor_name):
    """Plots the alignment.

    Args:
      alignment: A (numpy) matrix of shape (encoder_steps, decoder_steps).
      epoch: Number of epochs.
      eng_name: Padded index array of the English name.
      kor_name: Padded index array of the Korean name.
    """
    _, x_i2w, _, y_i2w = load_vocab()
    non_padded_eng_name = eng_name[np.nonzero(eng_name)]
    non_padded_kor_name = kor_name[np.nonzero(kor_name)]

    # 'S' and 'E' are the start/end markers; strip them from the title text
    txt_eng_name = " ".join(x_i2w[idx] for idx in non_padded_eng_name).split('E')[0]
    txt_kor_name = " ".join(y_i2w[idx] for idx in non_padded_kor_name)
    txt_kor_name = txt_kor_name.replace('S', '').replace('E', '')

    fig, ax = plt.subplots()
    im = ax.imshow(alignment[:non_padded_eng_name.shape[0] - 1,
                             :non_padded_kor_name.shape[0] - 1], cmap='Greys')
    fig.colorbar(im)
    plt.title('{} epochs \n {} \n {}'.format(epoch, txt_eng_name, txt_kor_name))
    plt.savefig('{}/alignment_{}k.png'.format(hp.logdir + '/' + hp.modelname, epoch),
                format='png')
    plt.close()
def __init__(self, hp):
    self.hp = hp
    self.token2idx, self.idx2token = load_vocab(hp.vocab)
    self.embeddings = get_token_embeddings(self.hp.vocab_size, self.hp.d_model,
                                           self.token2idx, self.hp.embedding_file,
                                           zero_pad=True)
def __init__(self, context):
    self.context = context
    self.token2idx, self.idx2token = load_vocab(context.vocab)
    vocab_size = len(self.token2idx)
    # The embedding dimension here need not be d_model; other sizes also work.
    self.embeddings = get_token_embeddings(vocab_size, self.context.d_ff, zero_pad=False)
def __init__(self, hp):
    self.hp = hp
    # Note: at prediction time the vocabulary must be the target language's, not
    # the source language's (getting this wrong cost me four days!). Using the
    # train vocabulary instead of dev's is fine, since train largely covers dev;
    # a smaller dev vocabulary can raise a KeyError.
    self.token2idx, self.idx2token = load_vocab(hp.vocab1)
    self.embeddings = get_token_embeddings(self.hp.vocab_size, self.hp.d_model,
                                           zero_pad=True)
    print('embeddings size =', self.hp.vocab_size)
def __init__(self, num=1, mode="train"): ''' Args: num: Either 1 or 2. 1 for Text2Mel 2 for SSRN. mode: Either "train" or "synthesize". ''' # Load vocabulary self.char2idx, self.idx2char = load_vocab() # Set flag training = True if mode == "train" else False # Graph # Data Feeding ## L: Text. (B, N), int32 ## mels: Reduced melspectrogram. (B, T/r, n_mels) float32 ## mags: Magnitude. (B, T, n_fft//2+1) float32 self.L = tf.placeholder(tf.int32, shape=(None, None)) self.mels = tf.placeholder(tf.float32, shape=(None, None, hp.n_mels)) self.prev_max_attentions = tf.placeholder(tf.int32, shape=(None, )) with tf.variable_scope("Text2Mel"): # Get S or decoder inputs. (B, T//r, n_mels) self.S = tf.concat( (tf.zeros_like(self.mels[:, :1, :]), self.mels[:, :-1, :]), 1) # Networks with tf.variable_scope("TextEnc"): self.K, self.V = TextEnc(self.L, training=training) # (N, Tx, e) with tf.variable_scope("AudioEnc"): self.Q = AudioEnc(self.S, training=training) with tf.variable_scope("Attention"): # R: (B, T/r, 2d) # alignments: (B, N, T/r) # max_attentions: (B,) self.R, self.alignments, self.max_attentions = Attention( self.Q, self.K, self.V, mononotic_attention=(not training), prev_max_attentions=self.prev_max_attentions) with tf.variable_scope("AudioDec"): self.Y_logits, self.Y = AudioDec( self.R, training=training) # (B, T/r, n_mels) # During inference, the predicted melspectrogram values are fed. with tf.variable_scope("SSRN"): self.Z_logits, self.Z = SSRN(self.Y, training=training) with tf.variable_scope("gs"): self.global_step = tf.Variable(0, name='global_step', trainable=False)
def __init__(self, hp):
    self.hp = hp
    self.token2idx, self.idx2token = load_vocab(hp.vocab)
    self.embeddings = get_token_embeddings(self.hp.vocab_size, self.hp.d_model,
                                           zero_pad=True)

    self.input_x = tf.placeholder(dtype=tf.int32, shape=(None, None), name="input_x")
    self.decoder_input = tf.placeholder(dtype=tf.int32, shape=(None, None),
                                        name="decoder_input")
    self.target = tf.placeholder(dtype=tf.int32, shape=(None, None), name="target")
    self.is_training = tf.placeholder(dtype=tf.bool, name="is_training")

    # Encoder
    self.encoder_hidden = self.encode(self.input_x, training=self.is_training)
    # Decoder
    self.logits = self.decode(self.decoder_input, self.encoder_hidden,
                              training=self.is_training)
    self.y_hat = tf.to_int32(tf.argmax(self.logits, axis=-1), name="y_predict_v2")

    # Loss: label-smoothed cross entropy, averaged over non-padding positions
    self.smoothing_y = label_smoothing(tf.one_hot(self.target, depth=self.hp.vocab_size))
    self.ce_loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits,
                                                              labels=self.smoothing_y)
    nonpadding = tf.to_float(tf.not_equal(self.target, self.token2idx["<pad>"]))
    self.loss = tf.reduce_sum(self.ce_loss * nonpadding) / (tf.reduce_sum(nonpadding) + 1e-7)

    # Optimize: Adam with a Noam learning-rate schedule
    self.global_step = tf.train.get_or_create_global_step()
    self.lr = noam_scheme(self.hp.lr, self.global_step, self.hp.warmup_steps)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.minimize(self.loss, global_step=self.global_step)

    # Tensorboard
    tf.summary.scalar('lr', self.lr)
    tf.summary.scalar("loss", self.loss)
    tf.summary.scalar("global_step", self.global_step)
    self.summaries = tf.summary.merge_all()

    # Predict part
    self.y_predict = tf.identity(self.greedy_search(), name="y_predict")
def __init__(self, hp, inj_type=None, quant_min_max=None, inj_layer=None):
    self.hp = hp
    self.inj_type = inj_type
    self.quant_min_max = quant_min_max
    self.inj_layer = inj_layer
    self.token2idx, self.idx2token = load_vocab(hp.vocab)
    self.embeddings = get_token_embeddings(self.hp.vocab_size, self.hp.d_model,
                                           zero_pad=True)
def create_qa_context(model_path: str, word_to_ix_path: str,
                      embed_dim: int, hidden_dim: int, device) -> QAContext:
    word_dict = load_vocab(word_to_ix_path)
    vocab_size = len(word_dict)
    model = CNNBiLSTMAtt(vocab_size, embed_dim, hidden_dim)
    if not torch.cuda.is_available():
        model.load_state_dict(torch.load(model_path, map_location='cpu'))
    else:
        model.load_state_dict(torch.load(model_path))
    return QAContext(model, word_dict, device)
def __init__(self, log_dir="log_dir", sample_dir="samples"): self._sess_loaded = False self.log_dir = log_dir self.sample_dir = sample_dir char2idx, idx2char = load_vocab() self.char2idx = char2idx self.idx2char = idx2char self.g = Graph(mode="synthesize") print("Graph loaded") self.load_session()
def eval(mode):
    '''Gets a Spearman rank-order correlation coefficient.

    Args:
      mode: A string. Either `val` or `test`.
    '''
    # Set save directory
    savedir = hp.valdir if mode == "val" else hp.testdir

    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    X, Y = load_data(mode=mode)
    nucl2idx, idx2nucl = load_vocab()

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model name
            mname = open(hp.logdir + '/checkpoint', 'r').read().split('"')[1]

            # Inference
            if not os.path.exists(savedir):
                os.mkdir(savedir)
            with open("{}/{}".format(savedir, mname), 'w') as fout:
                fout.write("{}\t{}\t{}\n".format("probe", "expected intensity",
                                                 "predicted intensity"))
                expected, got = [], []
                for step in range(len(X) // hp.batch_size):
                    x = X[step * hp.batch_size:(step + 1) * hp.batch_size]
                    y = Y[step * hp.batch_size:(step + 1) * hp.batch_size]

                    # Predict intensities
                    logits = sess.run(g.logits, {g.x: x})
                    for xx, yy, ll in zip(x, y, logits):  # sequence-wise
                        fout.write("{}\t{}\t{}\n".format(
                            "".join(idx2nucl[idx] for idx in xx), yy, ll))
                        expected.append(yy)
                        got.append(ll)

                # Spearman rank correlation
                score, _ = spearmanr(expected, got)
                fout.write("Spearman rank correlation coefficient: " + str(score))
def eval():
    if not os.path.exists('./results'):
        os.makedirs('./results')

    # Load graph
    print("Graph loaded")
    print("Model name: {}".format(hp.modelname))

    # Load data
    print("Testing Data...")
    txt_src_names, idx_src_names, txt_tgt_names, _ = load_evaluate_data(eval_mode="test")
    x_w2i, x_i2w, y_w2i, y_i2w = load_vocab()

    g = Graph(is_training=False)
    with g.graph.as_default(), tf.Session() as sess:
        sv = tf.train.Saver()

        # Restore parameters
        print("Parameter Restoring...")
        sv.restore(sess, tf.train.latest_checkpoint(hp.logdir + '/' + hp.modelname))

        # Inference
        count = 0
        with open('./results/' + hp.modelname + '_result.txt', "w") as fout:
            for i in range(0, len(txt_src_names), hp.batch_size):
                batch_txt_src_names = txt_src_names[i:i + hp.batch_size]
                batch_idx_src_names = idx_src_names[i:i + hp.batch_size]
                batch_txt_tgt_names = txt_tgt_names[i:i + hp.batch_size]

                batch_predicted_ids = sess.run(
                    g.pred_outputs, {g.x: batch_idx_src_names}).predicted_ids[:, :, :]

                for source, target, predicted_ids in zip(batch_txt_src_names,
                                                         batch_txt_tgt_names,
                                                         batch_predicted_ids):
                    print(str(count) + '\t' + source + '\t' + hangul.join_jamos(target))
                    count += 1

                    # 'E' marks the end of a candidate; join jamos into syllables
                    candidates = []
                    predicted_ids = predicted_ids.transpose(1, 0)
                    for pred in predicted_ids:
                        candidate = "".join(y_i2w[idx] for idx in pred).split("E")[0]
                        candidate = hangul.join_jamos(candidate)
                        candidates.append(candidate)

                    fout.write(source + '\t')
                    fout.write(hangul.join_jamos(target))
                    for candidate in candidates:
                        fout.write('\t')
                        fout.write(candidate.encode('utf-8'))
                    fout.write('\n')
                    fout.flush()
def main():
    g = Graph(is_training=False)

    # Load data
    nums, X, ys = load_test_data()
    pnyn2idx, idx2pnyn, hanzi2idx, idx2hanzi = load_vocab()

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model name
            mname = open(hp.logdir + '/checkpoint', 'r').read().split('"')[1]

            with codecs.open('eval/{}_{}.csv'.format(mname, "qwerty" if hp.isqwerty else "nine"),
                             'w', 'utf-8') as fout:
                fout.write("NUM,EXPECTED,{}_{},# characters,edit distance\n".format(
                    mname, "qwerty" if hp.isqwerty else "nine"))
                total_edit_distance, num_chars = 0, 0
                for step in range(len(X) // hp.batch_size):
                    num = nums[step * hp.batch_size:(step + 1) * hp.batch_size]  # number batch
                    x = X[step * hp.batch_size:(step + 1) * hp.batch_size]  # input batch
                    y = ys[step * hp.batch_size:(step + 1) * hp.batch_size]  # ground-truth strings

                    preds = sess.run(g.preds, {g.x: x})
                    for n, xx, pred, expected in zip(num, x, preds, y):  # sentence-wise
                        got = "".join(idx2hanzi[idx]
                                      for idx in pred)[:np.count_nonzero(xx)].replace("_", "")
                        edit_distance = distance.levenshtein(expected, got)
                        total_edit_distance += edit_distance
                        num_chars += len(expected)

                        fout.write(u"{},{},{},{},{}\n".format(n, expected, got,
                                                              len(expected), edit_distance))
                fout.write(u"Total CER: {}/{}={},,,,\n".format(
                    total_edit_distance, num_chars,
                    round(float(total_edit_distance) / num_chars, 2)))
def Graph(self):
    graph = tf.Graph()
    with graph.as_default():
        if self.is_training:
            next_element, iterator, num_batch = get_batch_data(self.is_training)
            self.X, self.Y, self.seq_len = (next_element["X"], next_element["Y"],
                                            next_element["seq_len"])
        else:
            self.X = tf.placeholder(tf.int32, shape=(None, config.maxlen))
            self.Y = tf.placeholder(tf.int32, shape=(None, config.maxlen))
            self.seq_len = tf.placeholder(tf.int32, shape=(None,))

        idx2word, word2idx, idx2labl, labl2idx = load_vocab()

        # Input embedding, optionally combined with position encoding
        embed = embedding(self.X, len(word2idx), config.embed_dim, config.use_pretrain)
        if config.embeddig_mode == "concat":
            assert config.embed_dim == config.position_dim
            # TODO: this branch is not finished yet.
        elif config.embeddig_mode == "add":
            embed += position_encoding(self.X, config.position_dim, config.sinusoid)

        # Input embedding dropout
        embed = tf.layers.dropout(embed, rate=config.dropout_rate, training=self.is_training)

        # Multi-layer BiLSTM
        outputs = multibilstm(embed, self.seq_len, config.num_units, config.num_layer,
                              self.is_training, config.cell)

        # Two fully connected layers; residual connections and activations are configurable.
        outputs = feedforward(outputs, outputs.get_shape().as_list()[2],
                              scope="first")  # residual used by default
        outputs = feedforward(outputs, config.num_class, residual=False, scope="second")
        noutput = tf.reshape(outputs, [-1, config.maxlen, config.num_class])

        # CRF layer (or plain softmax loss)
        if config.use_crf:
            loss, acc, predicts, true_labels = crf_layer(self.Y, noutput, config.num_class,
                                                         self.seq_len, self.is_training)
        else:
            loss, acc, predicts, true_labels = loss_layer(self.Y, noutput, config.num_class)
        tf.summary.scalar('acc', acc)

        global_step = tf.Variable(0, name='global_step')
        if self.is_training:
            # exponential_decay helps the model converge more quickly
            if config.exponential_decay:
                learning_rate = tf.train.exponential_decay(config.lr, global_step,
                                                           200, 0.96, staircase=True)
            # optimizer = tf.train.AdamOptimizer(learning_rate=config.lr, beta1=0.9, beta2=0.99, epsilon=1e-8)
            optimizer = tf.train.RMSPropOptimizer(learning_rate=config.lr)
            train_op = optimizer.minimize(loss, global_step=global_step)
            tf.summary.scalar('mean_loss', loss)
        else:
            train_op = None

    return graph, train_op, loss, acc, predicts, true_labels, global_step
def encode(self, x, training=True):
    '''
    Returns
      memory: encoder outputs. (N, T1, d_model)
    '''
    scopes = []
    outputs = []
    with tf.variable_scope("embeddings", reuse=tf.AUTO_REUSE):
        self.token2idx, self.idx2token = load_vocab(self.hp.vocab)
        self.embeddings = get_token_embeddings(self.hp.vocab_size, self.hp.d_model,
                                               zero_pad=True)
        scopes.append(tf.get_variable_scope().name)
        outputs.append(self.embeddings)

    with tf.variable_scope("encoder_embedding_lookup", reuse=tf.AUTO_REUSE):
        # src_masks
        src_masks = tf.math.equal(x, 0)  # (N, T1)

        # embedding
        enc = tf.nn.embedding_lookup(self.embeddings, x)  # (N, T1, d_model)
        enc *= self.hp.d_model ** 0.5  # scale

        enc += positional_encoding(enc, self.hp.maxlen1)
        enc = tf.layers.dropout(enc, self.hp.dropout_rate, training=training)
        scopes.append(tf.get_variable_scope().name)
        outputs.append(enc)

    # Blocks
    for i in range(self.hp.num_blocks):
        with tf.variable_scope("encoder_num_blocks_{}".format(i), reuse=tf.AUTO_REUSE):
            # self-attention
            enc = multihead_attention(queries=enc,
                                      keys=enc,
                                      values=enc,
                                      key_masks=src_masks,
                                      num_heads=self.hp.num_heads,
                                      dropout_rate=self.hp.dropout_rate,
                                      training=training,
                                      causality=False)
            # feed forward
            enc = ff(enc, num_units=[self.hp.d_ff, self.hp.d_model])
            scopes.append(tf.get_variable_scope().name)
            outputs.append(enc)

    memory = enc
    return memory, src_masks, outputs, scopes
def test_load_vocab(self):
    test_vocab = ['<PAD>', '<UNK>', '<S>', '</S>', 'the', 'to', 'of', 'and', 'a']
    test2idx = {word: idx for idx, word in enumerate(test_vocab)}
    idx2test = {idx: word for idx, word in enumerate(test_vocab)}
    word2idx, idx2word = load_vocab('src')
    for key, value in test2idx.items():
        self.assertEqual(value, word2idx[key])
    for key, value in idx2test.items():
        self.assertEqual(value, idx2word[key])  # fixed: compare against the loaded vocab, not itself
def main():
    seq_path = f'{config.data_dir}/train/in.txt'
    tag_path = f'{config.data_dir}/train/out.txt'
    vocab_path = f'{config.data_dir}/vocabs'

    args = get_args()
    epochs = args.epochs
    batch_size = args.batch_size
    lr = args.lr
    max_seq_len = args.max_len
    embed_dim = config.embed_dim
    hidden_dim = config.hidden_dim
    output_dir = config.ouput_dir
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")

    logger.info("***** Loading vocab *****")
    word_to_ix = load_vocab(vocab_path)
    vocab_size = len(word_to_ix)

    logger.info("***** Initializing dataset *****")
    train_dataloader = init_dataset(seq_path, tag_path, word_to_ix, max_seq_len, batch_size)

    logger.info("***** Training *****")
    model = CNNBiLSTMAtt(vocab_size, embed_dim, hidden_dim)
    model.to(device)
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    loss_func = nn.CrossEntropyLoss(ignore_index=word_to_ix['[PAD]'])

    for epoch in range(epochs):
        logger.info(f"***** Epoch {epoch} *****")
        for step, batch in enumerate(train_dataloader):
            optimizer.zero_grad()
            batch = tuple(t.to(device) for t in batch)
            seq_ids, exted_att_mask, tag_ids = batch
            logits = model(seq_ids, exted_att_mask)
            loss = loss_func(logits.view(-1, vocab_size), tag_ids.view(-1))
            loss.backward()
            optimizer.step()
            if step % 100 == 0:
                logger.info(f"[epoch]: {epoch}, [batch]: {step}, [loss]: {loss.item()}")
        save_model(model, output_dir, epoch + 1)
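# `save_model` is referenced above but not defined in this section. A minimal
# sketch of what it presumably does; the real helper and its filename pattern
# may differ, and "model_epoch_{n}.pt" is a hypothetical name.
import os
import torch

def save_model(model, output_dir, epoch):
    """Persists the model weights for one epoch as a state_dict checkpoint."""
    os.makedirs(output_dir, exist_ok=True)
    torch.save(model.state_dict(), os.path.join(output_dir, f"model_epoch_{epoch}.pt"))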
def evaluate(): # Load graph g = Graph(mode="evaluate_las"); print("Graph loaded") # Load data _, idx2char = load_vocab() fpaths, _, texts = load_data(mode="evaluate_las") all_mel_spec = [load_pre_spectrograms(fpath)[1] for fpath in fpaths] maxlen = max([len(m) for m in all_mel_spec]) new_mel_spec = np.zeros((len(all_mel_spec), maxlen, hp.n_mels), np.float) for i, m_spec in enumerate(all_mel_spec): new_mel_spec[i, :len(m_spec), :] = m_spec saver_las = tf.train.Saver(var_list=g.las_variable) with tf.Session() as sess: sess.run(tf.global_variables_initializer()) saver_las.restore(sess, tf.train.latest_checkpoint(hp.las_logdir)) total_y_hat = np.zeros((len(texts), 100), np.float32) batch_idx = list(range(0,len(texts),hp.las_inference_batch_size)) batch_idx.append(len(texts)) for i in tqdm.tqdm(range(len(batch_idx)-1)): y_hat = total_y_hat[batch_idx[i]:batch_idx[i+1]] mel_spec = new_mel_spec[batch_idx[i]:batch_idx[i+1]] for j in range(100): _y_hat = sess.run(g.preds, {g.mels_las: mel_spec, g.texts_las: y_hat}) y_hat[:, j] = _y_hat[:, j] total_y_hat[batch_idx[i]:batch_idx[i+1]] = y_hat all_we = 0 all_wrd = 0 opf = open(os.path.join(hp.las_logdir,"Inference_text_seqs.txt"), "w") #inference output for i, idx_inf in enumerate(total_y_hat): fname = os.path.basename(fpaths[i]) idx_gt = texts[i] str_gt = get_sent(idx2char, idx_gt) str_inf = get_sent(idx2char, idx_inf) all_we += wer(list(str_inf), list(str_gt)) all_wrd += len(str_gt) #all_we += float(wer(list(str_inf), list(str_gt)))/float(len(str_gt)) final_str = fname + '\n' + str_gt + '\n' + str_inf + '\n'*2 opf.write(final_str) print('cer: ' + str(all_we/all_wrd)) opf.write('cer: ' + str(all_we/all_wrd))
def infer(hp):
    load_hparams(hp, hp.ckpt)

    # Latest checkpoint
    ckpt_ = tf.train.latest_checkpoint(hp.ckpt)
    ckpt = ckpt_ if ckpt_ else hp.ckpt

    # Load graph
    saver = tf.train.import_meta_graph(ckpt + '.meta', clear_devices=True)
    graph = tf.get_default_graph()

    # Load tensors by name
    input_x = graph.get_tensor_by_name("input_x:0")
    is_training = graph.get_tensor_by_name("is_training:0")
    y_predict = graph.get_tensor_by_name("y_predict:0")

    # Vocabulary
    token2idx, idx2token = load_vocab(hp.vocab)

    logging.info("# Session")
    with tf.Session() as sess:
        saver.restore(sess, ckpt)
        while True:
            text = input("请输入测试样本:")  # "Please enter a test sample:"

            # Tokens to ids
            tokens = [ch for ch in text] + ["</s>"]
            x = [token2idx.get(t, token2idx["<unk>"]) for t in tokens]

            # Run the prediction
            predict_result = sess.run(y_predict, feed_dict={input_x: [x],
                                                            is_training: False})

            # Ids to tokens; cut at the end-of-sequence marker
            token_pred = [idx2token.get(t_id, "#") for t_id in predict_result[0]]
            translation = "".join(token_pred).split("</s>")[0]
            logging.info(" 译文: {}".format(translation))  # "Translation: ..."
            time.sleep(0.1)
def __init__(self):
    # Load vocabulary
    self.char2idx, self.idx2char = load_vocab()

    self.L = tf.placeholder(tf.int32, shape=(None, None))
    self.mels = tf.placeholder(tf.float32, shape=(None, None, n_mels))
    self.prev_max_attentions = tf.placeholder(tf.int32, shape=(None,))

    # Network 1
    with tf.variable_scope("Text2Mel"):
        # Get S or decoder inputs. (B, T//r, n_mels)
        self.S = tf.concat((tf.zeros_like(self.mels[:, :1, :]), self.mels[:, :-1, :]), 1)

        # Networks
        with tf.variable_scope("TextEnc"):
            self.K, self.V = TextEnc(self.L)  # (N, Tx, e)

        with tf.variable_scope("AudioEnc"):
            self.Q = AudioEnc(self.S)

        with tf.variable_scope("Attention"):
            # R: (B, T/r, 2d)
            # alignments: (B, N, T/r)
            # max_attentions: (B,)
            self.R, self.alignments, self.max_attentions = Attention(
                self.Q, self.K, self.V,
                mononotic_attention=True,  # (sic) keyword spelling follows the library
                prev_max_attentions=self.prev_max_attentions)

        with tf.variable_scope("AudioDec"):
            self.Y_logits, self.Y = AudioDec(self.R)  # (B, T/r, n_mels)

    # Network 2
    # During inference, the predicted melspectrogram values are fed.
    with tf.variable_scope("SSRN"):
        self.Z_logits, self.Z = SSRN(self.Y)

    with tf.variable_scope("gs"):
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
def __init__(self, hp):
    self.hp = hp
    self.token2idx, self.idx2token = load_vocab(os.path.join(hp.data_dir, hp.vocab))
    self.steps = []
    if self.hp.fac_embed:
        # Factorized embedding: a smaller d_embed lookup table plus a projection to d_model
        self.embeddings1, self.embeddings2 = get_factorized_token_embeddings(
            self.hp.vocab_size, self.hp.d_embed, self.hp.d_model, zero_pad=True,
            normalized=self.hp.norm_embedding, ortho=self.hp.ortho_embedding)
    else:
        self.embeddings = get_token_embeddings(
            self.hp.vocab_size, self.hp.d_model, zero_pad=True,
            normalized=self.hp.norm_embedding, ortho=self.hp.ortho_embedding)
def create_train_data():
    from data_load import load_vocab
    roma2idx, idx2roma, surf2idx, idx2surf = load_vocab()

    romaji_sents, surface_sents = [], []
    for line in codecs.open('preprocessed/ja.tsv', 'r', 'utf-8'):
        try:
            idx, romaji_sent, surface_sent = line.strip().split("\t")
        except ValueError:
            continue

        if len(romaji_sent) < hp.max_len:
            # Append "S" as a sentence-boundary marker; unknown characters map to index 1
            romaji_sents.append(
                np.array([roma2idx.get(roma, 1) for roma in romaji_sent + "S"],
                         np.int32).tostring())
            surface_sents.append(
                np.array([surf2idx.get(surf, 1) for surf in surface_sent + "S"],
                         np.int32).tostring())

    pickle.dump((romaji_sents, surface_sents),
                open('preprocessed/train.pkl', 'wb'), protocol=2)
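# The pickled sentences above are int32 arrays serialized with .tostring().
# A minimal sketch of reading them back (assumption: the actual consumer of
# preprocessed/train.pkl is not shown in this section):
import pickle
import numpy as np

with open('preprocessed/train.pkl', 'rb') as f:
    romaji_sents, surface_sents = pickle.load(f)
first_ids = np.frombuffer(romaji_sents[0], dtype=np.int32)  # recover the id sequence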
def main_batches():
    g = Graph(is_training=False)

    # Load data
    nums, X, ys = load_test_data()
    pnyn2idx, idx2pnyn, hanzi2idx, idx2hanzi = load_vocab()

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model name
            mname = open(hp.logdir + '/checkpoint', 'r').read().split('"')[1]

            with codecs.open('eval/{}_{}.csv'.format(mname, "qwerty" if hp.isqwerty else "nine"),
                             'w', 'utf-8') as fout:
                fout.write("NUM,EXPECTED,{}_{},# characters,edit distance\n".format(
                    mname, "qwerty" if hp.isqwerty else "nine"))
                total_edit_distance, num_chars = 0, 0
                for step in range(len(X) // hp.batch_size):
                    num = nums[step * hp.batch_size:(step + 1) * hp.batch_size]  # number batch
                    x = X[step * hp.batch_size:(step + 1) * hp.batch_size]  # input batch
                    y = ys[step * hp.batch_size:(step + 1) * hp.batch_size]  # ground-truth strings

                    preds = sess.run(g.preds, {g.x: x})
                    for n, xx, pred, expected in zip(num, x, preds, y):  # sentence-wise
                        got = "".join(idx2hanzi[idx]
                                      for idx in pred)[:np.count_nonzero(xx)].replace("_", "")
                        edit_distance = distance.levenshtein(expected, got)
                        total_edit_distance += edit_distance
                        num_chars += len(expected)

                        fout.write(u"{},{},{},{},{}\n".format(n, expected, got,
                                                              len(expected), edit_distance))
                fout.write(u"Total CER: {}/{}={},,,,\n".format(
                    total_edit_distance, num_chars,
                    round(float(total_edit_distance) / num_chars, 2)))
def __init__(self, training=True):
    # Load vocabulary
    self.char2idx, self.idx2char = load_vocab()

    # Graph
    self.graph = tf.Graph()
    with self.graph.as_default():
        # Data Feeding
        ## x: Text. (N, Tx), int32
        ## y1: Reduced melspectrogram. (N, Ty//r, n_mels*r), float32
        ## y2: Reduced dones. (N, Ty//r,), int32
        ## z: Magnitude. (N, Ty, n_fft//2+1), float32
        if training:
            self.x, self.y1, self.y2, self.z, self.num_batch = get_batch()
            self.prev_max_attentions_li = tf.ones(shape=(hp.dec_layers, hp.batch_size),
                                                  dtype=tf.int32)
        else:  # Inference
            self.x = tf.placeholder(tf.int32, shape=(hp.batch_size, hp.Tx))
            self.y1 = tf.placeholder(tf.float32,
                                     shape=(hp.batch_size, hp.Ty // hp.r, hp.n_mels * hp.r))
            self.prev_max_attentions_li = tf.placeholder(
                tf.int32, shape=(hp.dec_layers, hp.batch_size))

        # Get decoder inputs: feed last frames only (N, Ty//r, n_mels)
        self.decoder_input = tf.concat((tf.zeros_like(self.y1[:, :1, -hp.n_mels:]),
                                        self.y1[:, :-1, -hp.n_mels:]), 1)

        # Networks
        with tf.variable_scope("encoder"):
            self.keys, self.vals = encoder(self.x, training=training)  # (N, Tx, e)

        with tf.variable_scope("decoder"):
            # mel_logits: (N, Ty/r, n_mels*r)
            # done_output: (N, Ty/r, 2)
            # decoder_output: (N, Ty/r, e)
            # alignments_li: dec_layers*(Tx, Ty/r)
            # max_attentions_li: dec_layers*(N, T_y/r)
            (self.mel_logits, self.done_output, self.decoder_output,
             self.alignments_li, self.max_attentions_li) = decoder(
                self.decoder_input, self.keys, self.vals,
                self.prev_max_attentions_li, training=training)
            self.mel_output = tf.nn.sigmoid(self.mel_logits)

        with tf.variable_scope("converter"):
            # Restore shape
            self.converter_input = tf.reshape(self.decoder_output,
                                              (-1, hp.Ty, hp.embed_size // hp.r))
            self.converter_input = fc_block(self.converter_input, hp.converter_channels,
                                            activation_fn=tf.nn.relu,
                                            training=training)  # (N, Ty, v)

            # Converter
            self.mag_logits = converter(self.converter_input,
                                        training=training)  # (N, Ty, 1+n_fft//2)
            self.mag_output = tf.nn.sigmoid(self.mag_logits)

        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        if training:
            # Loss
            self.loss_mels = tf.reduce_mean(tf.abs(self.mel_output - self.y1))
            self.loss_dones = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.done_output,
                                                               labels=self.y2))
            self.loss_mags = tf.reduce_mean(tf.abs(self.mag_output - self.z))
            self.loss = self.loss_mels + self.loss_dones + self.loss_mags

            # Training Scheme
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)

            ## Gradient clipping: first by value, then by norm
            self.gvs = self.optimizer.compute_gradients(self.loss)
            self.clipped = []
            for grad, var in self.gvs:
                grad = tf.clip_by_value(grad, -1. * hp.max_grad_val, hp.max_grad_val)
                grad = tf.clip_by_norm(grad, hp.max_grad_norm)
                self.clipped.append((grad, var))
            self.train_op = self.optimizer.apply_gradients(self.clipped,
                                                           global_step=self.global_step)

            # Summary
            tf.summary.scalar('Train_Loss/LOSS', self.loss)
            tf.summary.scalar('Train_Loss/mels', self.loss_mels)
            tf.summary.scalar('Train_Loss/dones', self.loss_dones)
            tf.summary.scalar('Train_Loss/mags', self.loss_mags)
            self.merged = tf.summary.merge_all()
def __init__(self, mode="train"): # Load vocabulary self.char2idx, self.idx2char = load_vocab() # Set phase is_training=True if mode=="train" else False # Graph # Data Feeding # x: Text. (N, Tx) # y: Reduced melspectrogram. (N, Ty//r, n_mels*r) # z: Magnitude. (N, Ty, n_fft//2+1) if mode=="train": self.x, self.y, self.z, self.fnames, self.num_batch = get_batch() elif mode=="eval": self.x = tf.placeholder(tf.int32, shape=(None, None)) self.y = tf.placeholder(tf.float32, shape=(None, None, hp.n_mels*hp.r)) self.z = tf.placeholder(tf.float32, shape=(None, None, 1+hp.n_fft//2)) self.fnames = tf.placeholder(tf.string, shape=(None,)) else: # Synthesize self.x = tf.placeholder(tf.int32, shape=(None, None)) self.y = tf.placeholder(tf.float32, shape=(None, None, hp.n_mels * hp.r)) # Get encoder/decoder inputs self.encoder_inputs = embed(self.x, len(hp.vocab), hp.embed_size) # (N, T_x, E) self.decoder_inputs = tf.concat((tf.zeros_like(self.y[:, :1, :]), self.y[:, :-1, :]), 1) # (N, Ty/r, n_mels*r) self.decoder_inputs = self.decoder_inputs[:, :, -hp.n_mels:] # feed last frames only (N, Ty/r, n_mels) # Networks with tf.variable_scope("net"): # Encoder self.memory = encoder(self.encoder_inputs, is_training=is_training) # (N, T_x, E) # Decoder1 self.y_hat, self.alignments = decoder1(self.decoder_inputs, self.memory, is_training=is_training) # (N, T_y//r, n_mels*r) # Decoder2 or postprocessing self.z_hat = decoder2(self.y_hat, is_training=is_training) # (N, T_y//r, (1+n_fft//2)*r) # monitor self.audio = tf.py_func(spectrogram2wav, [self.z_hat[0]], tf.float32) if mode in ("train", "eval"): # Loss self.loss1 = tf.reduce_mean(tf.abs(self.y_hat - self.y)) self.loss2 = tf.reduce_mean(tf.abs(self.z_hat - self.z)) self.loss = self.loss1 + self.loss2 # Training Scheme self.global_step = tf.Variable(0, name='global_step', trainable=False) self.lr = learning_rate_decay(hp.lr, global_step=self.global_step) self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr) ## gradient clipping self.gvs = self.optimizer.compute_gradients(self.loss) self.clipped = [] for grad, var in self.gvs: grad = tf.clip_by_norm(grad, 5.) self.clipped.append((grad, var)) self.train_op = self.optimizer.apply_gradients(self.clipped, global_step=self.global_step) # Summary tf.summary.scalar('{}/loss1'.format(mode), self.loss1) tf.summary.scalar('{}/loss'.format(mode), self.loss) tf.summary.scalar('{}/lr'.format(mode), self.lr) tf.summary.image("{}/mel_gt".format(mode), tf.expand_dims(self.y, -1), max_outputs=1) tf.summary.image("{}/mel_hat".format(mode), tf.expand_dims(self.y_hat, -1), max_outputs=1) tf.summary.image("{}/mag_gt".format(mode), tf.expand_dims(self.z, -1), max_outputs=1) tf.summary.image("{}/mag_hat".format(mode), tf.expand_dims(self.z_hat, -1), max_outputs=1) tf.summary.audio("{}/sample".format(mode), tf.expand_dims(self.audio, 0), hp.sr) self.merged = tf.summary.merge_all()
def __init__(self, is_training=True):
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            self.x, self.y, self.num_batch = get_batch()
        else:  # Evaluation
            self.x = tf.placeholder(tf.int32, shape=(None, hp.maxlen))
            self.y = tf.placeholder(tf.int32, shape=(None, hp.maxlen))

        # Load vocabulary
        pnyn2idx, _, hanzi2idx, _ = load_vocab()

        # Character embedding for x
        enc = embed(self.x, len(pnyn2idx), hp.embed_size, scope="emb_x")

        # Encoder pre-net
        prenet_out = prenet(enc,
                            num_units=[hp.embed_size, hp.embed_size // 2],
                            is_training=is_training)  # (N, T, E/2)

        # Encoder CBHG
        ## Conv1D bank
        enc = conv1d_banks(prenet_out, K=hp.encoder_num_banks,
                           num_units=hp.embed_size // 2,
                           is_training=is_training)  # (N, T, K * E / 2)

        ## Max pooling
        enc = tf.layers.max_pooling1d(enc, 2, 1, padding="same")  # (N, T, K * E / 2)

        ## Conv1D projections
        enc = conv1d(enc, hp.embed_size // 2, 5, scope="conv1d_1")  # (N, T, E/2)
        enc = normalize(enc, type=hp.norm_type, is_training=is_training,
                        activation_fn=tf.nn.relu, scope="norm1")
        enc = conv1d(enc, hp.embed_size // 2, 5, scope="conv1d_2")  # (N, T, E/2)
        enc = normalize(enc, type=hp.norm_type, is_training=is_training,
                        activation_fn=None, scope="norm2")
        enc += prenet_out  # (N, T, E/2)  # residual connections

        ## Highway Nets
        for i in range(hp.num_highwaynet_blocks):
            enc = highwaynet(enc, num_units=hp.embed_size // 2,
                             scope='highwaynet_{}'.format(i))  # (N, T, E/2)

        ## Bidirectional GRU
        enc = gru(enc, hp.embed_size // 2, True, scope="gru1")  # (N, T, E)

        ## Readout
        self.outputs = tf.layers.dense(enc, len(hanzi2idx), use_bias=False)
        self.preds = tf.to_int32(tf.argmax(self.outputs, axis=-1))

        if is_training:
            self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.y,
                                                                       logits=self.outputs)
            self.istarget = tf.to_float(tf.not_equal(self.y, tf.zeros_like(self.y)))  # masking
            self.hits = tf.to_float(tf.equal(self.preds, self.y)) * self.istarget
            self.acc = tf.reduce_sum(self.hits) / tf.reduce_sum(self.istarget)
            self.mean_loss = tf.reduce_sum(self.loss * self.istarget) / tf.reduce_sum(self.istarget)

            # Training Scheme
            self.global_step = tf.Variable(0, name='global_step', trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
            self.train_op = self.optimizer.minimize(self.mean_loss,
                                                    global_step=self.global_step)

            # Summary
            tf.summary.scalar('mean_loss', self.mean_loss)
            tf.summary.scalar('acc', self.acc)
            self.merged = tf.summary.merge_all()