def _predict(sess, examples: List[InputExample]):
    # (`List` is assumed to be imported from `typing` in the surrounding
    # script.)
    hypotheses, inputs = [], []
    features = []
    for example in examples:
        feature = convert_single_example(
            ex_index=0, example=example,
            max_seq_length=config_data.max_seq_length,
            tokenizer=tokenizer)
        features.append(feature)

    for feature in features:
        feed_dict = {
            src_input_ids: [feature.src_input_ids],
            src_segment_ids: [feature.src_segment_ids],
            tx.global_mode(): tf.estimator.ModeKeys.PREDICT,
        }
        fetches = {
            'beam_search_ids': beam_search_ids,
            'src_input_ids': src_input_ids,
        }
        fetches_ = sess.run(fetches, feed_dict=feed_dict)
        hypotheses.extend(h.tolist() for h in fetches_['beam_search_ids'])
        inputs.extend(h.tolist() for h in fetches_['src_input_ids'])

    hypotheses = utils.list_strip_eos(hypotheses, eos_token_id)

    write_token_id_arrays_to_text_file(
        inputs, os.path.join(model_dir, 'predict-inputs.txt'), tokenizer)
    write_token_id_arrays_to_text_file(
        hypotheses, os.path.join(model_dir, 'predict-predictions.txt'),
        tokenizer)

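# `utils.list_strip_eos` above comes from the example's local `utils`
# module. A minimal pure-Python sketch of the assumed behavior (not the
# actual implementation): truncate each id sequence at the first EOS id.
def list_strip_eos_sketch(id_lists, eos_id):
    """Truncates each token-id list at the first occurrence of `eos_id`."""
    return [ids[:ids.index(eos_id)] if eos_id in ids else ids
            for ids in id_lists]

# Example: list_strip_eos_sketch([[5, 7, 2, 0], [3, 4]], eos_id=2)
# returns [[5, 7], [3, 4]].
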
def _train_epoch(sess):
    """Trains on the training set, and evaluates on the dev set
    periodically.
    """
    iterator.restart_dataset(sess, 'train')

    fetches = {'loss': train_op, 'step': global_step}

    while True:
        try:
            feed_dict = {
                iterator.handle: iterator.get_handle(sess, 'train'),
                tx.global_mode(): tf.estimator.ModeKeys.TRAIN,
            }
            rets = sess.run(fetches, feed_dict)
            step = rets['step']

            dis_steps = config_train.display_steps
            if _is_head() and dis_steps > 0 and step % dis_steps == 0:
                tf.logging.info('step:%d; loss:%f' % (step, rets['loss']))

            eval_steps = config_train.eval_steps
            if _is_head() and eval_steps > 0 and step % eval_steps == 0:
                _dev_epoch(sess)

            ckpt_steps = config_train.checkpoint_steps
            if _is_head() and ckpt_steps > 0 and step % ckpt_steps == 0:
                ckpt_fn = os.path.join(FLAGS.output_dir, 'model.ckpt')
                ckpt_fn = saver.save(sess, ckpt_fn, global_step=step)
                tf.logging.info('Checkpoint to {}'.format(ckpt_fn))
        except tf.errors.OutOfRangeError:
            break

def _run_epoch(sess, data_iter, epoch, is_train=False):
    loss = 0.
    iters = 0

    fetches = {"mle_loss": mle_loss}
    if is_train:
        fetches["train_op"] = train_op

    mode = (tf.estimator.ModeKeys.TRAIN
            if is_train
            else tf.estimator.ModeKeys.EVAL)

    for x, y in data_iter:
        batch_size = x.shape[0]
        feed_dict = {
            inputs: x,
            targets: y,
            learning_rate: lr,
            tx.global_mode(): mode,
        }
        rets = sess.run(fetches, feed_dict)
        loss += rets["mle_loss"]
        iters += batch_size

    ppl = np.exp(loss / iters)
    return ppl

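# A minimal driver loop for the `_run_epoch` helper above (a sketch;
# `train_iter`, `valid_iter`, and `num_epochs` are hypothetical names
# assumed to exist in the surrounding script, as are the globals such as
# `lr` that `_run_epoch` reads):
def _train_and_validate(sess, train_iter, valid_iter, num_epochs):
    for epoch in range(num_epochs):
        train_ppl = _run_epoch(sess, train_iter, epoch, is_train=True)
        valid_ppl = _run_epoch(sess, valid_iter, epoch)
        print('epoch %d: train ppl %.3f, valid ppl %.3f'
              % (epoch, train_ppl, valid_ppl))
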
def _eval_epoch(sess, mode):
    """`mode` is one of {'val', 'test'}.
    """
    iterator.restart_dataset(sess, mode)

    refs, hypos = [], []
    while True:
        try:
            fetches = [
                batch['target_text'][:, 1:],
                infer_outputs.predicted_ids[:, :, 0]
            ]
            feed_dict = {
                tx.global_mode(): tf.estimator.ModeKeys.PREDICT,
                iterator.handle: iterator.get_handle(sess, mode)
            }
            target_texts, output_ids = sess.run(fetches, feed_dict=feed_dict)

            target_texts = tx.utils.strip_special_tokens(target_texts)
            output_texts = tx.utils.map_ids_to_strs(
                ids=output_ids, vocab=val_data.target_vocab)

            for hypo, ref in zip(output_texts, target_texts):
                hypos.append(hypo)
                refs.append([ref])
        except tf.errors.OutOfRangeError:
            break

    return tx.evals.corpus_bleu_moses(list_of_references=refs,
                                      hypotheses=hypos)

def _eval(sess, epoch, data_tag):
    fetches = {
        "predicts": predicts,
    }
    mode = tf.estimator.ModeKeys.EVAL

    file_name = 'tmp/%s%d' % (data_tag, epoch)
    writer = CoNLLWriter(i2w, i2n)
    writer.start(file_name)

    data = data_dev if data_tag == 'dev' else data_test
    for batch in iterate_batch(data, config.batch_size, shuffle=False):
        word, char, ner, mask, length = batch
        feed_dict = {
            inputs: word, chars: char, targets: ner, masks: mask,
            seq_lengths: length, global_step: epoch,
            tx.global_mode(): mode,
        }
        rets = sess.run(fetches, feed_dict)
        predictions = rets['predicts']
        writer.write(word, predictions, ner, length)
    writer.close()

    acc, precision, recall, f1 = scores.scores(file_name)
    print('%s acc: %.2f%%, precision: %.2f%%, recall: %.2f%%, F1: %.2f%%'
          % (data_tag, acc, precision, recall, f1))
    return acc, precision, recall, f1

def _eval_epoch(sess, mode):
    if mode == 'val':
        data_iterator.switch_to_val_data(sess)
    else:
        data_iterator.switch_to_test_data(sess)

    refs, hypos = [], []
    while True:
        try:
            fetches = [
                batch['target_text'][:, 1:],
                infer_outputs.predicted_ids[:, :, 0]
            ]
            feed_dict = {tx.global_mode(): tf.estimator.ModeKeys.EVAL}
            target_texts_ori, output_ids = sess.run(fetches,
                                                    feed_dict=feed_dict)

            target_texts = tx.utils.strip_special_tokens(
                target_texts_ori, is_token_list=True)
            output_texts = tx.utils.map_ids_to_strs(
                ids=output_ids, vocab=val_data.target_vocab)

            for hypo, ref in zip(output_texts, target_texts):
                hypos.append(hypo)
                refs.append([ref])
        except tf.errors.OutOfRangeError:
            break

    return tx.evals.corpus_bleu_moses(list_of_references=refs,
                                      hypotheses=hypos)

def _train_epoch(sess):
    """Trains on the training set, and evaluates on the dev set
    periodically.
    """
    iterator.restart_dataset(sess, 'train')

    fetches = {
        'train_op': train_op,
        'loss': loss,
        'batch_size': batch_size,
        'step': global_step,
    }

    while True:
        try:
            feed_dict = {
                iterator.handle: iterator.get_handle(sess, 'train'),
                tx.global_mode(): tf.estimator.ModeKeys.TRAIN,
            }
            rets = sess.run(fetches, feed_dict)
            step = rets['step']

            dis_steps = config_data.display_steps
            if _is_head() and dis_steps > 0 and step % dis_steps == 0:
                tf.logging.info('step:%d; loss:%f;' % (step, rets['loss']))

            eval_steps = config_data.eval_steps
            if _is_head() and eval_steps > 0 and step % eval_steps == 0:
                _eval_epoch(sess)
        except tf.errors.OutOfRangeError:
            break

def _eval_epoch(sess, mode, epoch_no):
    """This function is the same as `_eval_epoch()` in
    baseline_seq2seq_attn_main.py.
    """
    if mode == 'val':
        data_iterator.switch_to_val_data(sess)
    else:
        data_iterator.switch_to_test_data(sess)

    refs, hypos = [], []
    while True:
        try:
            fetches = [
                batch['target_text'][:, 1:],
                infer_outputs.predicted_ids[:, :, 0]
            ]
            feed_dict = {tx.global_mode(): tf.estimator.ModeKeys.EVAL}
            target_texts_ori, output_ids = sess.run(fetches,
                                                    feed_dict=feed_dict)

            target_texts = tx.utils.strip_special_tokens(
                target_texts_ori.tolist(), is_token_list=True)
            target_texts = tx.utils.str_join(target_texts)
            output_texts = tx.utils.map_ids_to_strs(
                ids=output_ids, vocab=val_data.target_vocab)

            tx.utils.write_paired_text(
                target_texts, output_texts,
                log_dir + mode + '_results' + str(epoch_no) + '.txt',
                append=True, mode='h', sep=' ||| ')

            for hypo, ref in zip(output_texts, target_texts):
                if config_data.eval_metric == 'bleu':
                    hypos.append(hypo)
                    refs.append([ref])
                elif config_data.eval_metric == 'rouge':
                    hypos.append(tx.utils.compat_as_text(hypo))
                    refs.append(tx.utils.compat_as_text(ref))
        except tf.errors.OutOfRangeError:
            break

    if config_data.eval_metric == 'bleu':
        return tx.evals.corpus_bleu_moses(list_of_references=refs,
                                          hypotheses=hypos)
    elif config_data.eval_metric == 'rouge':
        rouge = Rouge()
        return rouge.get_scores(hyps=hypos, refs=refs, avg=True)

def _test_epoch_ppl(sess, epoch):
    iterator.switch_to_test_data(sess)

    pples = []
    while True:
        try:
            feed = {tx.global_mode(): tf.estimator.ModeKeys.EVAL}
            ppl = sess.run(perplexity, feed_dict=feed)
            pples.append(ppl)
        except tf.errors.OutOfRangeError:
            avg_ppl = np.mean(pples)
            print('epoch {} perplexity={}'.format(epoch, avg_ppl))
            break

def _run_epoch(sess, data_iter, epoch, is_train=False, verbose=False):
    start_time = time.time()
    loss = 0.
    iters = 0

    fetches = {
        "mle_loss": mle_loss,
        "final_state": final_state,
    }
    if is_train:
        fetches["train_op"] = train_op
        epoch_size = ((len(data["train_text_id"]) // batch_size - 1)
                      // num_steps)

    mode = (tf.estimator.ModeKeys.TRAIN
            if is_train
            else tf.estimator.ModeKeys.EVAL)

    for step, (x, y) in enumerate(data_iter):
        if step == 0:
            state = sess.run(initial_state, feed_dict={inputs: x})

        feed_dict = {
            inputs: x,
            targets: y,
            global_step: epoch,
            tx.global_mode(): mode,
        }
        for i, (c, h) in enumerate(initial_state):
            feed_dict[c] = state[i].c
            feed_dict[h] = state[i].h

        rets = sess.run(fetches, feed_dict)
        loss += rets["mle_loss"]
        state = rets["final_state"]
        iters += num_steps

        ppl = np.exp(loss / iters)
        if (verbose and is_train and hvd.rank() == 0
                and (step + 1) % (epoch_size // 10) == 0):
            tf.logging.info(
                "%.3f perplexity: %.3f speed: %.0f wps"
                % ((step + 1) * 1.0 / epoch_size, ppl,
                   iters * batch_size / (time.time() - start_time)))

    _elapsed_time = time.time() - start_time
    tf.logging.info("epoch time elapsed: %f" % _elapsed_time)

    ppl = np.exp(loss / iters)
    return ppl, _elapsed_time

def _train_epoch(sess, epoch, display=1000):
    iterator.switch_to_train_data(sess)

    while True:
        try:
            feed = {tx.global_mode(): tf.estimator.ModeKeys.TRAIN}
            step, loss, _ = sess.run([global_step, mle_loss, train_op],
                                     feed_dict=feed)
            if step % display == 0:
                print('step {} at epoch {}: loss={}'.format(
                    step, epoch, loss))
        except tf.errors.OutOfRangeError:
            break

    print('epoch {} train: loss={}'.format(epoch, loss))

def _run_epoch(sess, data_iter, epoch, is_train=False, verbose=False):
    start_time = time.time()
    loss = 0.
    iters = 0
    state = sess.run(initial_state)

    fetches = {
        "mle_loss": mle_loss,
        "final_state": final_state,
    }
    if is_train:
        fetches["train_op"] = train_op

    mode = (tf.estimator.ModeKeys.TRAIN
            if is_train
            else tf.estimator.ModeKeys.EVAL)
    epoch_size = (len(train) // batch_size - 1) // num_steps

    for step, data_batch in enumerate(data_iter):
        feed_dict = {
            inputs: data_batch.text,
            targets: data_batch.target,
            global_step: epoch,
            tx.global_mode(): mode,
        }
        for i, (c, h) in enumerate(initial_state):
            feed_dict[c] = state[i].c
            feed_dict[h] = state[i].h

        rets = sess.run(fetches, feed_dict)
        loss += rets["mle_loss"]
        state = rets["final_state"]
        iters += num_steps

        ppl = np.exp(loss / iters)
        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps"
                  % (step * 1.0 / epoch_size, ppl,
                     iters * batch_size / (time.time() - start_time)))

    ppl = np.exp(loss / iters)
    return ppl

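# The state-carrying pattern used by both `_run_epoch` variants above, in
# isolation: `initial_state` is a tuple of per-layer LSTMStateTuple
# placeholders, and the numpy values fetched as `final_state` are fed back
# in for the next batch, so the RNN state persists across consecutive
# `num_steps` windows of the corpus:
#
#     for i, (c, h) in enumerate(initial_state):
#         feed_dict[c] = state[i].c   # cell state of layer i
#         feed_dict[h] = state[i].h   # hidden state of layer i
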
def _train_epoch(sess, epoch, step, smry_writer):
    print('Start epoch %d' % epoch)
    data_iterator.restart_dataset(sess, 'train')

    fetches = {
        'train_op': train_op,
        'loss': mle_loss,
        'step': global_step,
        'smry': summary_merged,
    }

    while True:
        try:
            feed_dict = {
                data_iterator.handle:
                    data_iterator.get_handle(sess, 'train'),
                tx.global_mode(): tf.estimator.ModeKeys.TRAIN,
                learning_rate: utils.get_lr(step, config_model),
            }
            fetches_ = sess.run(fetches, feed_dict)
            step, loss = fetches_['step'], fetches_['loss']

            # Display every `display_steps` steps
            display_steps = config_data.display_steps
            if display_steps > 0 and step % display_steps == 0:
                print('[%s] step: %d, loss: %.4f'
                      % (strftime("%Y-%m-%d %H:%M:%S", gmtime()),
                         step, loss))
                smry_writer.add_summary(fetches_['smry'], global_step=step)

            # Evaluate every `eval_steps` steps
            eval_steps = config_data.eval_steps
            if eval_steps > 0 and step % eval_steps == 0 and step > 0:
                _eval_epoch(sess, epoch, 'eval')
        except tf.errors.OutOfRangeError:
            break

    return step

def _train_epoch(sess, epoch):
    start_time = time.time()
    loss = 0.
    corr = 0.
    num_tokens = 0.

    fetches = {
        "mle_loss": mle_loss,
        "correct": corrects,
        "train_op": train_op,
    }
    mode = tf.estimator.ModeKeys.TRAIN

    num_inst = 0
    for batch in iterate_batch(data_train, config.batch_size, shuffle=True):
        word, char, ner, mask, length = batch
        feed_dict = {
            inputs: word, chars: char, targets: ner, masks: mask,
            seq_lengths: length, global_step: epoch,
            tx.global_mode(): mode,
        }
        rets = sess.run(fetches, feed_dict)

        nums = np.sum(length)
        num_inst += len(word)
        loss += rets["mle_loss"] * nums
        corr += rets["correct"]
        num_tokens += nums

        print("train: %d (%d/%d) loss: %.4f, acc: %.2f%%"
              % (epoch, num_inst, len(data_train), loss / num_tokens,
                 corr / num_tokens * 100))

    print("train: %d loss: %.4f, acc: %.2f%%, time: %.2fs"
          % (epoch, loss / num_tokens, corr / num_tokens * 100,
             time.time() - start_time))

def _main(_):
    env = gym.make('CartPole-v0')
    env = env.unwrapped
    env_config = tx.agents.get_gym_env_config(env)

    agent = PGAgent(
        env_config,
        policy_kwargs={'action_space': env_config.action_space},
        hparams=config.pg_agent_hparams)

    sess = tf.Session()
    agent.sess = sess

    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    sess.run(tf.tables_initializer())

    feed_dict = {tx.global_mode(): tf.estimator.ModeKeys.TRAIN}

    for e in range(300):
        reward_sum = 0.
        observ = env.reset()
        agent.reset()
        while True:
            action = agent.get_action(observ, feed_dict=feed_dict)
            next_observ, reward, terminal, _ = env.step(action=action)
            if terminal:
                reward = 0.
            agent.observe(reward, terminal, feed_dict=feed_dict)
            observ = next_observ
            reward_sum += reward
            if terminal:
                break

        if (e + 1) % 10 == 0:
            print('episode {}: {}'.format(e + 1, reward_sum))

    sess.close()

def _eval_epoch(sess, epoch, mode):
    if mode == 'eval':
        eval_data = dev_data
    elif mode == 'test':
        eval_data = test_data
    else:
        raise ValueError('`mode` should be either "eval" or "test".')

    references, hypotheses = [], []
    bsize = config_data.test_batch_size
    for i in range(0, len(eval_data), bsize):
        sources, targets = zip(*eval_data[i:i + bsize])
        x_block = data_utils.source_pad_concat_convert(sources)
        feed_dict = {
            encoder_input: x_block,
            tx.global_mode(): tf.estimator.ModeKeys.EVAL,
        }
        fetches = {
            'beam_search_ids': beam_search_ids,
        }
        fetches_ = sess.run(fetches, feed_dict=feed_dict)
        hypotheses.extend(h.tolist() for h in fetches_['beam_search_ids'])
        references.extend(r.tolist() for r in targets)

    hypotheses = utils.list_strip_eos(hypotheses, eos_token_id)
    references = utils.list_strip_eos(references, eos_token_id)

    if mode == 'eval':
        # Writes results to files to evaluate BLEU.
        # For 'eval' mode, the BLEU is based on token ids (rather than
        # text tokens) and serves only as a surrogate metric to monitor
        # the training process.
        fname = os.path.join(FLAGS.model_dir, 'tmp.eval')
        hypotheses = tx.utils.str_join(hypotheses)
        references = tx.utils.str_join(references)
        hyp_fn, ref_fn = tx.utils.write_paired_text(
            hypotheses, references, fname, mode='s')
        eval_bleu = bleu_wrapper(ref_fn, hyp_fn, case_sensitive=True)
        eval_bleu = 100. * eval_bleu
        logger.info('epoch: %d, eval_bleu %.4f', epoch, eval_bleu)
        print('epoch: %d, eval_bleu %.4f' % (epoch, eval_bleu))

        if eval_bleu > best_results['score']:
            logger.info('epoch: %d, best bleu: %.4f', epoch, eval_bleu)
            best_results['score'] = eval_bleu
            best_results['epoch'] = epoch
            model_path = os.path.join(FLAGS.model_dir, 'best-model.ckpt')
            logger.info('saving model to %s', model_path)
            print('saving model to %s' % model_path)
            saver.save(sess, model_path)

    elif mode == 'test':
        # For 'test' mode, together with the cmds in README.md, BLEU
        # is evaluated based on text tokens, which is the standard metric.
        fname = os.path.join(FLAGS.model_dir, 'test.output')
        hwords, rwords = [], []
        for hyp, ref in zip(hypotheses, references):
            hwords.append([id2w[y] for y in hyp])
            rwords.append([id2w[y] for y in ref])
        hwords = tx.utils.str_join(hwords)
        rwords = tx.utils.str_join(rwords)
        hyp_fn, ref_fn = tx.utils.write_paired_text(
            hwords, rwords, fname, mode='s',
            src_fname_suffix='hyp', tgt_fname_suffix='ref')
        logger.info('Test output written to file: %s', hyp_fn)
        print('Test output written to file: %s' % hyp_fn)

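# Note on `tx.utils.write_paired_text` with mode='s' (used above): the two
# text streams are written to separate files whose paths are returned,
# which is what lets `bleu_wrapper` read the hypothesis and reference
# files independently. A hedged usage sketch (output names illustrative):
#
#     hyp_fn, ref_fn = tx.utils.write_paired_text(
#         hyps, refs, 'tmp.eval', mode='s')
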
def _run_epoch(sess, epoch, mode_string, display=10):
    if mode_string == 'train':
        iterator.switch_to_train_data(sess)
    elif mode_string == 'valid':
        iterator.switch_to_val_data(sess)
    elif mode_string == 'test':
        iterator.switch_to_test_data(sess)

    step = 0
    start_time = time.time()
    num_words = num_sents = 0
    nll_ = 0.
    kl_loss_ = rc_loss_ = 0.

    while True:
        try:
            fetches = {
                "nll": nll,
                "kl_loss": kl_loss,
                "rc_loss": rc_loss,
                "lengths": seq_lengths,
            }

            if mode_string == 'train':
                fetches["train_op"] = train_op
                opt_vars["kl_weight"] = min(
                    1.0, opt_vars["kl_weight"] + anneal_r)
                kl_weight_ = opt_vars["kl_weight"]
            else:
                kl_weight_ = 1.0

            mode = (tf.estimator.ModeKeys.TRAIN
                    if mode_string == 'train'
                    else tf.estimator.ModeKeys.EVAL)

            feed = {
                tx.global_mode(): mode,
                kl_weight: kl_weight_,
                learning_rate: opt_vars["learning_rate"],
            }

            fetches_ = sess.run(fetches, feed_dict=feed)

            batch_size_ = len(fetches_["lengths"])
            num_sents += batch_size_
            num_words += sum(fetches_["lengths"])
            nll_ += fetches_["nll"] * batch_size_
            kl_loss_ += fetches_["kl_loss"] * batch_size_
            rc_loss_ += fetches_["rc_loss"] * batch_size_

            if step % display == 0 and mode_string == 'train':
                print('%s: epoch %d, step %d, nll %.4f, klw: %.4f, '
                      'KL %.4f, rc %.4f, log_ppl %.4f, ppl %.4f, '
                      'time elapsed: %.1fs'
                      % (mode_string, epoch, step, nll_ / num_sents,
                         opt_vars["kl_weight"], kl_loss_ / num_sents,
                         rc_loss_ / num_sents, nll_ / num_words,
                         np.exp(nll_ / num_words),
                         time.time() - start_time))
                sys.stdout.flush()

            step += 1
        except tf.errors.OutOfRangeError:
            print('\n%s: epoch %d, nll %.4f, KL %.4f, rc %.4f, '
                  'log_ppl %.4f, ppl %.4f\n'
                  % (mode_string, epoch, nll_ / num_sents,
                     kl_loss_ / num_sents, rc_loss_ / num_sents,
                     nll_ / num_words, np.exp(nll_ / num_words)))
            break

    return nll_ / num_sents, np.exp(nll_ / num_words)

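# The linear KL-weight annealing used in `_run_epoch` above, isolated as a
# small helper (a sketch; the actual script derives `anneal_r` from its
# config, and `initial_kl_weight` / `anneal_steps` are hypothetical names):
def make_kl_weight_schedule(initial_kl_weight, anneal_steps):
    """Returns a step function that ramps the KL weight linearly from
    `initial_kl_weight` to 1.0 over `anneal_steps` updates, then holds.
    """
    anneal_r = (1.0 - initial_kl_weight) / anneal_steps

    def next_weight(kl_weight):
        return min(1.0, kl_weight + anneal_r)

    return next_weight

# Example: with initial weight 0.1 and anneal_steps=9, each update adds
# 0.1, so the weight reaches 1.0 after nine training steps.
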
def _generate(sess, saver, fname=None):
    if tf.train.checkpoint_exists(FLAGS.model):
        saver.restore(sess, FLAGS.model)
    else:
        raise ValueError("cannot find checkpoint model")

    batch_size = train_data.batch_size

    dst = tfd.MultivariateNormalDiag(
        loc=tf.zeros([batch_size, config.latent_dims]),
        scale_diag=tf.ones([batch_size, config.latent_dims]))

    dcdr_states, latent_z = connector_stoch(dst)

    vocab = train_data.vocab
    start_tokens = tf.ones(batch_size, tf.int32) * vocab.bos_token_id
    end_token = vocab.eos_token_id

    if config.decoder_type == "lstm":
        def _cat_embedder(ids):
            """Concatenates the latent variable to input word embeddings."""
            embedding = decoder_w_embedder(ids)
            return tf.concat([embedding, latent_z], axis=1)

        outputs, _, _ = decoder(
            initial_state=dcdr_states,
            decoding_strategy="infer_sample",
            embedding=_cat_embedder,
            max_decoding_length=100,
            start_tokens=start_tokens,
            end_token=end_token)
    else:
        def _embedding_fn(ids, times):
            w_embed = decoder_w_embedder(ids)
            p_embed = decoder_p_embedder(times)
            return w_embed * config.hidden_size ** 0.5 + p_embed

        outputs, _ = decoder(
            memory=dcdr_states,
            decoding_strategy="infer_sample",
            memory_sequence_length=tf.ones(tf.shape(dcdr_states)[0]),
            embedding=_embedding_fn,
            max_decoding_length=100,
            start_tokens=start_tokens,
            end_token=end_token)

    sample_tokens = vocab.map_ids_to_tokens(outputs.sample_id)
    sess.run(tf.tables_initializer())

    feed = {tx.global_mode(): tf.estimator.ModeKeys.PREDICT}
    sample_tokens_ = sess.run(sample_tokens, feed_dict=feed)

    if fname is None:
        fh = sys.stdout
    else:
        fh = open(fname, 'w', encoding='utf-8')

    for sent in sample_tokens_:
        sent = tx.utils.compat_as_text(list(sent))
        end_id = len(sent)
        if vocab.eos_token in sent:
            end_id = sent.index(vocab.eos_token)
        fh.write(' '.join(sent[:end_id + 1]) + '\n')

    print('Output done')
    if fh is not sys.stdout:
        # Only close the handle if we opened a file ourselves
        fh.close()

def _test_epoch_bleu(sess, epoch, sample_text, sample_lengths):
    iterator.switch_to_test_data(sess)

    bleu_prec = [[] for _ in range(1, 5)]
    bleu_recall = [[] for _ in range(1, 5)]

    def _bleus(ref, sample):
        res = []
        # The first entry is a placeholder so that `res[i]` holds BLEU-i
        # for i in 1..4.
        for weight in [[1, 0, 0, 0],
                       [1, 0, 0, 0],
                       [1 / 2., 1 / 2., 0, 0],
                       [1 / 3., 1 / 3., 1 / 3., 0],
                       [1 / 4., 1 / 4., 1 / 4., 1 / 4.]]:
            res.append(sentence_bleu(
                [ref], sample,
                smoothing_function=SmoothingFunction().method7,
                weights=weight))
        return res

    while True:
        try:
            feed = {tx.global_mode(): tf.estimator.ModeKeys.EVAL}
            samples_, sample_lengths_, references, refs_cnt = sess.run(
                [sample_text, sample_lengths,
                 data_batch['refs_text'][:, :, 1:],
                 data_batch['refs_utterance_cnt']],
                feed_dict=feed)

            samples_ = np.transpose(samples_, (0, 2, 1))
            samples_ = [
                [sample[:l] for sample, l in zip(beam, lens)]
                for beam, lens in zip(samples_.tolist(), sample_lengths_)
            ]
            references = [
                [ref[:ref.index(b'<EOS>')] for ref in refs[:cnt]]
                for refs, cnt in zip(references.tolist(), refs_cnt)
            ]

            for beam, refs in zip(samples_, references):
                bleu_scores = [[_bleus(ref, sample) for ref in refs]
                               for sample in beam]
                bleu_scores = np.transpose(np.array(bleu_scores), (2, 0, 1))
                for i in range(1, 5):
                    bleu_i = bleu_scores[i]
                    bleu_i_precision = bleu_i.max(axis=1).mean()
                    bleu_i_recall = bleu_i.max(axis=0).mean()
                    bleu_prec[i - 1].append(bleu_i_precision)
                    bleu_recall[i - 1].append(bleu_i_recall)
        except tf.errors.OutOfRangeError:
            break

    bleu_prec = [np.mean(x) for x in bleu_prec]
    bleu_recall = [np.mean(x) for x in bleu_recall]

    print('epoch {}:'.format(epoch))
    for i in range(1, 5):
        print(' -- bleu-{} prec={}, recall={}'.format(
            i, bleu_prec[i - 1], bleu_recall[i - 1]))

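# The BLEU precision/recall reduction in `_test_epoch_bleu` above, shown
# in isolation with made-up numbers: given an (n_samples, n_refs) matrix
# of BLEU-n scores for one context, precision takes the best reference per
# generated sample, and recall takes the best sample per reference.
import numpy as np

bleu_i = np.array([[0.2, 0.5, 0.1],    # sample 0 vs. refs 0..2
                   [0.7, 0.3, 0.4]])   # sample 1 vs. refs 0..2
precision = bleu_i.max(axis=1).mean()  # mean of [0.5, 0.7] -> 0.60
recall = bleu_i.max(axis=0).mean()     # mean of [0.7, 0.5, 0.4] -> 0.533
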
def _eval_epoch(sess, mode, epoch_no):
    if mode == 'dev':
        data_iterator.switch_to_val_data(sess)
    else:
        data_iterator.switch_to_test_data(sess)

    loader.restart(mode, batch_size=batch_size)
    batched_data = loader.get_next_batch(mode)

    refs, hypos = [], []
    refs_id, hypos_id = [], []
    while batched_data is not None:
        fetches = [infer_outputs.predicted_ids[:, :, 0]]
        feed_dict = {
            tx.global_mode(): tf.estimator.ModeKeys.EVAL,
            batch['source_text_ids']: batched_data['post'],
            batch['source_length']: batched_data['post_length'],
            batch['target_text_ids']: batched_data['resp'],
            batch['target_length']: batched_data['resp_length'],
        }
        output_ids = sess.run(fetches, feed_dict=feed_dict)

        x = [loader.convert_ids_to_tokens(q, trim=True)[1:]
             for q in batched_data['resp']]
        target_texts = tx.utils.str_join(x)

        y = [loader.convert_ids_to_tokens(q, trim=True)
             for q in output_ids[0]]
        output_texts = tx.utils.str_join(y)

        tx.utils.write_paired_text(
            target_texts, output_texts,
            log_dir + mode + '_results' + str(epoch_no) + '.txt',
            append=True, mode='h', sep=' ||| ')

        for hypo_id, ref_id in zip(output_ids[0], batched_data['resp']):
            if config_data.eval_metric == 'bleu':
                hypos_id.append(hypo_id)
                refs_id.append(ref_id)

        for hypo, ref in zip(output_texts, target_texts):
            if config_data.eval_metric == 'bleu':
                hypos.append(hypo)
                refs.append([ref])
            elif config_data.eval_metric == 'rouge':
                hypos.append(tx.utils.compat_as_text(hypo))
                refs.append(tx.utils.compat_as_text(ref))

        batched_data = loader.get_next_batch(mode)
        if debug:
            break

    if config_data.eval_metric == 'bleu':
        bleu_metric = cotk.metric.BleuCorpusMetric(loader)
        data = {'ref_allvocabs': refs_id, 'gen': hypos_id}
        bleu_metric.forward(data)
        result = bleu_metric.close()
        return result['bleu'], result
    elif config_data.eval_metric == 'rouge':
        rouge = Rouge()
        return rouge.get_scores(hyps=hypos, refs=refs, avg=True)

if __name__ == '__main__':
    env = gym.make('CartPole-v0')
    env = env.unwrapped
    env_config = tx.agents.get_gym_env_config(env)

    agent = tx.agents.ActorCriticAgent(env_config=env_config)

    with tf.Session() as sess:
        agent.sess = sess

        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sess.run(tf.tables_initializer())

        feed_dict = {tx.global_mode(): tf.estimator.ModeKeys.TRAIN}

        for e in range(5000):
            reward_sum = 0.
            observ = env.reset()
            agent.reset()
            while True:
                action = agent.get_action(observ, feed_dict=feed_dict)
                next_observ, reward, terminal, _ = env.step(action=action)
                agent.observe(reward, terminal, feed_dict=feed_dict)
                observ = next_observ
                reward_sum += reward
                if terminal:
                    break

            # Report the accumulated episode reward periodically
            if (e + 1) % 10 == 0:
                print('episode {}: {}'.format(e + 1, reward_sum))

def _g_test_epoch(sess, epoch, mode_string):
    def _id2word_map(id_arrays):
        return [' '.join([train_data.vocab.id_to_token_map_py[i]
                          for i in sent])
                for sent in id_arrays]

    if mode_string == 'valid':
        iterator.switch_to_val_data(sess)
    elif mode_string == 'test':
        iterator.switch_to_test_data(sess)
    else:
        raise ValueError("Expect mode_string to be one of "
                         "['valid', 'test'], got %s" % mode_string)

    target_list, inference_list = [], []
    loss, steps = 0., 0
    while True:
        try:
            fetches = {"mle_loss": mle_loss, "num_steps": num_steps}
            if mode_string == 'test':
                fetches['target_sample_id'] = data_batch["text_ids"]
                fetches['infer_sample_id'] = infer_sample_ids

            feed_dict = {tx.global_mode(): tf.estimator.ModeKeys.EVAL}
            rtns = sess.run(fetches, feed_dict)

            loss += rtns['mle_loss']
            steps += rtns['num_steps']

            if mode_string == 'test':
                targets = _id2word_map(
                    rtns['target_sample_id'][:, 1:].tolist())  # remove <BOS>
                for t in targets:
                    target_list.extend(t.split('<EOS>')[0].strip().split())

                inferences = _id2word_map(rtns['infer_sample_id'].tolist())
                for inf in inferences:
                    inference_list.extend(
                        inf.split('<EOS>')[0].strip().split())
        except tf.errors.OutOfRangeError:
            break

    ppl = np.exp(loss / steps)
    rst = "G {0:6s} epoch {1:3d}, step {2:3s}: {3:5s}_ppl: {4:6f}".format(
        mode_string, epoch, '-', mode_string, ppl)
    log.write(rst + '\n')
    log.flush()
    print(rst)

    if mode_string == 'test':
        bleu_test = tx.evals.sentence_bleu_moses(
            references=[target_list],
            hypothesis=inference_list,
            lowercase=True, return_all=True)
        if not isinstance(bleu_test, np.ndarray):
            # `sentence_bleu_moses` may return 0.0 if `inference_list`
            # is empty
            bleu_test = [bleu_test] * 5
        rst_test = ("epoch %d BLEU1~4 on test dataset:\n"
                    "%f\n%f\n%f\n%f\n\n"
                    % (epoch, bleu_test[1], bleu_test[2],
                       bleu_test[3], bleu_test[4]))
        print(rst_test)
        bleu_log.write(rst_test)
        bleu_log.flush()
    return

def _eval_epoch(sess, epoch, mode):
    print('Starting %s' % mode)

    if mode not in ('eval', 'test'):
        raise ValueError('`mode` should be either "eval" or "test", '
                         'got %s' % mode)
    dataset_name = mode

    data_iterator.restart_dataset(sess, dataset_name)

    references, hypotheses, inputs = [], [], []
    while True:
        try:
            feed_dict = {
                data_iterator.handle:
                    data_iterator.get_handle(sess, dataset_name),
                tx.global_mode(): tf.estimator.ModeKeys.EVAL,
            }
            fetches = {
                'beam_search_ids': beam_search_ids,
                'tgt_labels': tgt_labels,
                # `src_input_ids` is not necessary for computing the
                # metric, but allows us to write the inputs to a file.
                'src_input_ids': src_input_ids,
            }
            fetches_ = sess.run(fetches, feed_dict=feed_dict)

            hypotheses.extend(h.tolist()
                              for h in fetches_['beam_search_ids'])
            references.extend(r.tolist() for r in fetches_['tgt_labels'])
            inputs.extend(h.tolist() for h in fetches_['src_input_ids'])
        except tf.errors.OutOfRangeError:
            break

    hypotheses = utils.list_strip_eos(hypotheses, eos_token_id)
    references = utils.list_strip_eos(references, eos_token_id)

    def calculate_scores():
        hyp_fn, ref_fn = 'tmp.%s.src' % mode, 'tmp.%s.tgt' % mode
        write_token_id_arrays_to_text_file(
            hypotheses, os.path.join(model_dir, hyp_fn), tokenizer)
        write_token_id_arrays_to_text_file(
            references, os.path.join(model_dir, ref_fn), tokenizer)

        hyp_fn = os.path.join(model_dir, hyp_fn)
        ref_fn = os.path.join(model_dir, ref_fn)

        files_rouge = FilesRouge(hyp_fn, ref_fn)
        rouge_scores = files_rouge.get_scores(avg=True)
        bleu_score = bleu_wrapper(ref_fn, hyp_fn, case_sensitive=True)
        return rouge_scores, bleu_score

    if mode == 'eval':
        try:
            rouge_scores, bleu_score = calculate_scores()
        except ValueError:
            print("Failed to calculate rouge scores!")
            return

        print_rouge_scores(rouge_scores)
        print('epoch: %d, bleu_score %.4f' % (epoch, bleu_score))

        if bleu_score > best_results['score']:
            best_results['score'] = bleu_score
            best_results['epoch'] = epoch
            model_path = os.path.join(model_dir, 'best-model.ckpt')
            print('saving model to %s' % model_path)

            # Also save the best results in text files for manual
            # evaluation
            write_token_id_arrays_to_text_file(
                inputs, os.path.join(model_dir, 'eval-inputs.txt'),
                tokenizer)
            write_token_id_arrays_to_text_file(
                hypotheses,
                os.path.join(model_dir, 'eval-predictions.txt'), tokenizer)
            write_token_id_arrays_to_text_file(
                references, os.path.join(model_dir, 'eval-targets.txt'),
                tokenizer)

            saver.save(sess, model_path)

    elif mode == 'test':
        rouge_scores, bleu_score = calculate_scores()

        print_rouge_scores(rouge_scores)
        print('bleu_score %.4f' % bleu_score)

        # Also save the results in text files for manual evaluation
        write_token_id_arrays_to_text_file(
            inputs, os.path.join(model_dir, 'test-inputs.txt'), tokenizer)
        write_token_id_arrays_to_text_file(
            hypotheses, os.path.join(model_dir, 'test-predictions.txt'),
            tokenizer)
        write_token_id_arrays_to_text_file(
            references, os.path.join(model_dir, 'test-targets.txt'),
            tokenizer)