def experiment_fn(run_config, params):
    """Assemble a tf.contrib.learn Experiment for the Conversation model.

    Wires together the estimator, the vocabulary, the train/test input
    pipelines, and a periodic sample-printing hook.
    """
    conv = Conversation()
    seq2seq_estimator = tf.estimator.Estimator(
        model_fn=conv.model_fn,
        model_dir=Config.train.model_dir,
        params=params,
        config=run_config)

    # Load the vocabulary and record its size for the model graph.
    vocab = data_loader.load_vocab("vocab")
    Config.data.vocab_size = len(vocab)

    train_X, test_X, train_y, test_y = data_loader.make_train_and_test_set()
    train_input_fn, train_input_hook = dataset.get_train_inputs(
        train_X, train_y)
    test_input_fn, test_input_hook = dataset.get_test_inputs(test_X, test_y)

    # Dump a sample encoder/decoder/prediction triple to the log every N steps.
    sample_printer = hook.print_variables(
        variables=['train/enc_0', 'train/dec_0', 'train/pred_0'],
        vocab=vocab,
        every_n_iter=Config.train.check_hook_n_iter)

    return tf.contrib.learn.Experiment(
        estimator=seq2seq_estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=test_input_fn,
        train_steps=Config.train.train_steps,
        min_eval_frequency=Config.train.min_eval_frequency,
        train_monitors=[train_input_hook, sample_printer],
        eval_hooks=[test_input_hook])
def main():
    """Chat REPL: build the estimator once, then answer user input forever."""
    estimator = _make_estimator()
    vocab = data_loader.load_vocab("vocab.txt")
    while True:
        # Read one line from the user, predict, and render the answer.
        user_text = input('input text > ').strip()
        token_ids = data_loader.sentence2id(user_text, vocab)
        answer = predict(token_ids, estimator)
        show(user_text, answer)
def main():
    """Distributed training entry point.

    Reads the cluster layout from the TF_CONFIG environment variable,
    starts a parameter server or a worker accordingly, and on workers runs
    train-and-evaluate for the Conversation model.
    """
    params = tf.contrib.training.HParams(**Config.model.to_dict())
    run_config = tf.estimator.RunConfig(
        model_dir=Config.train.model_dir,
        save_checkpoints_steps=Config.train.save_checkpoints_steps,
    )

    # Cluster description comes from TF_CONFIG (empty JSON object when unset).
    tf_config = os.environ.get('TF_CONFIG', '{}')
    tf_config_json = json.loads(tf_config)
    cluster = tf_config_json.get('cluster')
    job_name = tf_config_json.get('task', {}).get('type')
    task_index = tf_config_json.get('task', {}).get('index')

    # NOTE(review): when TF_CONFIG is unset, `cluster` is None and
    # tf.train.ClusterSpec(None) will fail — this entry point presumably
    # assumes a populated TF_CONFIG; confirm before running stand-alone.
    cluster_spec = tf.train.ClusterSpec(cluster)
    server = tf.train.Server(cluster_spec,
                             job_name=job_name,
                             task_index=task_index)

    if job_name == "ps":
        # Parameter servers only serve variables; block forever.
        tf.logging.info("Started server!")
        server.join()
    if job_name == "worker":
        # NOTE(review): this session is opened but never referenced; the
        # Estimator manages its own sessions internally.
        with tf.Session(server.target):
            with tf.device(
                    tf.train.replica_device_setter(
                        worker_device="/job:worker/task:%d" % task_index,
                        cluster=cluster)):
                tf.logging.info("Initializing Estimator")
                conversation = Conversation()
                estimator = tf.estimator.Estimator(
                    model_fn=conversation.model_fn,
                    model_dir=Config.train.model_dir,
                    params=params,
                    config=run_config)
                tf.logging.info("Initializing vocabulary")
                vocab = data_loader.load_vocab("vocab")
                Config.data.vocab_size = len(vocab)
                train_X, test_X, train_y, test_y = data_loader.make_train_and_test_set(
                )
                train_input_fn, train_input_hook = data_loader.make_batch(
                    (train_X, train_y), batch_size=Config.model.batch_size)
                test_input_fn, test_input_hook = data_loader.make_batch(
                    (test_X, test_y), batch_size=Config.model.batch_size,
                    scope="test")
                tf.logging.info("Initializing Specifications")
                # NOTE(review): max_steps is hard-coded to 1000 rather than
                # taken from Config.train — confirm this is intentional.
                train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,
                                                    max_steps=1000)
                eval_spec = tf.estimator.EvalSpec(input_fn=test_input_fn)
                tf.logging.info("Run training")
                tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
def main(Config, mode):
    """Train or evaluate the model.

    Args:
        Config: configuration object (the parameter shadows any module-level
            Config); vocab/oov attributes are filled in here as a side effect.
        mode: 'train' or 'eval'.
    """
    # returns a dict (word -> id)
    vocab = data_loader.load_vocab("vocab")
    Config.data.vocab_size = len(vocab)
    # Number of out-of-vocabulary entries written by preprocessing.
    with open(os.path.join(Config.data.base_path, Config.data.processed_path,
                           'oov_size'), 'r', encoding='utf-8') as f:
        oov_size = int(f.readline().strip())
    Config.data.oov_size = oov_size
    Config.data.vocab = vocab
    rev_vocab = utils.get_rev_vocab(vocab)
    Config.data.rev_vocab = rev_vocab

    if mode == 'train':
        # save_path = os.path.join(Config.train.model_dir)
        # if not os.path.exists(save_path):
        #     os.makedirs(save_path)
        # with open(os.path.join(save_path, 'vocab.pkl'), 'wb') as f:
        #     cPickle.dump(vocab, f)

        # build the training data
        train_X, test_X, train_y, test_y = data_loader.make_train_and_test_set(
            tsize=Config.train.size)
        model = Model(Config)
        trainer = dy.AdamTrainer(model.model)
        # model.load('model-1-final')

        # Best dev BLEU so far is tracked in a module-level global.
        global best_blue
        for e in range(Config.train.epoch):
            dev_blue = train(train_X, train_y, model, Config, test_X, test_y,
                             e, trainer)
            # Save a checkpoint whenever the dev score improves.
            if dev_blue > best_blue:
                # if (e + 1) % 50 == 0:
                best_blue = dev_blue
                model.save('model-{}-{}'.format(e + 1, 'final'))
        # Final evaluation on the training split.
        # NOTE(review): placement relative to the epoch loop reconstructed
        # from a collapsed source line — confirm it runs after training.
        eval(train_X, train_y, model)

    if mode == 'eval':
        # save_path = os.path.join(Config.train.model_dir)
        # with open(os.path.join(save_path, 'vocab.pkl'), 'rb') as f:
        #     vocab = cPickle.load(f)
        Config.vocab = vocab
        rev_vocab = utils.get_rev_vocab(vocab)
        Config.data.rev_vocab = rev_vocab
        test_X, test_y = data_loader.make_eval_set()
        model = Model(Config)
        model.load('model-1-final')
        # NOTE(review): `eval` here is a project function that shadows the
        # builtin of the same name.
        eval(test_X, test_y, model)
def main():
    """Interactive chat loop: read a sentence, encode it, print the reply."""
    vocab = data_loader.load_vocab("vocab")
    Config.data.vocab_size = len(vocab)
    while True:
        user_line = _get_user_input()
        token_ids = data_loader.sentence2id(vocab, user_line)
        # Append the decoder start symbol after the encoded sentence.
        token_ids += [Config.data.START_ID]
        if len(token_ids) <= Config.data.max_seq_length:
            answer = chat(token_ids, vocab)
            print(answer)
        else:
            print(f"Max length I can handle is: {Config.data.max_seq_length}")
def main():
    """Interactive prediction loop over user-typed sentences."""
    # Derive the maximum sequence length from the processed id files.
    data_loader.set_max_seq_length(['train_X_ids', 'test_X_ids'])
    vocab = data_loader.load_vocab("vocab")
    Config.data.vocab_size = len(vocab)
    print("Typing anything :) \n")
    while True:
        raw_sentence = _get_user_input()
        token_ids = data_loader.sentence2id(vocab, raw_sentence)
        if len(token_ids) > Config.data.max_seq_length:
            # Input too long for the model; ask again.
            print(f"Max length I can handle is: {Config.data.max_seq_length}")
            continue
        print(predict(token_ids))
def experiment_fn(run_config, params):
    """Build the Experiment: estimator, input pipelines, and debug hooks."""
    # Estimator wrapping the seq2seq conversation model.
    conv = Conversation()
    estimator = tf.estimator.Estimator(
        model_fn=conv.model_fn,
        model_dir=Config.train.model_dir,
        params=params,
        config=run_config)

    # Vocabulary (word -> id); record its size for the model graph.
    vocab = data_loader.load_vocab("vocab")
    Config.data.vocab_size = len(vocab)

    # Batched train/test input pipelines.
    train_X, test_X, train_y, test_y = data_loader.make_train_and_test_set()
    train_input_fn, train_input_hook = data_loader.make_batch(
        (train_X, train_y), batch_size=Config.model.batch_size)
    test_input_fn, test_input_hook = data_loader.make_batch(
        (test_X, test_y), batch_size=Config.model.batch_size, scope="test")

    train_hooks = [train_input_hook]
    if Config.train.print_verbose:
        # Periodically decode a training sample and its prediction to the log.
        sample_printer = hook.print_variables(
            variables=['train/enc_0', 'train/dec_0', 'train/pred_0'],
            rev_vocab=utils.get_rev_vocab(vocab),
            every_n_iter=Config.train.check_hook_n_iter)
        train_hooks.append(sample_printer)

    eval_hooks = [test_input_hook]
    if Config.train.debug:
        # Attach an interactive CLI debugger to both phases.
        train_hooks.append(tf_debug.LocalCLIDebugHook())
        eval_hooks.append(tf_debug.LocalCLIDebugHook())

    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=test_input_fn,
        train_steps=Config.train.train_steps,
        min_eval_frequency=Config.train.min_eval_frequency,
        train_monitors=train_hooks,
        eval_hooks=eval_hooks,
        eval_delay_secs=0)
def experiment_fn(run_config, params):
    """Wire estimator, input pipelines, and monitoring hooks into an Experiment."""
    net = Model()
    estimator = tf.estimator.Estimator(
        model_fn=net.model_fn,
        model_dir=Config.train.model_dir,
        params=params,
        config=run_config)

    # Vocabulary size is published through Config for the model graph.
    vocab = data_loader.load_vocab("vocab")
    Config.data.vocab_size = len(vocab)

    train_data, test_data = data_loader.make_train_and_test_set()
    train_input_fn, train_input_hook = data_loader.make_batch(
        train_data, batch_size=Config.model.batch_size, scope="train")
    test_input_fn, test_input_hook = data_loader.make_batch(
        test_data, batch_size=Config.model.batch_size, scope="test")

    train_hooks = [train_input_hook]
    if Config.train.print_verbose:
        # Periodically log a decoded input plus its target and prediction.
        train_hooks.extend([
            hook.print_variables(
                variables=['train/input_0'],
                rev_vocab=get_rev_vocab(vocab),
                every_n_iter=Config.train.check_hook_n_iter),
            hook.print_target(
                variables=['train/target_0', 'train/pred_0'],
                every_n_iter=Config.train.check_hook_n_iter),
        ])

    eval_hooks = [test_input_hook]
    if Config.train.debug:
        # Interactive CLI debugger for both training and evaluation.
        train_hooks.append(tf_debug.LocalCLIDebugHook())
        eval_hooks.append(tf_debug.LocalCLIDebugHook())

    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=test_input_fn,
        train_steps=Config.train.train_steps,
        min_eval_frequency=Config.train.min_eval_frequency,
        train_monitors=train_hooks,
        eval_hooks=eval_hooks)
def main(mode):
    """Run one train-and-evaluate pass over the prepared dataset.

    Note: `mode` is accepted for caller compatibility but is not consulted
    in this function body.
    """
    (train_X, train_y), (test_X, test_y) = data_loader.make_train_and_test_set()

    vocab = data_loader.load_vocab("vocab")
    Config.data.vocab_size = len(vocab)

    net = Model()
    run_config = tf.estimator.RunConfig(
        model_dir=Config.train.model_dir,
        save_checkpoints_steps=Config.train.save_checkpoints_steps)
    estimator = tf.estimator.Estimator(
        model_fn=net.model_fn,
        model_dir=Config.train.model_dir,
        params=Config.model.to_dict(),
        config=run_config,
    )

    # One epoch over the training data, shuffled; evaluation is unshuffled.
    train_spec = tf.estimator.TrainSpec(
        input_fn=tf.compat.v1.estimator.inputs.numpy_input_fn(
            train_X, train_y, batch_size=Config.model.batch_size,
            num_epochs=1, shuffle=True))
    eval_spec = tf.estimator.EvalSpec(
        input_fn=tf.compat.v1.estimator.inputs.numpy_input_fn(
            test_X, test_y, batch_size=Config.model.batch_size,
            num_epochs=1, shuffle=False))
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
def predict(image_file):
    """Classify one image file with the latest exported OCR SavedModel.

    Prints the predicted class label to stdout and displays the preprocessed
    image in an OpenCV window (blocks until a key is pressed).
    """
    Config('ocr-dev')  # load the 'ocr-dev' configuration profile
    vocab = load_vocab()
    with tf.Session() as sess:
        # NOTE(review): hard-coded, machine-specific export directory.
        servo_dir = '/Users/dhu/code/paragraph_classfication/logs/ocr/export/Servo'
        # Pick the most recently written export (each export is a subdirectory).
        latest_dir = max([
            os.path.join(servo_dir, d) for d in os.listdir(servo_dir)
            if os.path.isdir(os.path.join(servo_dir, d))
        ], key=os.path.getmtime)
        tf.saved_model.loader.load(sess, ["serve"], latest_dir)
        with open(image_file, 'rb') as f:
            image_bytes = f.read()
        # Fetch class ids, scores, and the preprocessed image tensor by name.
        classes, scores, tf_images = sess.run([
            "class:0", "scores:0",
            'map/TensorArrayStack/TensorArrayGatherV3:0'
        ],
                                              feed_dict={
                                                  "image_bytes:0": [image_bytes]
                                              })
        # +1 offset: presumably the exported model's class ids are shifted by
        # one relative to the vocab — TODO confirm against the export code.
        print(vocab.reverse(classes[0] + 1))
        cv2.imshow('img', tf_images[0])
        cv2.waitKey()
import data_loader
from model import Seq2Seq
import numpy as np
import tensorflow as tf

# Smoke-test script: build the Seq2Seq graph and run one decoding pass on a
# small slice of the training data (no training is performed).
vocab = data_loader.load_vocab("vocab")
train_X, test_X, train_y, test_y = data_loader.make_train_and_test_set()
seq2seq = Seq2Seq(vocab_size=len(vocab), embed_dim=100, lstm_state_size=100)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
# Decode rows 1..9; the decoder input is teacher-forced with gold targets.
pred = sess.run(seq2seq.decoder_prediction,
                feed_dict={
                    seq2seq.encoder_input_: train_X[1:10, :],
                    seq2seq.decoder_input_: train_y[1:10, :]
                })
def __init__(self):
    """Create the estimator and preload the lookup resources it needs."""
    # The estimator is built once and reused for every request.
    self.estimator = self._make_estimator()
    # Static lookup tables, loaded at construction time.
    self.vocab = data_loader.load_vocab()
    self.pos_dict = data_loader.load_pos()
    self.dep_dict = data_loader.load_dep()
def __init__(self):
    """Create the estimator and preload vocabulary/tag/label lookups."""
    # The estimator is built once and reused for every request.
    self.estimator = self._make_estimator()
    # Static lookup tables, loaded at construction time.
    self.vocab = data_loader.load_vocab()
    self.tag_dict = data_loader.load_tag()
    self.label_dict = data_loader.load_label()
def __init__(self):
    """Create the estimator and preload vocab, tag, and char-image lookups."""
    # The estimator is built once and reused for every request.
    self.estimator = self._make_estimator()
    # Static lookup tables, loaded at construction time.
    self.vocab = data_loader.load_vocab()
    self.tag = data_loader.load_tag()
    self.char2image = data_loader.load_char2image()
def experiment_fn(run_config, params):
    """Build a tf.contrib.learn Experiment for the configured model type.

    Selects one of several model families via ``Config.model.type``
    ('ocr', 'text', 'text-dnn', 'line-crf', 'cell-merge'), then wires up
    the estimator, input pipelines, logging/debug hooks, and an export
    strategy whose serving signature matches the chosen family.

    Args:
        run_config: tf.estimator.RunConfig passed through to the Estimator.
        params: hyper-parameters passed through to the Estimator.

    Returns:
        A configured tf.contrib.learn.Experiment.

    Raises:
        KeyError: if ``Config.model.type`` is not a known model type.
    """
    # Dispatch on the configured model family; non-'text' families are
    # imported lazily so unused packages are never loaded.
    if Config.model.type == 'ocr':
        from ocr import ocr_model
        model = ocr_model.Model()
    elif Config.model.type == 'text':
        model = Model()
    elif Config.model.type == 'text-dnn':
        from textdnn import text_dnn_model
        model = text_dnn_model.Model()
    elif Config.model.type == 'line-crf':
        from linecrf import crf_model
        model = crf_model.Model()
    elif Config.model.type == 'cell-merge':
        from cellMerge import cell_model
        model = cell_model.Model()
    else:
        raise KeyError('Unknown model type %s' % Config.model.type)

    estimator = tf.estimator.Estimator(
        model_fn=model.model_fn,
        model_dir=Config.train.model_dir,
        params=params,
        config=run_config)

    vocab = data_loader.load_vocab()
    Config.data.vocab_size = len(vocab)
    print("vocab_size: %d" % Config.data.vocab_size)

    train_input_fn, train_input_hook, test_input_fn, test_input_hook = data_loader.make_train_and_test_input_fn()

    train_hooks = []
    if train_input_hook is not None:
        train_hooks.append(train_input_hook)
    # Verbose logging hooks differ per model family (tensor names differ).
    if Config.train.print_verbose and Config.model.type == 'text':
        train_hooks.append(hook.print_variables(
            variables=['train/input_0'],
            rev_vocab=vocab,
            every_n_iter=Config.train.check_hook_n_iter))
        train_hooks.append(hook.print_target(
            variables=['train/target_0', 'train/pred_0'],
            rev_vocab=vocab,
            every_n_iter=Config.train.check_hook_n_iter))
    if Config.train.print_verbose and Config.model.type == 'text-dnn':
        train_hooks.append(hook.print_variables(
            variables=['pre_processing/total_text_ids'],
            rev_vocab=vocab,
            every_n_iter=Config.train.check_hook_n_iter
        ))
        train_hooks.append(hook.print_variables(
            variables=['pre_processing/tags',
                       'text_graph/crf/crf_decode/text_pred_tags'],
            every_n_iter=Config.train.check_hook_n_iter))
    if Config.train.debug:
        from tensorflow.python import debug as tf_debug
        train_hooks.append(tf_debug.LocalCLIDebugHook(ui_type='readline'))

    eval_hooks = []
    if test_input_hook is not None:
        eval_hooks.append(test_input_hook)

    def serving_input_fn():
        # Serving signature for export; one branch per model family.
        if Config.model.type == 'text':
            # Fixed-length id sequences fed directly as the feature dict.
            inputs = {
                "input_data": tf.placeholder(
                    tf.int32, [None, Config.data.max_seq_length],
                    name="input_data")
            }
            return tf.estimator.export.ServingInputReceiver(inputs, inputs)
        elif Config.model.type == 'ocr':
            def _preprocess_image(image_bytes):
                """Preprocess a single raw image."""
                # Decode -> float -> resize 64x64 -> grayscale.
                image = tf.image.decode_image(
                    tf.reshape(image_bytes, shape=[]), channels=3)
                image.set_shape([None, None, None])
                image = tf.image.convert_image_dtype(image, dtype=tf.float32)
                image = tf.image.resize_images(image, (64, 64))
                image = tf.image.rgb_to_grayscale(image)
                return image

            # Callers may send either encoded image bytes or pre-sized uint8
            # images; `use_uint8` selects the branch at serving time.
            use_uint8 = tf.placeholder(dtype=tf.bool, name='use_uint8')
            image_bytes_list = tf.placeholder(
                shape=[None],
                dtype=tf.string,
                name='image_bytes',
            )
            uint8_images = tf.placeholder(
                shape=[None, 64, 64, 1],
                dtype=tf.uint8,
                name='uint8_images',
            )

            def preprocess_uint8_image():
                image = tf.image.convert_image_dtype(uint8_images,
                                                     dtype=tf.float32)
                return image

            def preprocess_image():
                return tf.map_fn(_preprocess_image, image_bytes_list,
                                 back_prop=False, dtype=tf.float32)

            features = {
                'image': tf.cond(use_uint8,
                                 preprocess_uint8_image,
                                 preprocess_image,
                                 )
            }
            receiver_tensors = {'image_bytes': image_bytes_list}
            return tf.estimator.export.ServingInputReceiver(features,
                                                            receiver_tensors)
        elif Config.model.type == 'text-dnn':
            # Layout features plus the raw text of the current and next line.
            input_dict = {
                'margin': tf.placeholder(tf.float32, shape=[None, 4],
                                         name='margin'),
                'indent': tf.placeholder(tf.float32, shape=[None, 4],
                                         name='indent'),
                'font_size': tf.placeholder(tf.float32, shape=[None, 2],
                                            name='font_size'),
                'text': tf.placeholder(tf.string, shape=[None, ],
                                       name='text'),
                'text_length': tf.placeholder(tf.int32, shape=[None, ],
                                              name='text_length'),
                'next_text': tf.placeholder(tf.string, shape=[None, ],
                                            name='next_text'),
                'next_text_length': tf.placeholder(tf.int32, shape=[None, ],
                                                   name='next_text_length'),
            }
            return tf.estimator.export.ServingInputReceiver(
                features=input_dict, receiver_tensors=input_dict)
        elif Config.model.type == 'line-crf':
            from linecrf import line_example_decoder
            # One serialized tf.Example in; every feature is expanded to a
            # batch of size 1.
            example = tf.placeholder(dtype=tf.string, shape=[],
                                     name='serialized_example')
            input_dict, _ = line_example_decoder.parse_tfexample_fn(
                example, tf.estimator.ModeKeys.PREDICT)
            for key in input_dict:
                input_dict[key] = tf.expand_dims(input_dict[key], 0)
            return tf.estimator.export.ServingInputReceiver(
                features=input_dict,
                receiver_tensors={'serialized_example': example})
        elif Config.model.type == 'cell-merge':
            # Used when exporting the model; `features` is exactly the
            # feature dict the model consumes.
            input_dict = {
                'x_l': tf.placeholder(tf.string, shape=[None], name='x_l'),
                'x_r': tf.placeholder(tf.string, shape=[None], name='x_r'),
                'l': tf.placeholder(tf.int64, shape=[None], name='l'),
            }
            return tf.estimator.export.ServingInputReceiver(
                features=input_dict, receiver_tensors=input_dict)
        else:
            return None

    experiment = tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=test_input_fn,
        train_steps=Config.train.train_steps,
        min_eval_frequency=Config.train.min_eval_frequency,
        eval_steps=Config.train.eval_steps,
        train_monitors=train_hooks,
        eval_hooks=eval_hooks,
        export_strategies=[saved_model_export_utils.make_export_strategy(
            serving_input_fn,
            default_output_alternative_key=None,
            exports_to_keep=1
        )],
    )
    return experiment
self.y_smoothed = label_smoothing(tf.one_hot(self.outpt, depth = len(de2idx))) # loss function self.loss = tf.nn.softmax_cross_entropy_with_logits(logits = self.logits, labels = self.y_smoothed) self.mean_loss = tf.reduce_sum(self.loss * self.istarget) / (tf.reduce_sum(self.istarget)) self.global_step = tf.Variable(0, name = 'global_step', trainable = False) # optimizer self.optimizer = tf.train.AdamOptimizer(learning_rate = pm.learning_rate, beta1 = 0.9, beta2 = 0.98, epsilon = 1e-8) self.train_op = self.optimizer.minimize(self.mean_loss, global_step = self.global_step) tf.summary.scalar('mean_loss', self.mean_loss) self.merged = tf.summary.merge_all() if __name__ == '__main__': en2idx, idx2en = load_vocab('en.vocab.tsv') de2idx, idx2de = load_vocab('de.vocab.tsv') g = Graph(True) print("MSG : Graph loaded!") # save model and use this model to training supvisor = tf.train.Supervisor(graph = g.graph, logdir = pm.logdir, save_model_secs = 0) with supvisor.managed_session() as sess: for epoch in range(1, pm.num_epochs + 1): if supvisor.should_stop(): break # process bar
def __init__(self, is_training = True):
    """Build the full Transformer graph (encoder, decoder, and — when
    training — the loss/optimizer ops) inside a fresh tf.Graph.

    Args:
        is_training: if True, inputs come from get_batch_data() and the
            training ops are added; otherwise int32 placeholders of shape
            (None, pm.maxlen) are created for feed-dict based inference.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            self.inpt, self.outpt, self.batch_num = get_batch_data()
        else:
            self.inpt = tf.placeholder(tf.int32, shape = (None, pm.maxlen))
            self.outpt = tf.placeholder(tf.int32, shape = (None, pm.maxlen))

        # start with 2(<STR>) and without 3(<EOS>): shift the targets right
        # by one, prepending the <STR> id (2) and dropping the last token.
        self.decoder_input = tf.concat((tf.ones_like(self.outpt[:, :1])*2, self.outpt[:, :-1]), -1)

        en2idx, idx2en = load_vocab('en.vocab.tsv')
        de2idx, idx2de = load_vocab('de.vocab.tsv')

        # Encoder
        with tf.variable_scope("encoder"):
            self.enc = embedding(self.inpt,
                                 vocab_size = len(en2idx),
                                 num_units = pm.hidden_units,
                                 scale = True,
                                 scope = "enc_embed")
            # Position Encoding (use range from 0 to len(inpt) to represent
            # position dim of each word)
            # tf.tile(tf.expand_dims(tf.range(tf.shape(self.inpt)[1]), 0), [tf.shape(self.inpt)[0], 1]),
            self.enc += positional_encoding(self.inpt,
                                            vocab_size = pm.maxlen,
                                            num_units = pm.hidden_units,
                                            zero_pad = False,
                                            scale = False,
                                            scope = "enc_pe")
            # Dropout
            self.enc = tf.layers.dropout(self.enc,
                                         rate = pm.dropout,
                                         training = tf.convert_to_tensor(is_training))
            # Identical encoder layers, stacked pm.num_identical times.
            for i in range(pm.num_identical):
                with tf.variable_scope("num_identical_{}".format(i)):
                    # Multi-head Attention (self-attention over the source)
                    self.enc = multihead_attention(queries = self.enc,
                                                   keys = self.enc,
                                                   num_units = pm.hidden_units,
                                                   num_heads = pm.num_heads,
                                                   dropout_rate = pm.dropout,
                                                   is_training = is_training,
                                                   causality = False)
                    self.enc = feedforward(self.enc, num_units = [4 * pm.hidden_units, pm.hidden_units])

        # Decoder
        with tf.variable_scope("decoder"):
            self.dec = embedding(self.decoder_input,
                                 vocab_size = len(de2idx),
                                 num_units = pm.hidden_units,
                                 scale = True,
                                 scope = "dec_embed")
            # Position Encoding (use range from 0 to len(inpt) to represent
            # position dim)
            self.dec += positional_encoding(self.decoder_input,
                                            vocab_size = pm.maxlen,
                                            num_units = pm.hidden_units,
                                            zero_pad = False,
                                            scale = False,
                                            scope = "dec_pe")
            # Dropout
            self.dec = tf.layers.dropout(self.dec,
                                         rate = pm.dropout,
                                         training = tf.convert_to_tensor(is_training))
            # Identical decoder layers.
            for i in range(pm.num_identical):
                with tf.variable_scope("num_identical_{}".format(i)):
                    # Multi-head Attention (self-attention, causality=True
                    # masks future positions)
                    self.dec = multihead_attention(queries = self.dec,
                                                   keys = self.dec,
                                                   num_units = pm.hidden_units,
                                                   num_heads = pm.num_heads,
                                                   dropout_rate = pm.dropout,
                                                   is_training = is_training,
                                                   causality = True,
                                                   scope = "self_attention")
                    # Multi-head Attention (vanilla attention over encoder
                    # outputs)
                    self.dec = multihead_attention(queries=self.dec,
                                                   keys=self.enc,
                                                   num_units=pm.hidden_units,
                                                   num_heads=pm.num_heads,
                                                   dropout_rate=pm.dropout,
                                                   is_training=is_training,
                                                   causality=False,
                                                   scope="vanilla_attention")
                    self.dec = feedforward(self.dec, num_units = [4 * pm.hidden_units, pm.hidden_units])

        # Linear projection to target-vocabulary logits.
        self.logits = tf.layers.dense(self.dec, len(de2idx))
        # NOTE(review): tf.arg_max / tf.to_int32 / tf.to_float are deprecated
        # TF1 aliases; kept as-is.
        self.preds = tf.to_int32(tf.arg_max(self.logits, dimension = -1))
        # Mask of positions whose target id is non-zero (id 0 is presumably
        # padding — confirm against the vocabulary).
        self.istarget = tf.to_float(tf.not_equal(self.outpt, 0))
        # Token accuracy over non-masked positions.
        self.acc = tf.reduce_sum(tf.to_float(tf.equal(self.preds, self.outpt)) * self.istarget) / (tf.reduce_sum(self.istarget))
        tf.summary.scalar('acc', self.acc)

        if is_training:
            # smooth inputs: label smoothing over one-hot targets.
            self.y_smoothed = label_smoothing(tf.one_hot(self.outpt, depth = len(de2idx)))
            # loss function: cross-entropy averaged over non-masked tokens.
            self.loss = tf.nn.softmax_cross_entropy_with_logits(logits = self.logits, labels = self.y_smoothed)
            self.mean_loss = tf.reduce_sum(self.loss * self.istarget) / (tf.reduce_sum(self.istarget))
            self.global_step = tf.Variable(0, name = 'global_step', trainable = False)
            # optimizer
            self.optimizer = tf.train.AdamOptimizer(learning_rate = pm.learning_rate, beta1 = 0.9, beta2 = 0.98, epsilon = 1e-8)
            self.train_op = self.optimizer.minimize(self.mean_loss, global_step = self.global_step)
            tf.summary.scalar('mean_loss', self.mean_loss)
            self.merged = tf.summary.merge_all()
# Command-line translation driver: parse args, prepare vocab properties,
# translate one source sentence, and print the result.
# FIX(review): this chunk originally began mid-expression
# ("formatter_class=argparse...)"), i.e. the `parser = argparse.ArgumentParser(`
# header had been lost; it is reconstructed here.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--config', type=str, default='config',
                    help='config file name')
parser.add_argument('--src', type=str, default='example source sentence',
                    help='input source sentence')
args = parser.parse_args()

Config(args.config)
Config.train.batch_size = 1

# Silence TensorFlow's C++ and Python logging for clean CLI output.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
tf.logging.set_verbosity(tf.logging.ERROR)

# set data property
data_loader.set_max_seq_length(['train_ids.enc', 'train_ids.dec',
                                'test_ids.enc', 'test_ids.dec'])
source_vocab = data_loader.load_vocab("source_vocab")
target_vocab = data_loader.load_vocab("target_vocab")
Config.data.rev_source_vocab = utils.get_rev_vocab(source_vocab)
Config.data.rev_target_vocab = utils.get_rev_vocab(target_vocab)
Config.data.source_vocab_size = len(source_vocab)
Config.data.target_vocab_size = len(target_vocab)

print("------------------------------------")
print("Source: " + args.src)
token_ids = data_loader.sentence2id(source_vocab, args.src)
prediction = main(token_ids, target_vocab)
print(" > Result: " + prediction)
def __init__(self):
    """Create the estimator and load the vocabulary it will be used with."""
    # The estimator is built once and reused for every request.
    self.estimator = self._make_estimator()
    # Vocabulary lookup, loaded at construction time.
    self.vocab = data_loader.load_vocab()
def eval():
    """Run autoregressive inference on the test split and score it.

    Writes each source/ground-truth/prediction triple to Results/<model-name>
    and appends a corpus BLEU score at the end of the file.

    NOTE: this function shadows the builtin ``eval``; the name is kept for
    caller compatibility.
    """
    g = Graph(is_training=False)
    print("MSG : Graph loaded!")

    X, Sources, Targets = load_data('test')
    en2idx, idx2en = load_vocab('en.vocab.tsv')
    de2idx, idx2de = load_vocab('de.vocab.tsv')

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            # load pre-train model
            sv.saver.restore(sess, tf.train.latest_checkpoint(pm.checkpoint))
            print("MSG : Restore Model!")

            # FIX: read the checkpoint index via a context manager so the
            # file handle is closed (the original leaked an open file).
            with open(pm.checkpoint + '/checkpoint', 'r') as ckpt_file:
                mname = ckpt_file.read().split('"')[1]

            if not os.path.exists('Results'):
                os.mkdir('Results')
            with codecs.open("Results/" + mname, 'w', 'utf-8') as f:
                list_of_refs, predict = [], []
                # Iterate over full batches only; the remainder is dropped.
                for i in range(len(X) // pm.batch_size):
                    x = X[i * pm.batch_size:(i + 1) * pm.batch_size]
                    sources = Sources[i * pm.batch_size:(i + 1) * pm.batch_size]
                    targets = Targets[i * pm.batch_size:(i + 1) * pm.batch_size]

                    # Autoregressive inference: decode one position at a time,
                    # feeding the growing prediction back as decoder input.
                    preds = np.zeros((pm.batch_size, pm.maxlen), dtype=np.int32)
                    for j in range(pm.maxlen):
                        _preds = sess.run(g.preds, feed_dict={
                            g.inpt: x,
                            g.outpt: preds
                        })
                        preds[:, j] = _preds[:, j]

                    for source, target, pred in zip(sources, targets, preds):
                        # Cut the decoded sentence at the first <EOS> marker.
                        got = " ".join(
                            idx2de[idx] for idx in pred).split("<EOS>")[0].strip()
                        f.write("- Source: {}\n".format(source))
                        f.write("- Ground Truth: {}\n".format(target))
                        f.write("- Predict: {}\n\n".format(got))
                        f.flush()

                        # Collect reference/hypothesis pairs for BLEU,
                        # skipping sentences at or below the word limit.
                        ref = target.split()
                        prediction = got.split()
                        if len(ref) > pm.word_limit_lower and len(
                                prediction) > pm.word_limit_lower:
                            list_of_refs.append([ref])
                            predict.append(prediction)

                # Corpus-level BLEU over all collected pairs.
                score = corpus_bleu(list_of_refs, predict)
                f.write("Bleu Score = " + str(100 * score))