# NOTE: The functions below assume the usual translate.py module context:
# FLAGS, _buckets, read_data(), create_model(), data_utils, and imports such
# as: import math, os, sys, time, logging; import numpy as np;
# import tensorflow as tf; from six.moves import xrange.

def load_data(self):
    # TODO: make configurable
    self.data_dir = "/data/WMT15/"
    print("Preparing WMT data in %s" % self.data_dir)
    en_train, fr_train, en_dev, fr_dev, _, _ = data_utils.prepare_wmt_data(
        self.data_dir, self.en_vocab_size, self.fr_vocab_size)

    # Read data into buckets and compute their sizes.
    print("Reading development and training data (limit: %d)."
          % self.max_train_data_size)
    self.dev_set = self.read_data(en_dev, fr_dev)
    self.train_set = self.read_data(en_train, fr_train,
                                    self.max_train_data_size)
    train_bucket_sizes = [
        len(self.train_set[b]) for b in xrange(len(self._buckets))
    ]
    train_total_size = float(sum(train_bucket_sizes))

    # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
    # to select a bucket. The length of [scale[i], scale[i+1]] is proportional
    # to the size of the i-th training bucket, as used later.
    self.train_buckets_scale = [
        sum(train_bucket_sizes[:i + 1]) / train_total_size
        for i in xrange(len(train_bucket_sizes))
    ]
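# The train() variants below all rely on the same trick hinted at by "as used
# later": sample a bucket with probability proportional to its size via the
# cumulative train_buckets_scale list. A minimal standalone sketch of that
# sampling step (the bucket sizes are made-up example values):
import numpy as np


def sample_bucket(bucket_sizes):
    """Return a bucket index drawn proportionally to bucket size."""
    total = float(sum(bucket_sizes))
    # Cumulative scale in (0, 1]; entry i is the right edge of bucket i.
    scale = [sum(bucket_sizes[:i + 1]) / total for i in range(len(bucket_sizes))]
    r = np.random.random_sample()
    return min(i for i in range(len(scale)) if scale[i] > r)


# Example: with buckets of 100, 300, and 600 pairs, bucket 2 is chosen
# roughly 60% of the time.
print(sample_bucket([100, 300, 600]))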
def train():
    # Build the vocabularies; returns the id-mapped training data files.
    en_train, ch_train, _, _ = data_utils.prepare_wmt_data(400, 400)
    with tf.Session() as sess:
        model = create_model(sess, False)
        dev_set = read_data(en_train, ch_train)  # data used for evaluation
        # Returned sentences are token ids, not yet padded to bucket length.
        train_set = read_data(en_train, ch_train)
        # Number of sentences in each bucket.
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        # print(train_set[2])
        # Total number of training sentences.
        train_total_size = float(sum(train_bucket_sizes))
        # Cumulative probabilities used to pick which bucket each training
        # batch is drawn from.
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        # Start the training loop.
        print('………………………………………… start training ×××××××××')
        while True:
            # Each iteration, randomly pick one bucket (buckets with more
            # sentences are more likely to be chosen), then randomly draw a
            # batch of sentences from that bucket for training.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])
            print(bucket_id)
            # Get a batch of training data.
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            print(encoder_inputs)
            print(decoder_inputs)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            # TODO: validation phase, every steps_per_checkpoint steps run one
            # validation pass and print the results:
            # if current_step % FLAGS.steps_per_checkpoint == 0:
            #     ...
def prepare_data():
    from_train = None
    to_train = None
    from_dev = None
    to_dev = None
    if FLAGS.from_train_data and FLAGS.to_train_data:
        from_train_data = FLAGS.from_train_data
        to_train_data = FLAGS.to_train_data
        from_dev_data = from_train_data
        to_dev_data = to_train_data
        if FLAGS.from_dev_data and FLAGS.to_dev_data:
            from_dev_data = FLAGS.from_dev_data
            to_dev_data = FLAGS.to_dev_data
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_data(
            FLAGS.data_dir,
            from_train_data,
            to_train_data,
            from_dev_data,
            to_dev_data,
            FLAGS.from_vocab_size,
            FLAGS.to_vocab_size,
            data_utils.char_tokenizer)
    else:
        # Prepare WMT data.
        print("Preparing WMT data in %s" % FLAGS.data_dir)
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_wmt_data(
            FLAGS.data_dir, FLAGS.from_vocab_size, FLAGS.to_vocab_size)
    return from_train, to_train, from_dev, to_dev
def train():
    """Train an en->fr translation model using WMT data."""
    # Prepare WMT data.
    print("Preparing WMT data in %s" % FLAGS.data_dir)
    en_train, fr_train, en_dev, fr_dev, _, _ = data_utils.prepare_wmt_data(
        FLAGS.data_dir, FLAGS.en_vocab_size, FLAGS.fr_vocab_size)

    with tf.Session() as sess:
        # Create model.
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)."
              % FLAGS.max_train_data_size)
        dev_set = read_data(en_dev, fr_dev)
        train_set = read_data(en_train, fr_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll
        # use to select a bucket. The length of [scale[i], scale[i+1]] is
        # proportional to the size of the i-th training bucket, as used later.
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        while True:
            # Choose a bucket according to data distribution. We pick a random
            # number in [0, 1] and use the corresponding interval in
            # train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(float(loss)) if loss < 300 else float("inf")
                print("global step %d learning rate %.4f step-time %.2f perplexity "
                      "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
                                step_time, perplexity))
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                # Run evals on development set and print their perplexity.
                for bucket_id in xrange(len(_buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        print(" eval: empty bucket %d" % (bucket_id))
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                                 target_weights, bucket_id, True)
                    eval_ppx = math.exp(float(eval_loss)) if eval_loss < 300 else float("inf")
                    print(" eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
                sys.stdout.flush()
def train():
    """Load a model in forward-only mode and dump the encoder layer states
    for the sentences in FLAGS.test_data (despite the name, this variant does
    not run any training steps)."""
    from_train = None
    to_train = None
    from_dev = None
    to_dev = None
    if FLAGS.from_train_data and FLAGS.to_train_data:
        from_train_data = FLAGS.from_train_data
        to_train_data = FLAGS.to_train_data
        from_dev_data = from_train_data
        to_dev_data = to_train_data
        if FLAGS.from_dev_data and FLAGS.to_dev_data:
            from_dev_data = FLAGS.from_dev_data
            to_dev_data = FLAGS.to_dev_data
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_data(
            FLAGS.data_dir,
            from_train_data,
            to_train_data,
            from_dev_data,
            to_dev_data,
            FLAGS.from_vocab_size,
            FLAGS.to_vocab_size)
    else:
        # Prepare WMT data.
        print("Preparing WMT data in %s" % FLAGS.data_dir)
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_wmt_data(
            FLAGS.data_dir, FLAGS.from_vocab_size, FLAGS.to_vocab_size)

    with tf.Session() as sess:
        # Create model in forward-only mode.
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, True)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)."
              % FLAGS.max_train_data_size)
        dev_set = read_data(from_dev, to_dev)
        train_set = read_data(from_train, to_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll
        # use to select a bucket. The length of [scale[i], scale[i+1]] is
        # proportional to the size of the i-th training bucket, as used later.
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        Output_first = open('first_layer_states.txt', 'wb', 1000)
        Output_second = open('second_layer_states.txt', 'wb', 1000)

        en_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.from" % FLAGS.from_vocab_size)
        fr_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.to" % FLAGS.to_vocab_size)
        en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path)
        _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path)

        model.batch_size = 1
        with gfile.GFile(FLAGS.test_data, mode="rb") as f:
            for sentence in f:
                # sentence = sys.stdin.readline()
                # while sentence:
                print(sentence)
                # Get token-ids for the input sentence.
                token_ids = data_utils.sentence_to_token_ids(
                    tf.compat.as_bytes(sentence), en_vocab)
                # Which bucket does it belong to?
                bucket_id = len(_buckets) - 1
                for i, bucket in enumerate(_buckets):
                    if bucket[0] >= len(token_ids):
                        bucket_id = i
                        break
                else:
                    logging.warning("Sentence truncated: %s", sentence)
                # Get a 1-element batch to feed the sentence to the model.
                encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                    {bucket_id: [(token_ids, [])]}, bucket_id)
                # Get output logits for the sentence.
                _, _, output_logits, enc_last_state, _ = model.step(
                    sess, encoder_inputs, decoder_inputs, target_weights,
                    bucket_id, True, 0)
                first_layer = np.array(enc_last_state[0])
                mat_first_layer = np.matrix(first_layer)
                for line in mat_first_layer:
                    np.savetxt(Output_first, line, fmt='%.2f')
                second_layer = np.array(enc_last_state[1])
                mat_second_layer = np.matrix(second_layer)
                for line in mat_second_layer:
                    np.savetxt(Output_second, line, fmt='%.2f')
def train():
    """Train an en->fr translation model using WMT data."""
    from_train = None
    to_train = None
    from_dev = None
    to_dev = None
    if FLAGS.from_train_data and FLAGS.to_train_data:
        from_train_data = FLAGS.from_train_data
        to_train_data = FLAGS.to_train_data
        from_dev_data = from_train_data
        to_dev_data = to_train_data
        if FLAGS.from_dev_data and FLAGS.to_dev_data:
            from_dev_data = FLAGS.from_dev_data
            to_dev_data = FLAGS.to_dev_data
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_data(
            FLAGS.data_dir,
            from_train_data,
            to_train_data,
            from_dev_data,
            to_dev_data,
            FLAGS.from_vocab_size,
            FLAGS.to_vocab_size)
    else:
        # Prepare WMT data.
        print("Preparing WMT data in %s" % FLAGS.data_dir)
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_wmt_data(
            FLAGS.data_dir, FLAGS.from_vocab_size, FLAGS.to_vocab_size)

    # Merge all summaries and write them out.
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train')

    with tf.Session() as sess:
        # Create model.
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)
        train_writer.add_graph(sess.graph)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)."
              % FLAGS.max_train_data_size)
        dev_set = read_data(from_dev, to_dev)
        train_set = read_data(from_train, to_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        while True:
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            if current_step % FLAGS.steps_per_checkpoint == 0:
                perplexity = math.exp(float(loss)) if loss < 300 else float("inf")
                print("global step %d learning rate %.4f step-time %.2f perplexity "
                      "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
                                step_time, perplexity))
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                # Run evals on development set and print their perplexity.
                for bucket_id in xrange(len(_buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        print(" eval: empty bucket %d" % (bucket_id))
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                                 target_weights, bucket_id, True)
                    eval_ppx = math.exp(float(eval_loss)) if eval_loss < 300 else float("inf")
                    print(" eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
                sys.stdout.flush()
        train_writer.close()
def train():
    """Train an en->fr translation model using WMT data."""
    # Prepare WMT data.
    print("Preparing data in %s" % FLAGS.data_dir)
    en_train, fr_train, en_dev, fr_dev, _, _ = data_utils.prepare_wmt_data(
        FLAGS.data_dir, FLAGS.en_vocab_size, FLAGS.fr_vocab_size)
    print("train_fm: " + en_train)
    print("train_to: " + fr_train)
    print("fm_dev : " + en_dev)
    print("to_dev : " + fr_dev)
    # exit()

    with tf.Session() as sess:
        # Create model.
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)."
              % FLAGS.max_train_data_size)
        dev_set = read_data(en_dev, fr_dev)
        train_set = read_data(en_train, fr_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll
        # use to select a bucket. The length of [scale[i], scale[i+1]] is
        # proportional to the size of the i-th training bucket, as used later.
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        ckpt_cnt = 4
        previous_losses = []
        while True:
            # Choose a bucket according to data distribution. We pick a random
            # number in [0, 1] and use the corresponding interval in
            # train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Print a progress dot roughly 100 times per checkpoint interval.
            if ((FLAGS.steps_per_checkpoint < 100)
                    or (current_step % int(FLAGS.steps_per_checkpoint / 100)) == 0):
                print('.', end='')
                sys.stdout.flush()

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                if current_step % (FLAGS.steps_per_checkpoint * 10) == 0:
                    ckpt_cnt += 1
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir,
                                               "translate.%04d.ckpt" % ckpt_cnt)
                model.saver.save(sess, checkpoint_path,
                                 global_step=model.global_step)
                accum, cnts = 0.0, 0.0
                # Run evals on development set and print their size-weighted
                # mean perplexity.
                for bucket_id in xrange(len(_buckets)):
                    if len(dev_set[bucket_id]):
                        encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                            dev_set, bucket_id)
                        _, eval_loss, _ = model.step(sess, encoder_inputs,
                                                     decoder_inputs, target_weights,
                                                     bucket_id, True)
                        eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
                        cnts += len(dev_set[bucket_id])
                        accum += len(dev_set[bucket_id]) * eval_ppx
                mean_eval_ppx = accum / cnts
                print("\nGlobal step %d learning rate %.4f step-time %.2f "
                      "perplexity %.2f %.2f"
                      % (model.global_step.eval(), model.learning_rate.eval(),
                         step_time, perplexity, mean_eval_ppx))
                sys.stdout.flush()
                step_time, loss = 0.0, 0.0
def train():
    print("Preparing data in %s" % FLAGS.data_dir)
    in_train, out_train, in_dev, out_dev, _, _ = data_utils.prepare_wmt_data(
        FLAGS.data_dir, FLAGS.in_vocab_size, FLAGS.out_vocab_size)

    with tf.Session() as sess:
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        print("Reading development and training data (limit: %d)."
              % FLAGS.max_train_data_size)
        dev_set = read_data(in_dev, out_dev)
        train_set = read_data(in_train, out_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        while True:
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            if current_step % FLAGS.steps_per_checkpoint == 0:
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d learning rate %.4f step-time %.2f perplexity "
                      "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
                                step_time, perplexity))
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                # Note: unlike other variants, this eval loop assumes every
                # dev bucket is non-empty.
                for bucket_id in xrange(len(_buckets)):
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                                 target_weights, bucket_id, True)
                    eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
                    print(" eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
                sys.stdout.flush()
def train_early_stop():
    """Train an en->fr translation model using AMR data with early stopping."""
    # Prepare data.
    print("Preparing WMT data in %s" % FLAGS.data_dir)
    en_train, fr_train, en_dev, fr_dev, _, _ = data_utils.prepare_wmt_data(
        FLAGS.data_dir, FLAGS.en_vocab_size, FLAGS.fr_vocab_size,
        FLAGS.amrseq_version)

    with tf.Session() as sess:
        # Create model.
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)."
              % FLAGS.max_train_data_size)
        dev_set = read_data(en_dev, fr_dev)
        train_set = read_data(en_train, fr_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll
        # use to select a bucket. The length of [scale[i], scale[i+1]] is
        # proportional to the size of the i-th training bucket, as used later.
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        done_looping = False
        improvement_threshold = 0.995
        best_eval_total_ppx = np.inf
        best_step = 0
        # Go over at least this many steps (batches) before stopping.
        patience = int(train_total_size / FLAGS.batch_size)
        patience_increase = 2
        while model.global_step.eval() < FLAGS.max_steps and (not done_looping):
            # Choose a bucket according to data distribution. We pick a random
            # number in [0, 1] and use the corresponding interval in
            # train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("Current step %d, global step %d learning rate %.4f "
                      "step-time %.2f perplexity %.2f"
                      % (current_step, model.global_step.eval(),
                         model.learning_rate.eval(), step_time, perplexity))
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Checkpoints are saved below only when the dev perplexity improves.
                # checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
                # model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0

                # Run evals on development set and print their perplexity.
                eval_total_ppx = 0.0  # total perplexity over all validation buckets
                tmp_batch_size = model.batch_size
                for bucket_id in xrange(len(_buckets)):
                    model.batch_size = len(dev_set[bucket_id])  # eval the whole bucket
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                                 target_weights, bucket_id, True)
                    eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
                    eval_total_ppx += eval_ppx
                    print(" eval: bucket %d size: %d perplexity %.2f"
                          % (bucket_id, model.batch_size, eval_ppx))
                model.batch_size = tmp_batch_size

                if eval_total_ppx < best_eval_total_ppx:
                    if eval_total_ppx < best_eval_total_ppx * improvement_threshold:
                        # The improvement is good enough; extend patience.
                        patience = max(patience,
                                       model.global_step.eval() * patience_increase)
                    best_eval_total_ppx = eval_total_ppx
                    best_step = model.global_step.eval()
                    # Save the current checkpoint.
                    checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
                    model.saver.save(sess, checkpoint_path,
                                     global_step=model.global_step)
                if patience <= model.global_step.eval():
                    done_looping = True
                sys.stdout.flush()
        print("Optimization complete. Best total validation perplexity %f "
              "obtained at global step %d." % (best_eval_total_ppx, best_step))
def train():
    """Train an en->fr translation model using WMT data."""
    # Prepare WMT data.
    print("Preparing WMT data in %s" % FLAGS.data_dir)
    en_train, fr_train, en_dev, fr_dev, _, _ = data_utils.prepare_wmt_data(
        FLAGS.data_dir, FLAGS.en_vocab_size, FLAGS.fr_vocab_size)

    fr_vocab_path = os.path.join(FLAGS.data_dir,
                                 "vocab%d.en" % FLAGS.fr_vocab_size)
    # en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path)
    _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1)
    with tf.Session(config=tf.ConfigProto(device_count={'GPU': 1},
                                          gpu_options=gpu_options)) as sess:
        # Create model.
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)."
              % FLAGS.max_train_data_size)
        dev_set = read_data(en_dev, fr_dev)
        train_set = read_data(en_train, fr_train, FLAGS.max_train_data_size)
        # embed()
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll
        # use to select a bucket. The length of [scale[i], scale[i+1]] is
        # proportional to the size of the i-th training bucket, as used later.
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        while True:
            # Choose a bucket according to data distribution. We pick a random
            # number in [0, 1] and use the corresponding interval in
            # train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d learning rate %.4f step-time %.2f perplexity "
                      "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
                                step_time, perplexity))
                print("step loss: %.4f" % step_loss)
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss. A fresh Saver with
                # max_to_keep=0 is created so old checkpoints are not deleted.
                checkpoint_path = os.path.join(FLAGS.train_dir,
                                               "translate.ckpt" + str(loss))
                model.saver = tf.train.Saver(tf.all_variables(), max_to_keep=0)
                model.saver.save(sess, checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0
def train():
    """Train a src->trg translation model."""
    print("Preparing training and dev data in %s" % FLAGS.data_dir)
    src_train, trg_train, src_dev, trg_dev, src_vocab_path, trg_vocab_path = \
        data_utils.prepare_wmt_data(FLAGS.data_dir, FLAGS.src_vocab_size,
                                    FLAGS.trg_vocab_size)
    src_vocab, rev_src_vocab = data_utils.initialize_vocabulary(src_vocab_path)
    trg_vocab, rev_trg_vocab = data_utils.initialize_vocabulary(trg_vocab_path)
    # Clamp the configured vocabulary sizes to the actual vocabulary files.
    if FLAGS.src_vocab_size > len(src_vocab):
        FLAGS.src_vocab_size = len(src_vocab)
    if FLAGS.trg_vocab_size > len(trg_vocab):
        FLAGS.trg_vocab_size = len(trg_vocab)

    with tf.Session() as sess:
        # Create model.
        print("Creating %d layers of %d units with word embedding %d."
              % (FLAGS.num_layers, FLAGS.hidden_units, FLAGS.hidden_edim))
        model = create_model(sess, False)

        dev_set = read_data(src_dev, trg_dev)
        train_set = read_data(src_train, trg_train)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll
        # use to select a bucket. The length of [scale[i], scale[i+1]] is
        # proportional to the size of the i-th training bucket, as used later.
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        while True:
            # Choose a bucket according to data distribution. We pick a random
            # number in [0, 1] and use the corresponding interval in
            # train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            # Get a batch and make a step. This variant's get_batch() also
            # returns an encoder mask.
            start_time = time.time()
            encoder_inputs, encoder_mask, decoder_inputs, target_weights = \
                model.get_batch(train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, encoder_mask,
                                         decoder_inputs, target_weights,
                                         bucket_id, False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d learning rate %.8f step-time %.2f perplexity "
                      "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
                                step_time, perplexity))
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                # Run evals on development set and print their perplexity.
                for bucket_id in xrange(len(_buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        print(" eval: empty bucket %d" % (bucket_id))
                        continue
                    encoder_inputs, encoder_mask, decoder_inputs, target_weights = \
                        model.get_batch(dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs, encoder_mask,
                                                 decoder_inputs, target_weights,
                                                 bucket_id, True)
                    eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
                    print(" eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
                sys.stdout.flush()
def train():
    """Train an en->fr translation model using WMT data."""
    # Prepare WMT data.
    print("Preparing WMT data in %s" % FLAGS.data_dir)
    en_train, fr_train, en_dev, fr_dev, _, _ = data_utils.prepare_wmt_data(
        FLAGS.data_dir, FLAGS.en_vocab_size, FLAGS.fr_vocab_size)

    # Only allocate 2/3 of the GPU memory to allow for running GPU-based
    # predictions while training:
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.666)
    config = tf.ConfigProto(gpu_options=gpu_options)
    config.gpu_options.allocator_type = 'BFC'

    with tf.Session(config=config) as sess:
        # Create model.
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)."
              % FLAGS.max_train_data_size)
        dev_set = read_data(en_dev, fr_dev)
        train_set = read_data(en_train, fr_train, FLAGS.max_train_data_size)
        # for bucket_id, (source_size, target_size) in enumerate(_buckets):
        #     print("data set index %d count: %d"
        #           % (bucket_id, len(train_set[bucket_id])))
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll
        # use to select a bucket. The length of [scale[i], scale[i+1]] is
        # proportional to the size of the i-th training bucket, as used later.
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        best_eval_ppx = float('inf')
        while True:
            # Choose a bucket according to data distribution. We pick a random
            # number in [0, 1] and use the corresponding interval in
            # train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d learning rate %.4f step-time %.2f perplexity "
                      "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
                                step_time, perplexity))
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0

                # Run evals on development set and print their perplexity.
                eval_ppx_list = []
                for bucket_id in xrange(len(_buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        print(" eval: empty bucket %d" % (bucket_id))
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                                 target_weights, bucket_id, True)
                    eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
                    eval_ppx_list.append(eval_ppx)
                    print(" eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
                    sys.stdout.flush()
                # Track the best mean dev perplexity seen so far.
                mean_eval_ppx = np.mean(eval_ppx_list)
                if mean_eval_ppx < best_eval_ppx:
                    best_eval_ppx = mean_eval_ppx
def train():
    """Train an en->fr translation model using WMT data."""
    # Prepare WMT data.
    logging.debug("Preparing WMT data in %s" % FLAGS.data_dir)
    en_train, fr_train, en_dev, fr_dev, _, _ = data_utils.prepare_wmt_data(
        FLAGS.data_dir, FLAGS.en_vocab_size, FLAGS.fr_vocab_size)

    # Beam search is disabled during training and used at inference.
    beam_search = False
    beam_size = 5
    attention = FLAGS.attention

    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        # Create model.
        logging.debug("Creating %d layers of %d units."
                      % (FLAGS.num_layers, FLAGS.size))
        # model = create_model(sess, False)
        model = create_model(sess, False, beam_search=beam_search,
                             beam_size=beam_size, attention=attention)

        # Read data into buckets and compute their sizes.
        logging.debug("Reading development and training data (limit: %d)."
                      % FLAGS.max_train_data_size)
        dev_set = read_data(en_dev, fr_dev)
        logging.debug("Finished reading data")
        train_set = read_data(en_train, fr_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll
        # use to select a bucket. The length of [scale[i], scale[i+1]] is
        # proportional to the size of the i-th training bucket, as used later.
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        logging.debug('Started training')
        while True:
            # Choose a bucket according to data distribution. We pick a random
            # number in [0, 1] and use the corresponding interval in
            # train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            # _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
            #                              target_weights, bucket_id, False)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False,
                                         beam_search)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Log perplexity every 10 iterations for plotting:
            # if current_step % 10 == 0:
            #     perplexity10 = math.exp(float(loss)) if loss < 300 else float("inf")
            #     logging.debug("Plot: global step %d learning rate %.4f "
            #                   "step-time %.2f perplexity %.2f"
            #                   % (model.global_step.eval(),
            #                      model.learning_rate.eval(),
            #                      step_time, perplexity10))

            # Once in a while, we save checkpoint, log statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(float(loss)) if loss < 300 else float("inf")
                logging.debug("global step %d learning rate %.4f step-time %.2f "
                              "perplexity %.2f"
                              % (model.global_step.eval(),
                                 model.learning_rate.eval(),
                                 step_time, perplexity))
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "ama.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                # Run evals on development set and log their perplexity.
                for bucket_id in xrange(len(_buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        logging.debug(" eval: empty bucket %d" % (bucket_id))
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                                 target_weights, bucket_id, True)
                    eval_ppx = math.exp(float(eval_loss)) if eval_loss < 300 else float("inf")
                    logging.debug(" eval: bucket %d perplexity %.2f"
                                  % (bucket_id, eval_ppx))
                sys.stdout.flush()
def train():
    """Train an en->fr translation model using WMT data."""
    from_train = None
    to_train = None
    from_dev = None
    to_dev = None
    if FLAGS.from_train_data and FLAGS.to_train_data:
        from_train_data = FLAGS.from_train_data
        to_train_data = FLAGS.to_train_data
        from_dev_data = from_train_data
        to_dev_data = to_train_data
        if FLAGS.from_dev_data and FLAGS.to_dev_data:
            from_dev_data = FLAGS.from_dev_data
            to_dev_data = FLAGS.to_dev_data
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_data(
            FLAGS.data_dir,
            from_train_data,
            to_train_data,
            from_dev_data,
            to_dev_data,
            FLAGS.from_vocab_size,
            FLAGS.to_vocab_size)
    else:
        # Prepare WMT data.
        print("Preparing data in %s" % FLAGS.data_dir)
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_wmt_data(
            FLAGS.data_dir, FLAGS.from_vocab_size, FLAGS.to_vocab_size)

    with tf.Session() as sess:
        # Create model.
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)."
              % FLAGS.max_train_data_size)
        dev_set = read_data(from_dev, to_dev)
        train_set = read_data(from_train, to_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll
        # use to select a bucket. The length of [scale[i], scale[i+1]] is
        # proportional to the size of the i-th training bucket, as used later.
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        eval_ppx_history = []
        # while current_step < FLAGS.max_num_steps:
        while True:
            # Choose a bucket according to data distribution. We pick a random
            # number in [0, 1] and use the corresponding interval in
            # train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            # Get a batch and make a step. This variant's step() returns a
            # fourth value (the last hidden state), ignored during training.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                            target_weights, bucket_id, False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(float(loss)) if loss < 300 else float("inf")
                print("global step %d learning rate %.4f step-time %.2f perplexity "
                      "%.4f" % (model.global_step.eval(), model.learning_rate.eval(),
                                step_time, perplexity))
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
                if not os.path.isabs(checkpoint_path):
                    checkpoint_path = os.path.abspath(
                        os.path.join(os.getcwd(), checkpoint_path))
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0

                eval_ppx = np.zeros(len(_buckets), dtype=np.float32)
                for bucket_id in xrange(len(_buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        print(" eval: empty bucket %d" % (bucket_id))
                        continue
                    # 0717 newly modified: evaluate each dev bucket in batches.
                    num_batches = int(math.ceil(1.0 * len(dev_set[bucket_id])
                                                / FLAGS.batch_size))
                    eval_loss = np.zeros(num_batches, dtype=np.float32)
                    for idx in range(num_batches):
                        encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                            dev_set, bucket_id)
                        _, eval_loss[idx], _, eval_lasthidden = model.step(
                            sess, encoder_inputs, decoder_inputs,
                            target_weights, bucket_id, True)
                    # Index by bucket_id so per-bucket values survive the loop
                    # (the original rebound eval_ppx to a scalar here).
                    eval_ppx[bucket_id] = (math.exp(np.mean(eval_loss))
                                           if eval_loss.mean() < 300 else float("inf"))
                    print(" eval: bucket %d perplexity %.4f"
                          % (bucket_id, eval_ppx[bucket_id]))

                # 0717 newly added: stop criterion, minimum point passing 400 epochs.
                population = np.array([len(dev_set[bucket_id])
                                       for bucket_id in xrange(len(_buckets))])
                total_eval_ppx = np.sum(eval_ppx * population)
                print(" total eval perplexity %.4f" % total_eval_ppx)
                if len(eval_ppx_history) == 0:
                    eval_ppx_history.append(total_eval_ppx)
                    sys.stdout.flush()
                    continue
                if total_eval_ppx > eval_ppx_history[0]:
                    eval_ppx_history.append(total_eval_ppx)
                    if total_eval_ppx > eval_ppx_history[-1]:
                        sess.run(model.learning_rate_decay_op)
                    if len(eval_ppx_history) == 5:
                        sys.stdout.flush()
                        break
                else:
                    eval_ppx_history = [total_eval_ppx]
                sys.stdout.flush()
def train():
    """Train an en->fr translation model using WMT data."""
    from_train = None
    to_train = None
    from_dev = None
    to_dev = None
    if FLAGS.from_train_data and FLAGS.to_train_data:
        from_train_data = FLAGS.from_train_data
        to_train_data = FLAGS.to_train_data
        from_dev_data = from_train_data
        to_dev_data = to_train_data
        if FLAGS.from_dev_data and FLAGS.to_dev_data:
            from_dev_data = FLAGS.from_dev_data
            to_dev_data = FLAGS.to_dev_data
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_data(
            FLAGS.data_dir,
            from_train_data,
            to_train_data,
            from_dev_data,
            to_dev_data,
            FLAGS.from_vocab_size,
            FLAGS.to_vocab_size)
    else:
        # Prepare WMT data.
        handleInfo(str("Preparing WMT data in : " + str(FLAGS.data_dir)))
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_wmt_data(
            FLAGS.data_dir, FLAGS.from_vocab_size, FLAGS.to_vocab_size)

    with tf.Session() as sess:
        # Create model.
        handleInfo(str("Creating " + str(FLAGS.num_layers) + " layers of "
                       + str(FLAGS.size) + " units."))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        handleInfo(str("Reading development and training data (limit: "
                       + str(FLAGS.max_train_data_size) + ")."))
        dev_set = read_data(from_dev, to_dev)
        train_set = read_data(from_train, to_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        while True:
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            if current_step % FLAGS.steps_per_checkpoint == 0:
                perplexity = math.exp(float(loss)) if loss < 300 else float("inf")
                perplexityRound = round(perplexity, 1)
                # Stop once the same sub-9.9 perplexity has been seen more than
                # 9 checkpoints in a row; _lowestPerplexity holds
                # [lowest value, repeat count].
                if perplexityRound < 9.9:
                    if not _lowestPerplexity:
                        _lowestPerplexity.append(perplexityRound)
                        _lowestPerplexity.append(0)
                    if perplexityRound == _lowestPerplexity[0]:
                        if _lowestPerplexity[1] > 9:
                            break
                        else:
                            count = _lowestPerplexity[1]
                            count = count + 1
                            _lowestPerplexity[1] = count
                    else:
                        if perplexityRound < _lowestPerplexity[0]:
                            _lowestPerplexity[0] = perplexityRound
                            _lowestPerplexity[1] = 1
                    handleInfo("Lowest Perplexity List--" + str(_lowestPerplexity))
                message = ("global step " + str(model.global_step.eval())
                           + " learning rate " + str(model.learning_rate.eval())
                           + " step-time " + str(step_time)
                           + " perplexity " + str(perplexity))
                handleInfo(message)
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                # Run evals on development set and print their perplexity.
                for bucket_id in xrange(len(_buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        handleInfo(str("Eval: empty bucket : " + str(bucket_id)))
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                                 target_weights, bucket_id, True)
                    eval_ppx = math.exp(float(eval_loss)) if eval_loss < 300 else float("inf")
                    handleInfo(str("Eval: bucket " + str(bucket_id)
                                   + " perplexity : " + str(eval_ppx)))
                sys.stdout.flush()
def train():
    """Train an en->fr translation model using WMT data."""
    from_train = None
    to_train = None
    from_dev = None
    to_dev = None
    if FLAGS.from_train_data and FLAGS.to_train_data:
        from_train_data = FLAGS.from_train_data
        to_train_data = FLAGS.to_train_data
        from_dev_data = from_train_data
        to_dev_data = to_train_data
        if FLAGS.from_dev_data and FLAGS.to_dev_data:
            from_dev_data = FLAGS.from_dev_data
            to_dev_data = FLAGS.to_dev_data
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_data(
            FLAGS.data_dir,
            from_train_data,
            to_train_data,
            from_dev_data,
            to_dev_data,
            FLAGS.from_vocab_size,
            FLAGS.to_vocab_size)
        en_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.from" % FLAGS.from_vocab_size)
        _, rev_fr_vocab = data_utils.initialize_vocabulary(en_vocab_path)
    else:
        # Prepare WMT data.
        print("Preparing WMT data in %s" % FLAGS.data_dir)
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_wmt_data(
            FLAGS.data_dir, FLAGS.from_vocab_size, FLAGS.to_vocab_size)

    # Separate graphs and sessions for the training and evaluation models.
    train_graph = tf.Graph()
    eval_graph = tf.Graph()
    train_sess = tf.Session(graph=train_graph)
    eval_sess = tf.Session(graph=eval_graph)
    # eval_sess = tf_debug.LocalCLIDebugWrapperSession(eval_sess)

    with train_graph.as_default():
        # Create train model.
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        train_model = create_model(train_sess, False)
    with eval_graph.as_default():
        # Create eval model.
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        eval_model = create_model(eval_sess, True)

    # with tf.Session() as sess:
    # Read data into buckets and compute their sizes.
    print("Reading development and training data (limit: %d)."
          % FLAGS.max_train_data_size)
    dev_set = read_data(from_dev, to_dev)
    train_set = read_data(from_train, to_train, FLAGS.max_train_data_size)
    train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
    train_total_size = float(sum(train_bucket_sizes))

    # A bucket scale is a list of increasing numbers from 0 to 1 that we'll
    # use to select a bucket. The length of [scale[i], scale[i+1]] is
    # proportional to the size of the i-th training bucket, as used later.
    train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                           for i in xrange(len(train_bucket_sizes))]

    # This is the training loop.
    step_time, loss = 0.0, 0.0
    current_step = 0
    previous_losses = []
    while True:
        # Choose a bucket according to data distribution. We pick a random
        # number in [0, 1] and use the corresponding interval in
        # train_buckets_scale.
        random_number_01 = np.random.random_sample()
        bucket_id = min([i for i in xrange(len(train_buckets_scale))
                         if train_buckets_scale[i] > random_number_01])

        # Get a batch and make a step.
        start_time = time.time()
        encoder_inputs, decoder_inputs, target_weights, target_inputs, sent_ids = \
            train_model.get_batch(train_set, bucket_id)
        _, step_loss, _ = train_model.step(train_sess, encoder_inputs,
                                           decoder_inputs, target_weights,
                                           target_inputs, bucket_id, False)
        step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
        loss += step_loss / FLAGS.steps_per_checkpoint
        current_step += 1

        # Once in a while, we save checkpoint, print statistics, and run evals.
        if current_step % FLAGS.steps_per_checkpoint == 0:
            # Print statistics for the previous epoch.
            perplexity = math.exp(float(loss)) if loss < 300 else float("inf")
            print("global step %d learning rate %.4f step-time %.2f perplexity "
                  "%.2f" % (train_model.global_step.eval(session=train_sess),
                            train_model.learning_rate.eval(session=train_sess),
                            step_time, perplexity))
            # Decrease learning rate if no improvement was seen over last 3 times.
            if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                train_sess.run(train_model.learning_rate_decay_op)
            previous_losses.append(loss)
            # Save checkpoint, restore it into the eval model, and zero timer and loss.
            checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
            ckpt_path = train_model.saver.save(train_sess, checkpoint_path,
                                               global_step=train_model.global_step)
            eval_model.saver.restore(eval_sess, ckpt_path)
            step_time, loss = 0.0, 0.0

            # Run evals on development set and print their perplexity.
            print("run evals")
            ft = open('tmp.eval.ids', 'w')
            for bucket_id in xrange(len(_buckets)):
                if len(dev_set[bucket_id]) == 0:
                    print(" eval: empty bucket %d" % (bucket_id))
                    continue
                all_encoder_inputs, all_decoder_inputs, all_target_weights, \
                    all_target_inputs, all_sent_ids = eval_model.get_all_batch(
                        dev_set, bucket_id)
                # ipdb.set_trace()
                for idx in xrange(len(all_encoder_inputs)):
                    _, eval_loss, output_logits = eval_model.step(
                        eval_sess, all_encoder_inputs[idx], all_decoder_inputs[idx],
                        all_target_weights[idx], all_target_inputs[idx],
                        bucket_id, True)
                    batch_ids = all_sent_ids[idx]
                    # eval_ppx = math.exp(float(eval_loss)) if eval_loss < 300 else float("inf")
                    # print(" eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
                    # ipdb.set_trace()
                    swap_inputs = np.array(all_encoder_inputs[idx])
                    swap_inputs = swap_inputs.swapaxes(0, 1)
                    outputs = [np.argmax(logit, axis=1) for logit in output_logits]
                    swap_outputs = np.array(outputs)
                    swap_outputs = swap_outputs.swapaxes(0, 1)
                    out_ids = []
                    for batch_id in xrange(len(swap_outputs)):
                        out_ids.append(swap_inputs[batch_id][swap_outputs[batch_id]])
                    # if data_utils.EOS_ID in outputs:
                    #     t = [m[:m.index(data_utils.EOS_ID)] for m in t]
                    for batch_id in xrange(len(swap_outputs)):
                        # print(" ".join([tf.compat.as_str(rev_fr_vocab[o]) for o in m]))
                        ft.write(" ".join([tf.compat.as_str(rev_fr_vocab[o])
                                           for o in out_ids[batch_id].tolist()])
                                 + "|" + str(batch_ids[batch_id]) + '\n')
            ft.close()
            print("converting output...")
            subprocess.call(
                "python convert_to_json.py --din tmp.eval.ids --dout out.json "
                "--dsource /users1/ybsun/seq2sql/WikiSQL/annotated/dev.jsonl",
                shell=True)
            print("running evaluation script...")
            subprocess.call(
                "python evaluate.py ../WikiSQL/data/dev.jsonl "
                "../WikiSQL/data/dev.db ./out.json",
                shell=True)
            print("finish evals")
            sys.stdout.flush()
def train():
    print("Preparing data in %s" % FLAGS.data_dir)
    in_train, out_train, in_dev, out_dev, _, _ = data_utils.prepare_wmt_data(
        FLAGS.data_dir, FLAGS.in_vocab_size, FLAGS.out_vocab_size)

    with tf.Session() as sess:
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        print("Reading development and training data (limit: %d)."
              % FLAGS.max_train_data_size)
        dev_set = read_data(in_dev, out_dev)
        train_set = read_data(in_train, out_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        while True:
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            if current_step % FLAGS.steps_per_checkpoint == 0:
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d learning rate %.4f step-time %.2f perplexity "
                      "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
                                step_time, perplexity))
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                for bucket_id in xrange(len(_buckets)):
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                                 target_weights, bucket_id, True)
                    eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
                    print(" eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
                sys.stdout.flush()
def train(): """Train a en->fr translation model using WMT data.""" from_train = None to_train = None from_dev = None to_dev = None if FLAGS.from_train_data and FLAGS.to_train_data: from_train_data = FLAGS.from_train_data to_train_data = FLAGS.to_train_data from_dev_data = from_train_data to_dev_data = to_train_data if FLAGS.from_dev_data and FLAGS.to_dev_data: from_dev_data = FLAGS.from_dev_data to_dev_data = FLAGS.to_dev_data from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_data( FLAGS.data_dir, from_train_data, to_train_data, from_dev_data, to_dev_data, FLAGS.from_vocab_size, FLAGS.to_vocab_size) else: # Prepare WMT data. print("Preparing WMT data in %s" % FLAGS.data_dir) from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_wmt_data( FLAGS.data_dir, FLAGS.from_vocab_size, FLAGS.to_vocab_size) with tf.Session() as sess: # Create model. print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size)) model = create_model(sess,True) # Read data into buckets and compute their sizes. print ("Reading development and training data (limit: %d)." % FLAGS.max_train_data_size) dev_set = read_data(from_dev, to_dev) train_set = read_data(from_train, to_train, FLAGS.max_train_data_size) train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))] train_total_size = float(sum(train_bucket_sizes)) # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to # the size if i-th training bucket, as used later. train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size for i in xrange(len(train_bucket_sizes))] # This is the training loop. step_time, loss = 0.0, 0.0 current_step = 0 previous_losses = [] while current_step<FLAGS.num_train_step: # Choose a bucket according to data distribution. We pick a random number # in [0, 1] and use the corresponding interval in train_buckets_scale. random_number_01 = np.random.random_sample() bucket_id = min([i for i in xrange(len(train_buckets_scale)) if train_buckets_scale[i] > random_number_01]) # Get a batch and make a step. start_time = time.time() encoder_inputs, decoder_inputs, target_weights = model.get_batch( train_set, bucket_id) _, step_loss, _, enc_init_states,enc_all_outputs= model.step(sess, encoder_inputs, decoder_inputs,#MK change target_weights, bucket_id, True,1) step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint loss += step_loss / FLAGS.steps_per_checkpoint current_step += 1 # Once in a while, we save checkpoint, print statistics, and run evals. if current_step % FLAGS.steps_per_checkpoint == 0: # Print statistics for the previous epoch. first_layer = np.array(enc_init_states[0]) mat_first_layer = np.matrix(first_layer) with open('first_layer_states.txt','wb') as f: for line in mat_first_layer: np.savetxt(f, line, fmt='%.2f') second_layer = np.array(enc_init_states[1]) mat_second_layer = np.matrix(second_layer) with open('second_layer_states.txt','wb') as f: for line in mat_second_layer: np.savetxt(f, line, fmt='%.2f') perplexity = math.exp(float(loss)) if loss < 300 else float("inf") print ("global step %d learning rate %.4f step-time %.5f perplexity " "%.5f" % (model.global_step.eval(), model.learning_rate.eval(), step_time, perplexity)) # Decrease learning rate if no improvement was seen over last 3 times. 
if len(previous_losses) > 2 and loss > max(previous_losses[-3:]): sess.run(model.learning_rate_decay_op) previous_losses.append(loss) # Save checkpoint and zero timer and loss. checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt") model.saver.save(sess, checkpoint_path, global_step=model.global_step) step_time, loss = 0.0, 0.0 # Run evals on development set and print their perplexity. for bucket_id in xrange(len(_buckets)): if len(dev_set[bucket_id]) == 0: print(" eval: empty bucket %d" % (bucket_id)) continue encoder_inputs, decoder_inputs, target_weights = model.get_batch( dev_set, bucket_id) _, eval_loss, _,_,_ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, True,0) eval_ppx = math.exp(float(eval_loss)) if eval_loss < 300 else float( "inf") print(" eval: bucket %d perplexity %.5f" % (bucket_id, eval_ppx)) sys.stdout.flush() en_vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.from" % FLAGS.from_vocab_size) fr_vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.to" % FLAGS.to_vocab_size) en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path) _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path) max_iter=100 count=0 model.batch_size=1 with gfile.GFile(FLAGS.from_train_data, mode="rb") as f: for sentence in f: count=count+1 if max_iter < count: break #sentence = sys.stdin.readline() #while sentence: print(sentence) # Get token-ids for the input sentence. token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), en_vocab) # Which bucket does it belong to? bucket_id = len(_buckets) - 1 for i, bucket in enumerate(_buckets): if bucket[0] >= len(token_ids): bucket_id = i break else: logging.warning("Sentence truncated: %s", sentence) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, target_weights = model.get_batch( {bucket_id: [(token_ids, [])]}, bucket_id) # Get output logits for the sentence. _, _, output_logits,enc_all_state,_ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, True,0) quit()
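The state dump above writes each layer's activations as formatted text. A minimal round-trip sketch of that np.savetxt pattern, using a hypothetical (batch, units) array rather than real encoder states; note that fmt='%.2f' trades precision for readability.

import numpy as np

states = np.random.randn(4, 8).astype(np.float32)  # hypothetical (batch, units)
with open("layer_states.txt", "wb") as f:
  np.savetxt(f, states, fmt="%.2f")  # one row per line, human-readable

reloaded = np.loadtxt("layer_states.txt")
print(np.abs(reloaded - states).max())  # <= 0.005: rounding cost of "%.2f"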
def train(): """Train a en->fr translation model using WMT data.""" # Prepare WMT data. print("Preparing WMT data in %s" % FLAGS.data_dir) en_train, fr_train, en_dev, fr_dev, _, _ = data_utils.prepare_wmt_data( FLAGS.data_dir, FLAGS.eng_vocab_size, FLAGS.hin_vocab_size) save_path = os.path.join(FLAGS.train_dir, "summary/") with tf.Session() as sess: # Create model. print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size)) model = create_model(sess, False) test_writer = tf.train.SummaryWriter(os.path.join(save_path, 'test'), graph=sess.graph) train_writer = tf.train.SummaryWriter(os.path.join(save_path, 'train'), graph=sess.graph) # Read data into buckets and compute their sizes. print("Reading development and training data (limit: %d)." % FLAGS.max_train_data_size) dev_set = read_data(en_dev, fr_dev) train_set = read_data(en_train, fr_train, FLAGS.max_train_data_size) train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))] train_total_size = float(sum(train_bucket_sizes)) # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to # the size if i-th training bucket, as used later. train_buckets_scale = [ sum(train_bucket_sizes[:i + 1]) / train_total_size for i in xrange(len(train_bucket_sizes)) ] # This is the training loop. step_time, loss = 0.0, 0.0 current_step = 0 previous_losses = [] perplexity_eval_summary = tf.Summary() perplexity_train_summary = tf.Summary() eps1 = exp_decay(float("inf")) #print(eps1) decode1 = sampling(eps1, float("inf"), _buckets[-1][1] + 1) while current_step < 80001: # Choose a bucket according to data distribution. We pick a random number # in [0, 1] and use the corresponding interval in train_buckets_scale. random_number_01 = np.random.random_sample() bucket_id = min([ i for i in xrange(len(train_buckets_scale)) if train_buckets_scale[i] > random_number_01 ]) eps = exp_decay(current_step * 1.0) #print(eps) decode = sampling(eps, current_step * 1.0, _buckets[-1][1] + 1) # Get a batch and make a step. start_time = time.time() encoder_inputs, decoder_inputs, target_weights = model.get_batch( train_set, bucket_id) _, step_loss, _, _ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, decode, False) step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint #loss += step_loss / FLAGS.steps_per_checkpoint loss = step_loss current_step += 1 # Once in a while, we save checkpoint, print statistics, and run evals. if current_step % FLAGS.steps_per_checkpoint == 0: # Print statistics for the previous epoch. perplexity = math.exp( float(loss)) if loss < 300 else float("inf") bucket_trainvalue = perplexity_train_summary.value.add() bucket_trainvalue.tag = "peplexity_trainbucket_%d" % bucket_id bucket_trainvalue.simple_value = perplexity train_writer.add_summary(perplexity_train_summary, model.global_step.eval()) print( "global step %d learning rate %.4f step-time %.2f perplexity " "%.2f bucketid: %d epsilon: %f" % (model.global_step.eval(), model.learning_rate.eval(), step_time, perplexity, bucket_id, eps)) # Decrease learning rate if no improvement was seen over last 3 times. 
if len(previous_losses) > 2 and loss > max( previous_losses[-3:]): sess.run(model.learning_rate_decay_op) learning_rate = tf.scalar_summary('learning_rate', model.learning_rate_decay_op) learning_str = sess.run(learning_rate) train_writer.add_summary(learning_str, model.global_step.eval()) previous_losses.append(loss) # Save checkpoint and zero timer and loss. checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt") model.saver.save(sess, checkpoint_path, global_step=model.global_step) step_time, loss = 0.0, 0.0 # Run evals on development set and print their perplexity. for bucket_id in xrange(len(_buckets)): if len(dev_set[bucket_id]) == 0: print(" eval: empty bucket %d" % (bucket_id)) continue encoder_inputs, decoder_inputs, target_weights = model.get_batch( dev_set, bucket_id) _, eval_loss, _, _ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, decode1, True) eval_ppx = math.exp( float(eval_loss)) if eval_loss < 300 else float("inf") bucket_value = perplexity_eval_summary.value.add() bucket_value.tag = "peplexity_evalbucket_%d" % bucket_id bucket_value.simple_value = eval_ppx test_writer.add_summary(perplexity_eval_summary, model.global_step.eval()) print(" eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx)) sys.stdout.flush()
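The helpers exp_decay and sampling are not defined in this snippet. What follows is a hedged guess at their shape, in the spirit of scheduled sampling: the printed epsilon decays toward 0 with the step, and the eval-time call passes float("inf") so no ground-truth tokens are fed. Every name and constant below is an assumption, not the author's code.

import math
import numpy as np

def exp_decay(step, k=0.9999):
  # Hypothetical teacher-forcing probability, decaying from 1.0 toward 0.0.
  # exp_decay(float("inf")) == 0.0, matching the eval-time call above.
  return 0.0 if math.isinf(step) else k ** step

def sampling(eps, step, decoder_length):
  # One flag per decoder position: True -> feed the ground-truth token,
  # False -> feed the model's previous prediction. `step` is accepted only
  # to mirror the call sites above and is unused in this sketch.
  return list(np.random.random_sample(decoder_length) < eps)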
def train(): """Train a en->fr translation model using WMT data.""" from_train = None to_train = None from_dev = None to_dev = None print(FLAGS.data_dir) print(FLAGS.to_dev_data) if FLAGS.from_train_data and FLAGS.to_train_data: from_train_data = FLAGS.from_train_data to_train_data = FLAGS.to_train_data from_dev_data = from_train_data to_dev_data = to_train_data if FLAGS.from_dev_data and FLAGS.to_dev_data: from_dev_data = FLAGS.from_dev_data to_dev_data = FLAGS.to_dev_data from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_data( FLAGS.data_dir, from_train_data, to_train_data, from_dev_data, to_dev_data, FLAGS.from_vocab_size, FLAGS.to_vocab_size) else: # Prepare WMT data. print("Preparing WMT data in %s" % FLAGS.data_dir) from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_wmt_data( FLAGS.data_dir, FLAGS.from_vocab_size, FLAGS.to_vocab_size) with tf.Session(config=config) as sess: # Create model. print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size)) model = create_model(sess, False) # Read data into buckets and compute their sizes. print("Reading development and training data (limit: %d)." % FLAGS.max_train_data_size) dev_set = read_data(from_dev, to_dev) train_set = read_data(from_train, to_train, FLAGS.max_train_data_size) train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))] train_total_size = float(sum(train_bucket_sizes)) # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to # the size if i-th training bucket, as used later. train_buckets_scale = [ sum(train_bucket_sizes[:i + 1]) / train_total_size for i in xrange(len(train_bucket_sizes)) ] # This is the training loop. step_time, loss = 0.0, 0.0 current_step = 0 previous_losses = [] while True: # Choose a bucket according to data distribution. We pick a random number # in [0, 1] and use the corresponding interval in train_buckets_scale. random_number_01 = np.random.random_sample() bucket_id = min([ i for i in xrange(len(train_buckets_scale)) if train_buckets_scale[i] > random_number_01 ]) # Get a batch and make a step. start_time = time.time() encoder_inputs, decoder_inputs, target_weights = model.get_batch( train_set, bucket_id) _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, False) step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint loss += step_loss / FLAGS.steps_per_checkpoint current_step += 1 # Once in a while, we save checkpoint, print statistics, and run evals. if current_step % FLAGS.steps_per_checkpoint == 0: # Print statistics for the previous epoch. perplexity = math.exp( float(loss)) if loss < 300 else float("inf") print( "global step %d learning rate %.4f step-time %.2f perplexity " "%.2f" % (model.global_step.eval(), model.learning_rate.eval(), step_time, perplexity)) # Decrease learning rate if no improvement was seen over last 3 times. if len(previous_losses) > 2 and loss > max( previous_losses[-3:]): sess.run(model.learning_rate_decay_op) previous_losses.append(loss) # Save checkpoint and zero timer and loss. checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt") model.saver.save(sess, checkpoint_path, global_step=model.global_step) step_time, loss = 0.0, 0.0 # Run evals on development set and print their perplexity. 
for bucket_id in xrange(len(_buckets)): if len(dev_set[bucket_id]) == 0: print(" eval: empty bucket %d" % (bucket_id)) continue encoder_inputs, decoder_inputs, target_weights = model.get_batch( dev_set, bucket_id) _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, True) eval_ppx = math.exp( float(eval_loss)) if eval_loss < 300 else float("inf") print(" eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx)) sys.stdout.flush()
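The learning-rate schedule shared by these variants decays only on a plateau: when the checkpoint loss fails to beat the best of the last three. A standalone sketch of that rule with made-up losses; the 0.99 factor mirrors the tutorial's learning_rate_decay_factor default and is an assumption here.

def maybe_decay(learning_rate, previous_losses, loss, factor=0.99):
  # Decay only when the current checkpoint loss is worse than all of the
  # previous three; otherwise keep the rate and just record the loss.
  if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
    learning_rate *= factor
  previous_losses.append(loss)
  return learning_rate

lr, history = 0.5, []
for loss in [4.0, 3.5, 3.2, 3.3, 3.6, 3.4]:
  lr = maybe_decay(lr, history, loss)
  print("loss %.1f -> lr %.4f" % (loss, lr))  # decays once, at loss 3.6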
def train():
  """Train an en->fr translation model using WMT data."""
  np.set_printoptions(suppress=True)
  # Prepare WMT data.
  print("Preparing WMT data in %s" % env.config.get("model", "data_dir"))
  (en_train, fr_train, type_train, en_dev, fr_dev, type_dev,
   en_test, fr_test, type_test, _, _) = data_utils.prepare_wmt_data(
       env.config.get("model", "data_dir"),
       env.config.getint("model", "en_vocab_size"),
       latent=True,
       n_sense=env.config.getint("model", "num_z"))

  with tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                        allow_soft_placement=True)) as sess:
    # Create model.
    print("Creating %d layers of %d units." % (
        env.config.getint("model", "num_layers"),
        env.config.getint("model", "size")))
    model = create_model(sess, False)
    show_all_variables()

    # Read data into buckets and compute their sizes.
    print("Reading development and training data (limit: %d)."
          % env.config.getint("model", "max_train_data_size"))
    dev_set, _ = read_data(en_dev, fr_dev, type_dev)
    #dev_set, _ = read_data(en_test, fr_test, type_test)
    train_set, train_order = read_data(en_train, fr_train, type_train,
                                       max_size=None)
    #test_set, _ = read_data(en_test, fr_test, type_test,
    #                        env.config.getint("model", "max_train_data_size"))
    train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
    # Use a float total so the bucket-scale division below is not integer
    # division under Python 2.
    train_total_size = float(sum(train_bucket_sizes))
    train_buckets_scale = [
        sum(train_bucket_sizes[:i + 1]) / train_total_size
        for i in xrange(len(train_bucket_sizes))
    ]
    dev_bucket_sizes = [len(dev_set[b]) for b in xrange(len(_buckets))]
    dev_total_size = int(sum(dev_bucket_sizes))

    # Derive the checkpointing frequency from the dev-set size.
    batch_size = env.config.getint("model", "batch_size")
    num_z = env.config.getint("model", "num_z")
    n_epoch = env.config.getint("model", "n_epoch")
    steps_per_epoch = int(train_total_size / batch_size)
    steps_per_dev = int(dev_total_size / batch_size)
    steps_per_checkpoint = steps_per_dev * 4
    total_steps = steps_per_epoch * n_epoch

    # Reports.
    print(_buckets)
    print("Train:")
    print("total: {}".format(int(train_total_size)))
    print("buckets: ", train_bucket_sizes)
    print("Dev:")
    print("total: {}".format(dev_total_size))
    print("buckets: ", dev_bucket_sizes)
    print()
    print("Steps_per_epoch:", steps_per_epoch)
    print("Total_steps:", total_steps)
    print("Steps_per_checkpoint:", steps_per_checkpoint)

    with_labeled_data = True
    isSGD = False

    # This is the training loop.
    step_time, loss = 0.0, 0.0
    current_step = 0
    previous_losses = []
    his = []
    local_alpha = 0.05
    low_ppx = 10000000
    low_ppx_step = 0
    dite = DataIterator(model, train_set, len(train_buckets_scale), num_z,
                        batch_size, train_buckets_scale, train_order)
    iteType = env.config.getint('model', 'iteType')
    if iteType == 0:
      print("withRandom")
      ite = dite.next_random()
    elif iteType == 1:
      print("withSequence")
      ite = dite.next_sequence()
    elif iteType == 2:
      print("withOrder")
      assert (batch_size == 1)
      ite = dite.next_sequence_continous()

    while current_step < total_steps:
      # For labeled training data.
      if with_labeled_data:
        start_time = time.time()
        encoder_inputs, decoder_inputs, target_weights, hiddens, bucket_id = \
            ite.next()
        print(len(encoder_inputs))
        _, _, _, L, norm, Q = model.batch_step(sess, encoder_inputs,
                                               decoder_inputs, target_weights,
                                               bucket_id, labeled=True,
                                               true_hidden_inputs=hiddens)
        step_time += (time.time() - start_time) / steps_per_checkpoint
        loss += (-L) / steps_per_checkpoint / batch_size
        current_step += 1

      # Once in a while, we save checkpoint, print statistics, and run evals.
      if current_step % steps_per_checkpoint == 0:
        print("--------------------", "TRAIN", current_step,
              "-------------------")
        # Print statistics for the previous epoch.
        perplexity = math.exp(float(loss)) if loss < 300 else float("inf")
        print("global step %d learning rate %.4f step-time %.2f perplexity "
              "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
                        step_time, perplexity))
        train_ppx = perplexity

        # Save checkpoint and zero timer and loss.
        checkpoint_path = os.path.join(env.config.get("model", "train_dir"),
                                       "translate.ckpt")
        if env.config.getboolean('model', "saveCheckpoint"):
          print("Saving model....")
          model.saver.save(sess, checkpoint_path,
                           global_step=model.global_step)
        step_time, loss = 0.0, 0.0

        # Evaluate on the dev data.
        print("--------------------", "DEV", current_step,
              "-------------------")
        Q, L, cost, accuracy, eval_ppx = evaluate(sess, model, dev_set,
                                                  _buckets, name="dev",
                                                  show_stat=True,
                                                  show_basic=True,
                                                  show_sample=True)
        his.append([current_step, Q, L, cost, accuracy, train_ppx, eval_ppx])
        if eval_ppx < low_ppx:
          low_ppx = eval_ppx
          low_ppx_step = current_step
        sys.stdout.flush()

        # Stop early if the eval perplexity is worse than all of the last 5
        # checkpoints (the learning-rate decay used elsewhere is disabled).
        if len(previous_losses) > 5 and eval_ppx > max(previous_losses[-5:]):
          break
          #sess.run(model.learning_rate_decay_op)
        previous_losses.append(eval_ppx)

        # Increase alpha.
        if env.config.getboolean("model", "withAlpha"):
          if local_alpha + 0.1 <= 1.0:
            local_alpha += 0.1
            with tf.variable_scope('', reuse=True) as scope:
              alpha = tf.get_variable(
                  "embedding_rnn_seq2seq_latent/alpha")
              sess.run(alpha.assign([local_alpha]))
            print("alpha", local_alpha)
          print()

    # Report the checkpoint with the lowest eval perplexity and dump the log.
    low_index = 0
    low_ppx = 1000000000
    for i in xrange(len(his)):
      ep = his[i][-1]
      if low_ppx > ep:
        low_ppx = ep
        low_index = i
    theone = his[low_index]
    print(theone[0], "{:.2f}/{:.2f}".format(theone[-2], theone[-1]),
          theone[-3])
    df = pd.DataFrame(his)
    df.columns = ["step", "Q", "L", "cost", "Accuracy", "Train_ppx",
                  "Eval_ppx"]
    df.to_csv(os.path.join(env.config.get("model", "train_dir"), "log.csv"))
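All of these variants guard the perplexity computation the same way. As a worked note: perplexity is exp(cross-entropy loss), and math.exp overflows a float64 once the argument passes roughly 709.78, so any loss above 300 (already an astronomically bad model) is reported as infinity instead of raising OverflowError.

import math

def safe_perplexity(loss):
  # exp(loss) overflows near loss ~ 709.78; cap well before that.
  return math.exp(float(loss)) if loss < 300 else float("inf")

print(safe_perplexity(4.6))    # ~99.5
print(safe_perplexity(350.0))  # inf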