# Assumed module-level imports: numpy as np, tensorflow as tf,
# from time import time, from math import exp, from os import path, from sys import stdout.
def train(self):
    train_enc, train_dec, dev_enc, dev_dec, _, _ = prepare_custom_data(
        'cps/', 'data/train.enc', 'data/train.dec',
        'data/test.enc', 'data/test.dec', 20000, 20000)
    conf = tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.666))
    conf.gpu_options.allocator_type = 'BFC'
    with tf.Session(config=conf) as session:
        model = self.get_or_create_model(session, False)
        (dev_set, train_set, train_buckets_scale,
         step_time, loss, current_step, previous_losses) = self.get_params(
            dev_enc, dev_dec, train_enc, train_dec)
        while True:
            # Pick a bucket with probability proportional to its size.
            rand = np.random.random_sample()
            b_id = min([i for i in xrange(len(train_buckets_scale))
                        if train_buckets_scale[i] > rand])
            start = time()
            encoder_inps, decoder_inps, target_weights = model.get_batch(train_set, b_id)
            _, step_loss, _ = model.step(session, encoder_inps, decoder_inps,
                                         target_weights, b_id, False)
            step_time += (time() - start) / 300.0
            loss += step_loss / 300.0
            current_step += 1
            # Every 300 steps: report, decay the learning rate if needed,
            # checkpoint, and evaluate on the dev set.
            if not current_step % 300:
                print("global step {} learning rate {} step_time {} perplexity {}".format(
                    model.global_step.eval(), model.learning_rate.eval(), step_time,
                    exp(loss) if loss < 300 else float('inf')))
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    session.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                model.saver.save(session, path.join('cps/', 'seq2seq.ckpt'),
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                for _b_id in xrange(len(self.buckets)):
                    if not dev_set[_b_id]:
                        continue
                    encoder_inps, decoder_inps, target_weights = model.get_batch(dev_set, _b_id)
                    _, eval_loss, _ = model.step(session, encoder_inps, decoder_inps,
                                                 target_weights, _b_id, True)
                    print("ev: buck " + str(_b_id) + " perp " +
                          str(exp(eval_loss) if eval_loss < 300 else float('inf')))
                stdout.flush()
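# A hedged sketch of the helper this variant calls but does not show: the body of
# get_params below is an assumption, reconstructed from the setup code of the fuller
# train() variants later in this section, not the author's actual implementation.
def get_params(self, dev_enc, dev_dec, train_enc, train_dec):
    # read_data here is assumed to be the same bucketing reader used elsewhere.
    dev_set = read_data(dev_enc, dev_dec)
    train_set = read_data(train_enc, train_dec)
    bucket_sizes = [len(train_set[b]) for b in xrange(len(self.buckets))]
    total = float(sum(bucket_sizes))
    # Cumulative fractions used for size-proportional bucket sampling.
    train_buckets_scale = [sum(bucket_sizes[:i + 1]) / total
                           for i in xrange(len(bucket_sizes))]
    # step_time, loss, current_step, previous_losses start out zeroed/empty.
    return dev_set, train_set, train_buckets_scale, 0.0, 0.0, 0, []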
def train(): # prepare dataset print("Starting to train from " + working_directory) enc_train, dec_train, _, _ = data_utils.prepare_custom_data( working_directory, train_enc, train_dec, enc_vocab_size, dec_vocab_size) gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.666) config = tf.ConfigProto(gpu_options=gpu_options) config.gpu_options.allocator_type = 'BFC' with tf.Session(config=config) as sess: print("Creating model with %d layers and %d cells." % (num_layers, layer_size)) model = create_model(sess, False) train_set = read_data(enc_train, dec_train, max_train_data_size) train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))] train_total_size = float(sum(train_bucket_sizes)) train_buckets_scale = [ sum(train_bucket_sizes[:i + 1]) / train_total_size for i in xrange(len(train_bucket_sizes)) ] step_time, loss = 0.0, 0.0 current_step = 0 previous_losses = [] while True: random_number_01 = np.random.random_sample() bucket_id = min([ i for i in xrange(len(train_buckets_scale)) if train_buckets_scale[i] > random_number_01 ]) start_time = time.time() encoder_inputs, decoder_inputs, target_weights = model.get_batch( train_set, bucket_id) _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, False) step_time += (time.time() - start_time) / steps_per_checkpoint loss += step_loss / steps_per_checkpoint current_step += 1 if current_step % steps_per_checkpoint == 0: #perplexity = math.exp(loss) if loss < 300 else float('inf') print("Saved model at step %d with time %.2f " % (model.global_step.eval(), step_time)) if len(previous_losses) > 2 and loss > max( previous_losses[-3:]): sess.run(model.learning_rate_decay_op) previous_losses.append(loss) checkpoint_path = os.path.join(working_directory, "seq2seq.ckpt") model.saver.save(sess, checkpoint_path, global_step=model.global_step) step_time, loss = 0.0, 0.0 sys.stdout.flush()
def train():
    # Prepare dataset.
    enc_train, dec_train = data_utils.prepare_custom_data(gConfig['working_directory'])
    train_set = read_data(enc_train, dec_train)

    config = tf.ConfigProto()  # added: `config` was referenced below but never defined
    with tf.Session(config=config) as sess:
        model = create_model(sess, False)
        while True:
            # The original `sess.run(model)` is not a valid fetch; stepping the model
            # on a batch, as in the fuller variants in this section, looks like this:
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(train_set, 0)
            model.step(sess, encoder_inputs, decoder_inputs, target_weights, 0, False)
def train_model(checkpoint_dir):
    with tf.Session() as sess:
        model = create_model(sess, False, checkpoint_dir)
        sess.run(tf.initialize_all_variables())
        print("Variables initialized...")

        enc_train, dec_train, _, _ = data_utils.prepare_custom_data(
            data_dir, encoding_file, decoding_file, source_vocab_size, target_vocab_size)
        print("Data encoded...")
        train_set = read_data(enc_train, dec_train, max_size=None)
        train_bucket_sizes = [len(train_set[b]) for b in range(len(buckets))]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in range(len(train_bucket_sizes))]

        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        print("Starting training...")
        while True:
            # Choose a bucket according to data distribution. We pick a random number
            # in [0, 1] and use the corresponding interval in train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in range(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() - start_time) / FLAGS_steps_per_checkpoint
            loss += step_loss / FLAGS_steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS_steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d step-time %.2f perplexity %.2f" %
                      (model.global_step.eval(), step_time, perplexity))
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(checkpoint_dir, "seq2seq.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                sys.stdout.flush()
def train():
    encoder_data, decoder_data = data_utils.prepare_custom_data(
        gConfig['working_directory'])
    train_set = read_data(encoder_data, decoder_data)
def train(): # prepare dataset print("Preparing data in %s" % gConfig['working_directory']) enc_train, dec_train, enc_dev, dec_dev, _, _ = data_utils.prepare_custom_data(gConfig['working_directory'],gConfig['train_enc'],gConfig['train_dec'],gConfig['test_enc'],gConfig['test_dec'],gConfig['enc_vocab_size'],gConfig['dec_vocab_size']) # setup config to use BFC allocator config = tf.ConfigProto() config.gpu_options.allocator_type = 'BFC' with tf.Session(config=config) as sess: # Create model. print("Creating %d layers of %d units." % (gConfig['num_layers'], gConfig['layer_size'])) model = create_model(sess, False) # Read data into buckets and compute their sizes. print ("Reading development and training data (limit: %d)." % gConfig['max_train_data_size']) dev_set = read_data(enc_dev, dec_dev) train_set = read_data(enc_train, dec_train, gConfig['max_train_data_size']) train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))] train_total_size = float(sum(train_bucket_sizes)) # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to # the size if i-th training bucket, as used later. train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size for i in xrange(len(train_bucket_sizes))] # This is the training loop. step_time, loss = 0.0, 0.0 current_step = 0 previous_losses = [] while True: # Choose a bucket according to data distribution. We pick a random number # in [0, 1] and use the corresponding interval in train_buckets_scale. random_number_01 = np.random.random_sample() bucket_id = min([i for i in xrange(len(train_buckets_scale)) if train_buckets_scale[i] > random_number_01]) # Get a batch and make a step. start_time = time.time() encoder_inputs, decoder_inputs, target_weights = model.get_batch( train_set, bucket_id) _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, False) step_time += (time.time() - start_time) / gConfig['steps_per_checkpoint'] loss += step_loss / gConfig['steps_per_checkpoint'] current_step += 1 # Once in a while, we save checkpoint, print statistics, and run evals. if current_step % gConfig['steps_per_checkpoint'] == 0: # Print statistics for the previous epoch. perplexity = math.exp(loss) if loss < 300 else float('inf') print ("global step %d learning rate %.4f step-time %.2f perplexity " "%.2f" % (model.global_step.eval(), model.learning_rate.eval(), step_time, perplexity)) # Decrease learning rate if no improvement was seen over last 3 times. if len(previous_losses) > 2 and loss > max(previous_losses[-3:]): sess.run(model.learning_rate_decay_op) previous_losses.append(loss) # Save checkpoint and zero timer and loss. checkpoint_path = os.path.join(gConfig['working_directory'], "seq2seq.ckpt") model.saver.save(sess, checkpoint_path, global_step=model.global_step) step_time, loss = 0.0, 0.0 # Run evals on development set and print their perplexity. for bucket_id in xrange(len(_buckets)): if len(dev_set[bucket_id]) == 0: print(" eval: empty bucket %d" % (bucket_id)) continue encoder_inputs, decoder_inputs, target_weights = model.get_batch( dev_set, bucket_id) _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, True) eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf') print(" eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx)) sys.stdout.flush()
def train():
    # Generate dataset.
    enc_train, dec_train = data_utils.prepare_custom_data(
        gConfig['working_directory'])
    train_set = read_data(enc_train, dec_train)
def train():
    # Prepare dataset.
    enc_train, dec_train = data_utils.prepare_custom_data(
        gConfig['working_directory'])
    train_set = read_data(enc_train, dec_train)
def train():
    # Prepare said dataset.
    encoder_train, decoder_train = data_utils.prepare_custom_data(
        gConfig['working_directory'])
    # Attempt to tokenize the sentences.
    train_set = read_data(encoder_train, decoder_train)
def train(): # prepare dataset print("Preparing data in %s" % gConfig['working_directory']) enc_train, dec_train, enc_dev, dec_dev, _, _ = data_utils.prepare_custom_data(gConfig['working_directory'],gConfig['train_enc'],gConfig['train_dec'],gConfig['test_enc'],gConfig['test_dec'],gConfig['enc_vocab_size'],gConfig['dec_vocab_size']) # setup config to use BFC allocator config = tf.ConfigProto() config.gpu_options.allocator_type = 'BFC' # config.gpu_options.per_process_gpu_memory_fraction = 0.5 config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: # Create model. print("Creating %d layers of %d units." % (gConfig['num_layers'], gConfig['layer_size'])) model = create_model(sess, False) # Read data into buckets and compute their sizes. print ("Reading development and training data (limit: %d)." % gConfig['max_train_data_size']) # dev_set = read_data(enc_dev, dec_dev) # For validation train_set = read_data(enc_train, dec_train, gConfig['max_train_data_size']) train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))] train_total_size = float(sum(train_bucket_sizes)) # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to # the size if i-th training bucket, as used later. train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size for i in xrange(len(train_bucket_sizes))] # op to write logs to Tensorboard summary_writer = tf.summary.FileWriter(gConfig['log_dir'], graph=sess.graph) # This is the training loop. step_time, loss = 0.0, 0.0 current_step = 0 previous_losses = [] while model.global_step.eval() <= gConfig['max_num_steps']: # Choose a bucket according to data distribution. We pick a random number # in [0, 1] and use the corresponding interval in train_buckets_scale. random_number_01 = np.random.random_sample() bucket_id = min([i for i in xrange(len(train_buckets_scale)) if train_buckets_scale[i] > random_number_01]) step_loss_summary = tf.Summary() learning_rate_summary = tf.Summary() # embedding_summary = tf.Summary() # Debug # Get a batch and make a step. start_time = time.time() encoder_inputs, decoder_inputs, target_weights = model.get_batch( train_set, bucket_id) _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, False) # _, step_loss, _, embedding_matrix = model.step(sess, encoder_inputs, decoder_inputs, # target_weights, bucket_id, False) step_loss_value = step_loss_summary.value.add() step_loss_value.tag = "step loss" step_loss_value.simple_value = step_loss.astype(float) learning_rate_value = learning_rate_summary.value.add() learning_rate_value.tag = "learning rate" learning_rate_value.simple_value = model.learning_rate.eval().astype(float) # pdb.set_trace() # embedding_value = embedding_summary.value.add() # embedding_value.tag = "embedding matrix mean" # embedding_value.simple_value = np.mean(embedding_matrix).astype(float) # Write logs at every iteration summary_writer.add_summary(step_loss_summary, model.global_step.eval()) summary_writer.add_summary(learning_rate_summary, model.global_step.eval()) # summary_writer.add_summary(embedding_summary, model.global_step.eval()) step_time += (time.time() - start_time) / gConfig['steps_per_checkpoint'] loss += step_loss / gConfig['steps_per_checkpoint'] current_step += 1 # Once in a while, we save checkpoint, print statistics, and run evals. if current_step % 50 == 0 or current_step == 1: # Print statistics for the previous epoch. 
                if current_step == 1:
                    # Change learning rate in fine-tuning (uncomment next line for fine-tuning).
                    # sess.run(model.learning_rate_finetune_op)
                    # sess.run(model.learning_rate.assign(tf.Variable(float(0.0005), trainable=False)))
                    perplexity = math.exp(step_loss) if step_loss < 300 else float('inf')
                    print("global step %d learning rate %.7f loss %.5f perplexity %.2f" %
                          (model.global_step.eval(), model.learning_rate.eval(),
                           step_loss, perplexity))
                else:
                    perplexity = math.exp(loss) if loss < 300 else float('inf')
                    print("global step %d learning rate %.4f step-time %.2f loss %.4f perplexity %.2f" %
                          (model.global_step.eval(), model.learning_rate.eval(),
                           step_time, loss, perplexity))

                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)

                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(gConfig['model_directory'], "seq2seq.ckpt")
                if current_step % 5000 == 0:
                    model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                # step_time, loss = 0.0, 0.0

                # Run evals on development set and print their perplexity (used for validation).
                # for bucket_id in xrange(len(_buckets)):
                #     if len(dev_set[bucket_id]) == 0:
                #         print("  eval: empty bucket %d" % (bucket_id))
                #         continue
                #     encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                #         dev_set, bucket_id)
                #     _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                #                                  target_weights, bucket_id, True)
                #     eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
                #     print("  eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
                sys.stdout.flush()
def train(): # prepare dataset print("Preparing data in %s" % gConfig['working_directory']) enc_train, dec_train, enc_dev, dec_dev, _, _ = data_utils.prepare_custom_data(gConfig['working_directory'],gConfig['train_enc'],gConfig['train_dec'],gConfig['test_enc'],gConfig['test_dec'],gConfig['enc_vocab_size'],gConfig['dec_vocab_size']) # Only allocate 2/3 of the gpu memory to allow for running gpu-based predictions while training: gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.666) config = tf.ConfigProto(gpu_options=gpu_options) config.gpu_options.allocator_type = 'BFC' with tf.Session(config=config) as sess: # Create model. print("Creating %d layers of %d units." % (gConfig['num_layers'], gConfig['layer_size'])) model = create_model(sess, False) # Read data into buckets and compute their sizes. print ("Reading development and training data (limit: %d)." % gConfig['max_train_data_size']) dev_set = read_data(enc_dev, dec_dev) train_set = read_data(enc_train, dec_train, gConfig['max_train_data_size']) train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))] train_total_size = float(sum(train_bucket_sizes)) train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size for i in xrange(len(train_bucket_sizes))] # This is the training loop. step_time, loss = 0.0, 0.0 current_step = 0 previous_losses = [] while True: random_number_01 = np.random.random_sample() bucket_id = min([i for i in xrange(len(train_buckets_scale)) if train_buckets_scale[i] > random_number_01]) start_time = time.time() encoder_inputs, decoder_inputs, target_weights = model.get_batch( train_set, bucket_id) _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, False) step_time += (time.time() - start_time) / gConfig['steps_per_checkpoint'] loss += step_loss / gConfig['steps_per_checkpoint'] current_step += 1 # Once in a while, we save checkpoint, print statistics, and run evals. if current_step % gConfig['steps_per_checkpoint'] == 0: perplexity = math.exp(loss) if loss < 300 else float('inf') print ("global step %d learning rate %.4f step-time %.2f perplexity " "%.2f" % (model.global_step.eval(), model.learning_rate.eval(), step_time, perplexity)) # if no improvement was seen over last 3 times , decrese the learning rate if len(previous_losses) > 2 and loss > max(previous_losses[-3:]): sess.run(model.learning_rate_decay_op) previous_losses.append(loss) checkpoint_path = os.path.join(gConfig['working_directory'], "seq2seq.ckpt") model.saver.save(sess, checkpoint_path, global_step=model.global_step) step_time, loss = 0.0, 0.0 for bucket_id in xrange(len(_buckets)): if len(dev_set[bucket_id]) == 0: print(" eval: empty bucket %d" % (bucket_id)) continue encoder_inputs, decoder_inputs, target_weights = model.get_batch( dev_set, bucket_id) _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, True) eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf') print(" eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx)) sys.stdout.flush()
def train():
    # Prepare encoding data (what's heard) with decoding data (response).
    enc_train, dec_train = data_utils.prepare_custom_data(
        gConfig['working_directory'])
    train_set = read_data(enc_train, dec_train)
response = " ".join( [tf.compat.as_str(vocab_list[output]) for output in outputs]) sentence = prompt_user("next", response) def prompt_user(phase, response=None): if phase == "start": print("Your query: ") elif phase == "next": print("Trumps response: ") print(response) elif phase == "long": print("Your input was too long. Please try a shorter sentence.") sys.stdout.write("> ") sys.stdout.flush() sentence = sys.stdin.readline() return sentence if __name__ == '__main__': if config.mode == 'train': # print("Preparing data in %s" % config.working_directory) data_utils.prepare_custom_data() config_tf = tf.ConfigProto() # setup config to use BFC allocator config_tf.gpu_options.allocator_type = 'BFC' train() elif config.mode == 'test': data_utils.load_en() test()
def train():
    # Prepares the dataset.
    enc_train, dec_train = data_utils.prepare_custom_data(gConfig['working_directory'])
    train_set = read_data(enc_train, dec_train)
def train():
    # Prepare dataset.
    print("Preparing data in %s" % gConfig['working_directory'])
    enc_train, dec_train, enc_dev, dec_dev, _, _ = data_utils.prepare_custom_data(
        gConfig['working_directory'], gConfig['train_enc'], gConfig['train_dec'],
        gConfig['test_enc'], gConfig['test_dec'],
        gConfig['enc_vocab_size'], gConfig['dec_vocab_size'])

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.666)
    config = tf.ConfigProto(gpu_options=gpu_options)
    config.gpu_options.allocator_type = 'BFC'

    with tf.Session(config=config) as sess:
        # Create model.
        print("Creating %d layers of %d units." % (gConfig['num_layers'], gConfig['layer_size']))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)." % gConfig['max_train_data_size'])
        dev_set = read_data(enc_dev, dec_dev)
        train_set = read_data(enc_train, dec_train, gConfig['max_train_data_size'])
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        # Train loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        while True:
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            # Time this training step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() - start_time) / gConfig['steps_per_checkpoint']
            loss += step_loss / gConfig['steps_per_checkpoint']
            current_step += 1

            # Save checkpoints and print statistics.
            if current_step % gConfig['steps_per_checkpoint'] == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d learning rate %.4f step-time %.2f perplexity %.2f" %
                      (model.global_step.eval(), model.learning_rate.eval(),
                       step_time, perplexity))
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint of training process.
                checkpoint_path = os.path.join(gConfig['working_directory'], "seq2seq.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                # Run evals on development set and print their perplexity.
                for bucket_id in xrange(len(_buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        print("  eval: empty bucket %d" % (bucket_id))
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                                 target_weights, bucket_id, True)
                    eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
                    print("  eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
                sys.stdout.flush()
def train(): # prepare dataset print("Preparing data in %s" % gConfig['working_directory']) enc_train, dec_train, enc_dev, dec_dev, _, _ = data_utils.prepare_custom_data( gConfig['working_directory'], gConfig['train_enc'], gConfig['train_dec'], gConfig['test_enc'], gConfig['test_dec'], gConfig['enc_vocab_size'], gConfig['dec_vocab_size']) print("vocabulary created.") exit(0) # Only allocate 2/3 of the gpu memory to allow for running gpu-based predictions while training: gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.666) config = tf.ConfigProto(gpu_options=gpu_options) config.gpu_options.allocator_type = 'BFC' with tf.Session(config=config) as sess: # Create model. print("Creating %d layers of %d units." % (gConfig['num_layers'], gConfig['layer_size'])) model = create_model(sess, False) # Read data into buckets and compute their sizes. print("Reading development and training data (limit: %d)." % gConfig['max_train_data_size']) dev_set = read_data(enc_dev, dec_dev) train_set = read_data(enc_train, dec_train, gConfig['max_train_data_size']) train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))] train_total_size = float(sum(train_bucket_sizes)) # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to # the size if i-th training bucket, as used later. train_buckets_scale = [ sum(train_bucket_sizes[:i + 1]) / train_total_size for i in xrange(len(train_bucket_sizes)) ] # This is the training loop. step_time, loss = 0.0, 0.0 current_step = 0 previous_losses = [] while True: # Choose a bucket according to data distribution. We pick a random number # in [0, 1] and use the corresponding interval in train_buckets_scale. random_number_01 = np.random.random_sample() bucket_id = min([ i for i in xrange(len(train_buckets_scale)) if train_buckets_scale[i] > random_number_01 ]) # Get a batch and make a step. start_time = time.time() encoder_inputs, decoder_inputs, target_weights = model.get_batch( train_set, bucket_id) _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, False) step_time += (time.time() - start_time) / gConfig['steps_per_checkpoint'] loss += step_loss / gConfig['steps_per_checkpoint'] current_step += 1 # Once in a while, we save checkpoint, print statistics, and run evals. if current_step % gConfig['steps_per_checkpoint'] == 0: # Print statistics for the previous epoch. perplexity = math.exp(loss) if loss < 300 else float('inf') print( "global step %d learning rate %.4f step-time %.2f perplexity " "%.2f" % (model.global_step.eval(), model.learning_rate.eval(), step_time, perplexity)) # Decrease learning rate if no improvement was seen over last 3 times. if len(previous_losses) > 2 and loss > max( previous_losses[-3:]): sess.run(model.learning_rate_decay_op) previous_losses.append(loss) # Save checkpoint and zero timer and loss. checkpoint_path = os.path.join(gConfig['working_directory'], "seq2seq.ckpt") model.saver.save(sess, checkpoint_path, global_step=model.global_step) step_time, loss = 0.0, 0.0 # Run evals on development set and print their perplexity. 
                for bucket_id in xrange(len(_buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        print("  eval: empty bucket %d" % (bucket_id))
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                                 target_weights, bucket_id, True)
                    eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
                    print("  eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
                sys.stdout.flush()
def train():
    # Prepare dataset.
    if not (tf.gfile.Exists(gConfig['working_dir'] + 'im_filename.txt')
            and tf.gfile.Exists(gConfig['working_dir'] + 'caption_a.txt')
            and tf.gfile.Exists(gConfig['working_dir'] + 'caption_b.txt')):
        print("Creating dataset...")
        # Generate im_filename, caption_a, caption_b and save them to working_dir.
        data_utils.parse_MSCOCO(gConfig['mscoco_path'], gConfig['working_dir'], permute=True)

    # Prepare encoder/decoder inputs.
    print("Preparing data in %s" % gConfig['working_dir'])
    enc_train, dec_train, _, _ = data_utils.prepare_custom_data(
        gConfig['working_dir'],
        gConfig['working_dir'] + 'caption_a.txt',
        gConfig['working_dir'] + 'caption_b.txt',
        gConfig['enc_vocab_size'], gConfig['dec_vocab_size'])

    # Set up config to use the BFC allocator.
    config = tf.ConfigProto()
    config.gpu_options.allocator_type = 'BFC'
    config.gpu_options.allow_growth = True  # "chunks"

    with tf.Session(config=config) as sess:
        # Create model.
        print("Creating %d layers of %d units." % (gConfig['num_layers'], gConfig['layer_size']))
        sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading training data (limit: %d)." % gConfig['max_train_data_size'])
        train_set = read_vector(sess, gConfig['working_dir'] + 'im_filename.txt',
                                enc_train, dec_train, gConfig['max_train_data_size'])
        # train_set = read_vector(sess, gConfig['working_dir'] + 'toy_im_filename.txt',
        #                         gConfig['working_dir'] + 'toy_caption_a.txt.ids50000',
        #                         gConfig['working_dir'] + 'toy_caption_b.txt.ids50000',
        #                         gConfig['max_train_data_size'])
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
        # to select a bucket. The length of [scale[i], scale[i+1]] is proportional to
        # the size of the i-th training bucket, as used later.
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        # Op to write logs to TensorBoard.
        summary_writer = tf.summary.FileWriter(gConfig['log_dir'], graph=sess.graph)

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        # pdb.set_trace()
        while model.global_step.eval() <= gConfig['max_num_steps']:
            # Choose a bucket according to data distribution. We pick a random number
            # in [0, 1] and use the corresponding interval in train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            step_loss_summary = tf.Summary()
            learning_rate_summary = tf.Summary()

            # Get a batch and make a step.
            start_time = time.time()
            decoder_hiddens, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, decoder_hiddens, decoder_inputs,
                                         target_weights, bucket_id, False)

            step_loss_value = step_loss_summary.value.add()
            step_loss_value.tag = "step loss"
            step_loss_value.simple_value = step_loss.astype(float)

            learning_rate_value = learning_rate_summary.value.add()
            learning_rate_value.tag = "learning rate"
            learning_rate_value.simple_value = model.learning_rate.eval().astype(float)

            # Write logs at every iteration.
            summary_writer.add_summary(step_loss_summary, model.global_step.eval())
            summary_writer.add_summary(learning_rate_summary, model.global_step.eval())

            step_time += (time.time() - start_time) / gConfig['steps_per_checkpoint']
            loss += step_loss / gConfig['steps_per_checkpoint']
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % gConfig['steps_per_checkpoint'] == 0 or current_step == 1:
                # Print statistics for the previous epoch.
                if current_step == 1:
                    perplexity = math.exp(step_loss) if step_loss < 300 else float('inf')
                    print("global step %d learning rate %.4f loss %.4f perplexity %.2f" %
                          (model.global_step.eval(), model.learning_rate.eval(),
                           step_loss, perplexity))
                else:
                    perplexity = math.exp(loss) if loss < 300 else float('inf')
                    print("global step %d learning rate %.4f step-time %.2f loss %.4f perplexity %.2f" %
                          (model.global_step.eval(), model.learning_rate.eval(),
                           step_time, loss, perplexity))

                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)

                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(gConfig['model_dir'], "seq2seq.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                sys.stdout.flush()