Example #1
def train(self):
    train_enc, train_dec, dev_enc, dev_dec, _, _ = prepare_custom_data('cps/',
            'data/train.enc', 'data/train.dec', 'data/test.enc', 'data/test.dec', 20000, 20000)
    conf = tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.666))
    conf.gpu_options.allocator_type = 'BFC'
    with tf.Session(config=conf) as session:
        model = self.get_or_create_model(session, False)
        dev_set, train_set, train_buckets_scale, step_time, loss, current_step, previous_losses = self.get_params(
                dev_enc, dev_dec, train_enc, train_dec)
        while True:
            # Pick a bucket according to the cumulative data distribution.
            b_id = min([i for i in xrange(len(train_buckets_scale))
                        if train_buckets_scale[i] > np.random.random_sample()])
            start = time()
            encoder_inps, decoder_inps, target_weights = model.get_batch(train_set, b_id)
            _, step_loss, _ = model.step(session, encoder_inps, decoder_inps, target_weights, b_id, False)
            # 300 is the checkpoint interval; average step time and loss over it.
            step_time += (time() - start) / 300.0
            loss += step_loss / 300.0
            current_step += 1
            if not current_step % 300:
                print("global step {} learning rate {} step_time {} perplexity {}".format(
                    model.global_step.eval(), model.learning_rate.eval(), step_time,
                    exp(loss) if loss < 300 else float('inf')))
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    session.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                model.saver.save(session, path.join('cps/', 'seq2seq.ckpt'), global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                for _b_id in xrange(len(self.buckets)):
                    if not dev_set[_b_id]:
                        continue
                    encoder_inps, decoder_inps, target_weights = model.get_batch(dev_set, _b_id)
                    _, eval_loss, _ = model.step(session, encoder_inps, decoder_inps, target_weights, _b_id, True)
                    eval_ppx = exp(eval_loss) if eval_loss < 300 else float('inf')
                    print("ev: buck " + str(_b_id) + " perp " + str(eval_ppx))
                stdout.flush()
Example #2
def train():
    # prepare dataset
    print("Starting to train from " + working_directory)
    enc_train, dec_train, _, _ = data_utils.prepare_custom_data(
        working_directory, train_enc, train_dec, enc_vocab_size,
        dec_vocab_size)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.666)
    config = tf.ConfigProto(gpu_options=gpu_options)
    config.gpu_options.allocator_type = 'BFC'

    with tf.Session(config=config) as sess:
        print("Creating model with %d layers and %d cells." %
              (num_layers, layer_size))
        model = create_model(sess, False)
        train_set = read_data(enc_train, dec_train, max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        while True:
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() - start_time) / steps_per_checkpoint
            loss += step_loss / steps_per_checkpoint
            current_step += 1

            if current_step % steps_per_checkpoint == 0:
                #perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("Saved model at step %d with time %.2f " %
                      (model.global_step.eval(), step_time))
                if len(previous_losses) > 2 and loss > max(
                        previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                checkpoint_path = os.path.join(working_directory,
                                               "seq2seq.ckpt")
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                sys.stdout.flush()
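Several of these examples call a read_data helper that is never shown. Below is a minimal sketch of what such a helper typically looks like in this style of seq2seq code; the bucket sizes, the EOS_ID value, and the file format (one space-separated line of token ids per sentence) are illustrative assumptions, not taken from the snippets.

# Hypothetical read_data sketch: pairs up two token-id files and places each
# (source, target) pair into the smallest bucket it fits; pairs that fit no
# bucket are dropped. _buckets and EOS_ID are assumed, not from the snippets.
_buckets = [(5, 10), (10, 15), (20, 25), (40, 50)]
EOS_ID = 2

def read_data(source_path, target_path, max_size=None):
    data_set = [[] for _ in _buckets]
    with open(source_path) as source_file, open(target_path) as target_file:
        source, target = source_file.readline(), target_file.readline()
        counter = 0
        while source and target and (not max_size or counter < max_size):
            counter += 1
            source_ids = [int(x) for x in source.split()]
            target_ids = [int(x) for x in target.split()] + [EOS_ID]
            for bucket_id, (source_size, target_size) in enumerate(_buckets):
                if len(source_ids) < source_size and len(target_ids) < target_size:
                    data_set[bucket_id].append([source_ids, target_ids])
                    break
            source, target = source_file.readline(), target_file.readline()
    return data_set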
Example #3
def train():
    # prepare dataset
    enc_train, dec_train = data_utils.prepare_custom_data(
        gConfig['working_directory'])

    train_set = read_data(enc_train, dec_train)

    # set up a session config to use the BFC allocator, as in the other examples
    config = tf.ConfigProto()
    config.gpu_options.allocator_type = 'BFC'

    with tf.Session(config=config) as sess:
        model = create_model(sess, False)

        while True:
            # Take one training step; the fuller examples below sample the
            # bucket from the data distribution instead of using bucket 0.
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, 0)
            model.step(sess, encoder_inputs, decoder_inputs,
                       target_weights, 0, False)
Example #4
def train_model(checkpoint_dir):
    with tf.Session() as sess:
        model = create_model(sess, False, checkpoint_dir)
        sess.run(tf.initialize_all_variables())
        print("Variables initialized...")

        enc_train, dec_train, _, _ = data_utils.prepare_custom_data(
            data_dir, encoding_file, decoding_file, source_vocab_size, target_vocab_size)
        print("Data encoded...")
        train_set = read_data(enc_train, dec_train, max_size=None)
        train_bucket_sizes = [len(train_set[b]) for b in range(len(buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in range(len(train_bucket_sizes))]

        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        print("Starting training...")
        while True:
            # Choose a bucket according to data distribution. We pick a random number
            # in [0, 1] and use the corresponding interval in train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in range(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() - start_time) / FLAGS_steps_per_checkpoint
            loss += step_loss / FLAGS_steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS_steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d step-time %.2f perplexity "
                      "%.2f" % (model.global_step.eval(), step_time, perplexity))
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(checkpoint_dir, "seq2seq.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                sys.stdout.flush()
Example #5
def train():

    encoder_data, decoder_data = data_utils.prepare_custom_data(
        gConfig['working_directory'])
    train_set = read_data(encoder_data, decoder_data)
Example #6
def train():
  # prepare dataset
  print("Preparing data in %s" % gConfig['working_directory'])
  enc_train, dec_train, enc_dev, dec_dev, _, _ = data_utils.prepare_custom_data(
      gConfig['working_directory'], gConfig['train_enc'], gConfig['train_dec'],
      gConfig['test_enc'], gConfig['test_dec'],
      gConfig['enc_vocab_size'], gConfig['dec_vocab_size'])

  # setup config to use BFC allocator
  config = tf.ConfigProto()
  config.gpu_options.allocator_type = 'BFC'

  with tf.Session(config=config) as sess:
    # Create model.
    print("Creating %d layers of %d units." % (gConfig['num_layers'], gConfig['layer_size']))
    model = create_model(sess, False)

    # Read data into buckets and compute their sizes.
    print ("Reading development and training data (limit: %d)."
           % gConfig['max_train_data_size'])
    dev_set = read_data(enc_dev, dec_dev)
    train_set = read_data(enc_train, dec_train, gConfig['max_train_data_size'])
    train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
    train_total_size = float(sum(train_bucket_sizes))

    # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
    # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
    # the size of the i-th training bucket, as used later.
    train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                           for i in xrange(len(train_bucket_sizes))]

    # This is the training loop.
    step_time, loss = 0.0, 0.0
    current_step = 0
    previous_losses = []
    while True:
      # Choose a bucket according to data distribution. We pick a random number
      # in [0, 1] and use the corresponding interval in train_buckets_scale.
      random_number_01 = np.random.random_sample()
      bucket_id = min([i for i in xrange(len(train_buckets_scale))
                       if train_buckets_scale[i] > random_number_01])

      # Get a batch and make a step.
      start_time = time.time()
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          train_set, bucket_id)
      _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                   target_weights, bucket_id, False)
      step_time += (time.time() - start_time) / gConfig['steps_per_checkpoint']
      loss += step_loss / gConfig['steps_per_checkpoint']
      current_step += 1

      # Once in a while, we save checkpoint, print statistics, and run evals.
      if current_step % gConfig['steps_per_checkpoint'] == 0:
        # Print statistics for the previous epoch.
        perplexity = math.exp(loss) if loss < 300 else float('inf')
        print ("global step %d learning rate %.4f step-time %.2f perplexity "
               "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
                         step_time, perplexity))
        # Decrease learning rate if no improvement was seen over last 3 times.
        if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
          sess.run(model.learning_rate_decay_op)
        previous_losses.append(loss)
        # Save checkpoint and zero timer and loss.
        checkpoint_path = os.path.join(gConfig['working_directory'], "seq2seq.ckpt")
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)
        step_time, loss = 0.0, 0.0
        # Run evals on development set and print their perplexity.
        for bucket_id in xrange(len(_buckets)):
          if len(dev_set[bucket_id]) == 0:
            print("  eval: empty bucket %d" % (bucket_id))
            continue
          encoder_inputs, decoder_inputs, target_weights = model.get_batch(
              dev_set, bucket_id)
          _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                       target_weights, bucket_id, True)
          eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
          print("  eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
        sys.stdout.flush()
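The bucket-scale comment in the example above describes a cumulative-distribution trick for picking a bucket in proportion to how much data it holds. Here is a small self-contained sketch of just that piece, with made-up bucket sizes, showing how the intervals work:

import numpy as np

# Illustrative bucket sizes (number of training pairs per bucket).
train_bucket_sizes = [1200, 800, 500, 100]
train_total_size = float(sum(train_bucket_sizes))

# Cumulative fractions: bucket i owns an interval of [0, 1] whose length is
# proportional to its share of the data.
train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                       for i in range(len(train_bucket_sizes))]

def sample_bucket():
    # A uniform draw falls into exactly one interval; the smallest index whose
    # cumulative fraction exceeds the draw is the chosen bucket.
    random_number_01 = np.random.random_sample()
    return min(i for i in range(len(train_buckets_scale))
               if train_buckets_scale[i] > random_number_01)

# Sampling many times roughly reproduces the data distribution.
counts = [0] * len(train_bucket_sizes)
for _ in range(10000):
    counts[sample_bucket()] += 1
print(counts)  # approximately proportional to train_bucket_sizes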
Example #7
def train():
    #generate dataset
    enc_train, dec_train = data_utils.prepare_custom_data(
        gConfig['working_directory'])

    train_set = read_data(enc_train, dec_train)
Example #8
def train():
    #prepare dataset
    enc_train, dec_train = data_utils.prepare_custom_data(
        gConfig['working_directory'])

    train_set = read_data(enc_train, dec_train)
Example #9
def train():
    #prepare said dataset
    encoder_train, decoder_train = data_utils.prepare_custom_data(
        gConfig['working_directory'])
    # attempt to tokenize the sentences
    train_set = read_data(encoder_train, decoder_train)
Example #10
def train():
  # prepare dataset
  print("Preparing data in %s" % gConfig['working_directory'])
  enc_train, dec_train, enc_dev, dec_dev, _, _ = data_utils.prepare_custom_data(
      gConfig['working_directory'], gConfig['train_enc'], gConfig['train_dec'],
      gConfig['test_enc'], gConfig['test_dec'],
      gConfig['enc_vocab_size'], gConfig['dec_vocab_size'])
  # setup config to use BFC allocator
  config = tf.ConfigProto()
  config.gpu_options.allocator_type = 'BFC'
  # config.gpu_options.per_process_gpu_memory_fraction = 0.5
  config.gpu_options.allow_growth = True
  with tf.Session(config=config) as sess:
    # Create model.
    print("Creating %d layers of %d units." % (gConfig['num_layers'], gConfig['layer_size']))
    model = create_model(sess, False)

    # Read data into buckets and compute their sizes.
    print ("Reading development and training data (limit: %d)."
           % gConfig['max_train_data_size'])
    # dev_set = read_data(enc_dev, dec_dev)  # For validation
    train_set = read_data(enc_train, dec_train, gConfig['max_train_data_size'])
    train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
    train_total_size = float(sum(train_bucket_sizes))

    # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
    # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
    # the size of the i-th training bucket, as used later.
    train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                           for i in xrange(len(train_bucket_sizes))]

    # op to write logs to Tensorboard
    summary_writer = tf.summary.FileWriter(gConfig['log_dir'], graph=sess.graph)

    # This is the training loop.
    step_time, loss = 0.0, 0.0
    current_step = 0
    previous_losses = []
    while model.global_step.eval() <= gConfig['max_num_steps']:
      # Choose a bucket according to data distribution. We pick a random number
      # in [0, 1] and use the corresponding interval in train_buckets_scale.
      random_number_01 = np.random.random_sample()
      bucket_id = min([i for i in xrange(len(train_buckets_scale))
                       if train_buckets_scale[i] > random_number_01])

      step_loss_summary = tf.Summary()
      learning_rate_summary = tf.Summary()
    #   embedding_summary = tf.Summary()  # Debug
     
      # Get a batch and make a step.
      start_time = time.time()
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          train_set, bucket_id)
      _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                   target_weights, bucket_id, False)
    #   _, step_loss, _, embedding_matrix = model.step(sess, encoder_inputs, decoder_inputs,
    #                                target_weights, bucket_id, False)

      step_loss_value = step_loss_summary.value.add()
      step_loss_value.tag = "step loss"
      step_loss_value.simple_value = step_loss.astype(float)
      learning_rate_value = learning_rate_summary.value.add()
      learning_rate_value.tag = "learning rate"
      learning_rate_value.simple_value = model.learning_rate.eval().astype(float)
      # pdb.set_trace()
    #   embedding_value = embedding_summary.value.add()
    #   embedding_value.tag = "embedding matrix mean"
    #   embedding_value.simple_value = np.mean(embedding_matrix).astype(float)

      # Write logs at every iteration
      summary_writer.add_summary(step_loss_summary, model.global_step.eval())
      summary_writer.add_summary(learning_rate_summary, model.global_step.eval())
    #   summary_writer.add_summary(embedding_summary, model.global_step.eval())

      step_time += (time.time() - start_time) / gConfig['steps_per_checkpoint']
      loss += step_loss / gConfig['steps_per_checkpoint']
      current_step += 1

      # Once in a while, we save checkpoint, print statistics, and run evals.
      if current_step % 50 == 0 or current_step == 1:
        # Print statistics for the previous epoch.
        if current_step == 1:
          # change learning rate in fine-tuning (uncomment next line for fine-tuning)
          # sess.run(model.learning_rate_finetune_op)
          # sess.run(model.learning_rate.assign(tf.Variable(float(0.0005), trainable=False)))
          perplexity = math.exp(step_loss) if step_loss < 300 else float('inf')
          print("global step %d learning rate %.7f loss %.5f perplexity %.2f"
                % (model.global_step.eval(), model.learning_rate.eval(),
                   step_loss, perplexity))
        else:
          perplexity = math.exp(loss) if loss < 300 else float('inf')
          print("global step %d learning rate %.4f step-time %.2f loss %.4f perplexity "
                "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
                          step_time, loss, perplexity))
        # Decrease learning rate if no improvement was seen over last 3 times.
        if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
          sess.run(model.learning_rate_decay_op)
        previous_losses.append(loss)
        # Save checkpoint and zero timer and loss.
        checkpoint_path = os.path.join(gConfig['model_directory'], "seq2seq.ckpt")
        if current_step % 5000 == 0:
          model.saver.save(sess, checkpoint_path, global_step=model.global_step)
        # step_time, loss = 0.0, 0.0
        # # Run evals on development set and print their perplexity. (used for validation)
        # for bucket_id in xrange(len(_buckets)):
        #   if len(dev_set[bucket_id]) == 0:
        #     print("  eval: empty bucket %d" % (bucket_id))
        #     continue
        #   encoder_inputs, decoder_inputs, target_weights = model.get_batch(
        #       dev_set, bucket_id)
        #   _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
        #                                target_weights, bucket_id, True)
        #   eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
        #   print("  eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
        sys.stdout.flush()
Example #11
def train():
  # prepare dataset
  print("Preparing data in %s" % gConfig['working_directory'])
  enc_train, dec_train, enc_dev, dec_dev, _, _ = data_utils.prepare_custom_data(
      gConfig['working_directory'], gConfig['train_enc'], gConfig['train_dec'],
      gConfig['test_enc'], gConfig['test_dec'],
      gConfig['enc_vocab_size'], gConfig['dec_vocab_size'])

  # Only allocate 2/3 of the gpu memory to allow for running gpu-based predictions while training:
  gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.666)
  config = tf.ConfigProto(gpu_options=gpu_options)
  config.gpu_options.allocator_type = 'BFC'

  with tf.Session(config=config) as sess:
    # Create model.
    print("Creating %d layers of %d units." % (gConfig['num_layers'], gConfig['layer_size']))
    model = create_model(sess, False)

    # Read data into buckets and compute their sizes.
    print ("Reading development and training data (limit: %d)."
           % gConfig['max_train_data_size'])
    dev_set = read_data(enc_dev, dec_dev)
    train_set = read_data(enc_train, dec_train, gConfig['max_train_data_size'])
    train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
    train_total_size = float(sum(train_bucket_sizes))

    train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                           for i in xrange(len(train_bucket_sizes))]

    # This is the training loop.
    step_time, loss = 0.0, 0.0
    current_step = 0
    previous_losses = []
    while True:
      random_number_01 = np.random.random_sample()
      bucket_id = min([i for i in xrange(len(train_buckets_scale))
                       if train_buckets_scale[i] > random_number_01])

      start_time = time.time()
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          train_set, bucket_id)
      _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                   target_weights, bucket_id, False)
      step_time += (time.time() - start_time) / gConfig['steps_per_checkpoint']
      loss += step_loss / gConfig['steps_per_checkpoint']
      current_step += 1

      # Once in a while, we save checkpoint, print statistics, and run evals.
      if current_step % gConfig['steps_per_checkpoint'] == 0:
        perplexity = math.exp(loss) if loss < 300 else float('inf')
        print ("global step %d learning rate %.4f step-time %.2f perplexity "
               "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
                         step_time, perplexity))
        
        # Decrease the learning rate if no improvement was seen over the last 3 checkpoints.
        if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
          sess.run(model.learning_rate_decay_op)
        previous_losses.append(loss)

        checkpoint_path = os.path.join(gConfig['working_directory'], "seq2seq.ckpt")
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)
        step_time, loss = 0.0, 0.0

        for bucket_id in xrange(len(_buckets)):
          if len(dev_set[bucket_id]) == 0:
            print("  eval: empty bucket %d" % (bucket_id))
            continue
          encoder_inputs, decoder_inputs, target_weights = model.get_batch(
              dev_set, bucket_id)
          _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                       target_weights, bucket_id, True)
          eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
          print("  eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
        sys.stdout.flush()
Example #12
def train():
    # prepare encoding data (what's heard) with decoding data (response)
    enc_train, dec_train = data_utils.prepare_custom_data(
        gConfig['working_directory'])
    train_set = read_data(enc_train, dec_train)
Example #13
            response = " ".join(
                [tf.compat.as_str(vocab_list[output]) for output in outputs])
            sentence = prompt_user("next", response)


def prompt_user(phase, response=None):
    if phase == "start":
        print("Your query: ")
    elif phase == "next":
        print("Trumps response: ")
        print(response)
    elif phase == "long":
        print("Your input was too long.  Please try a shorter sentence.")

    sys.stdout.write("> ")
    sys.stdout.flush()
    sentence = sys.stdin.readline()
    return sentence


if __name__ == '__main__':
    if config.mode == 'train':
        # print("Preparing data in %s" % config.working_directory)
        data_utils.prepare_custom_data()
        config_tf = tf.ConfigProto()  # setup config to use BFC allocator
        config_tf.gpu_options.allocator_type = 'BFC'
        train()
    elif config.mode == 'test':
        data_utils.load_en()
        test()
Example #14
def train():
    #Prepares the dataset
    enc_train, dec_train = data_utils.prepare_custom_data(gConfig['working_directory'])

    train_set = read_data(enc_train, dec_train)
Example #15
def train():

    # Prepare dataset
    print("Preparing data in %s" % gConfig['working_directory'])
    enc_train, dec_train, enc_dev, dec_dev, _, _ = data_utils.prepare_custom_data(
        gConfig['working_directory'], gConfig['train_enc'],
        gConfig['train_dec'], gConfig['test_enc'], gConfig['test_dec'],
        gConfig['enc_vocab_size'], gConfig['dec_vocab_size'])

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.666)
    config = tf.ConfigProto(gpu_options=gpu_options)
    config.gpu_options.allocator_type = 'BFC'

    with tf.Session(config=config) as sess:

        # Create model.
        print("Creating %d layers of %d units." %
              (gConfig['num_layers'], gConfig['layer_size']))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)." %
              gConfig['max_train_data_size'])
        dev_set = read_data(enc_dev, dec_dev)
        train_set = read_data(enc_train, dec_train,
                              gConfig['max_train_data_size'])
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # Train loop
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        while True:
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            # Get a batch and time one training step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() -
                          start_time) / gConfig['steps_per_checkpoint']
            loss += step_loss / gConfig['steps_per_checkpoint']
            current_step += 1

            # Save checkpoints and print statistics
            if current_step % gConfig['steps_per_checkpoint'] == 0:

                # Print statistics for the previous epoch.
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print(
                    "global step %d learning rate %.4f step-time %.2f perplexity "
                    "%.2f" %
                    (model.global_step.eval(), model.learning_rate.eval(),
                     step_time, perplexity))

                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(
                        previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)

                # Save checkpoint of training process
                checkpoint_path = os.path.join(gConfig['working_directory'],
                                               "seq2seq.ckpt")
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0

                # Run evals on development set and print their perplexity.
                for bucket_id in xrange(len(_buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        print("  eval: empty bucket %d" % (bucket_id))
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs,
                                                 decoder_inputs,
                                                 target_weights, bucket_id,
                                                 True)
                    eval_ppx = math.exp(
                        eval_loss) if eval_loss < 300 else float('inf')
                    print("  eval: bucket %d perplexity %.2f" %
                          (bucket_id, eval_ppx))
                sys.stdout.flush()
Example #16
def train():
    # prepare dataset
    print("Preparing data in %s" % gConfig['working_directory'])
    enc_train, dec_train, enc_dev, dec_dev, _, _ = data_utils.prepare_custom_data(
        gConfig['working_directory'], gConfig['train_enc'],
        gConfig['train_dec'], gConfig['test_enc'], gConfig['test_dec'],
        gConfig['enc_vocab_size'], gConfig['dec_vocab_size'])
    print("vocabulary created.")
    exit(0)
    # Only allocate 2/3 of the gpu memory to allow for running gpu-based predictions while training:
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.666)
    config = tf.ConfigProto(gpu_options=gpu_options)
    config.gpu_options.allocator_type = 'BFC'

    with tf.Session(config=config) as sess:
        # Create model.
        print("Creating %d layers of %d units." %
              (gConfig['num_layers'], gConfig['layer_size']))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)." %
              gConfig['max_train_data_size'])
        dev_set = read_data(enc_dev, dec_dev)
        train_set = read_data(enc_train, dec_train,
                              gConfig['max_train_data_size'])
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
        # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
        # the size of the i-th training bucket, as used later.
        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        while True:
            # Choose a bucket according to data distribution. We pick a random number
            # in [0, 1] and use the corresponding interval in train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() -
                          start_time) / gConfig['steps_per_checkpoint']
            loss += step_loss / gConfig['steps_per_checkpoint']
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % gConfig['steps_per_checkpoint'] == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print(
                    "global step %d learning rate %.4f step-time %.2f perplexity "
                    "%.2f" %
                    (model.global_step.eval(), model.learning_rate.eval(),
                     step_time, perplexity))
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(
                        previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(gConfig['working_directory'],
                                               "seq2seq.ckpt")
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                # Run evals on development set and print their perplexity.
                for bucket_id in xrange(len(_buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        print("  eval: empty bucket %d" % (bucket_id))
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs,
                                                 decoder_inputs,
                                                 target_weights, bucket_id,
                                                 True)
                    eval_ppx = math.exp(
                        eval_loss) if eval_loss < 300 else float('inf')
                    print("  eval: bucket %d perplexity %.2f" %
                          (bucket_id, eval_ppx))
                sys.stdout.flush()
Example #17
def train():
    # prepare dataset
    if not (tf.gfile.Exists(gConfig['working_dir'] + 'im_filename.txt')
            and tf.gfile.Exists(gConfig['working_dir'] + 'caption_a.txt')
            and tf.gfile.Exists(gConfig['working_dir'] + 'caption_b.txt')):
        print("Creating dataset...")
        # generate im_filename, caption_a, caption_b and save them to working_dir
        data_utils.parse_MSCOCO(gConfig['mscoco_path'],
                                gConfig['working_dir'],
                                permute=True)

    # prepare encoder/decoder inputs
    print("Preparing data in %s" % gConfig['working_dir'])
    enc_train, dec_train, _, _ = data_utils.prepare_custom_data(
        gConfig['working_dir'], gConfig['working_dir'] + 'caption_a.txt',
        gConfig['working_dir'] + 'caption_b.txt', gConfig['enc_vocab_size'],
        gConfig['dec_vocab_size'])

    # setup config to use BFC allocator
    config = tf.ConfigProto()
    config.gpu_options.allocator_type = 'BFC'

    config.gpu_options.allow_growth = True  # "chunks"

    with tf.Session(config=config) as sess:
        # create model
        print("Creating %d layers of %d units." %
              (gConfig['num_layers'], gConfig['layer_size']))
        sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading training data (limit: %d)." %
              gConfig['max_train_data_size'])
        train_set = read_vector(sess,
                                gConfig['working_dir'] + 'im_filename.txt',
                                enc_train, dec_train,
                                gConfig['max_train_data_size'])
        # train_set = read_vector(sess, gConfig['working_dir'] + 'toy_im_filename.txt',
        #                       gConfig['working_dir'] + 'toy_caption_a.txt.ids50000',
        #                       gConfig['working_dir'] + 'toy_caption_b.txt.ids50000',
        #                       gConfig['max_train_data_size'])
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
        # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
        # the size of the i-th training bucket, as used later.
        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # op to write logs to Tensorboard
        summary_writer = tf.summary.FileWriter(gConfig['log_dir'],
                                               graph=sess.graph)

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        # pdb.set_trace()
        while model.global_step.eval() <= gConfig['max_num_steps']:
            # Choose a bucket according to data distribution. We pick a random number
            # in [0, 1] and use the corresponding interval in train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            step_loss_summary = tf.Summary()
            learning_rate_summary = tf.Summary()

            # Get a batch and make a step.
            start_time = time.time()
            decoder_hiddens, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, decoder_hiddens, decoder_inputs,
                                         target_weights, bucket_id, False)

            step_loss_value = step_loss_summary.value.add()
            step_loss_value.tag = "step loss"
            step_loss_value.simple_value = step_loss.astype(float)
            learning_rate_value = learning_rate_summary.value.add()
            learning_rate_value.tag = "learning rate"
            learning_rate_value.simple_value = model.learning_rate.eval(
            ).astype(float)
            # Write logs at every iteration
            summary_writer.add_summary(step_loss_summary,
                                       model.global_step.eval())
            summary_writer.add_summary(learning_rate_summary,
                                       model.global_step.eval())

            step_time += (time.time() -
                          start_time) / gConfig['steps_per_checkpoint']
            loss += step_loss / gConfig['steps_per_checkpoint']
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % gConfig[
                    'steps_per_checkpoint'] == 0 or current_step == 1:
                # Print statistics for the previous epoch.
                if current_step == 1:
                    perplexity = math.exp(
                        step_loss) if step_loss < 300 else float('inf')
                    print(
                        "global step %d learning rate %.4f loss %.4f perplexity %.2f"
                        % (model.global_step.eval(),
                           model.learning_rate.eval(), step_loss, perplexity))
                else:
                    perplexity = math.exp(loss) if loss < 300 else float('inf')
                    print(
                        "global step %d learning rate %.4f step-time %.2f loss %.4f perplexity "
                        "%.2f" %
                        (model.global_step.eval(), model.learning_rate.eval(),
                         step_time, loss, perplexity))
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(
                        previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(gConfig['model_dir'],
                                               "seq2seq.ckpt")
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0

                sys.stdout.flush()
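A usage note on the TensorBoard logging in the last two examples: the summaries written through tf.summary.FileWriter end up as event files under the configured log directory and can be viewed by running "tensorboard --logdir <log_dir>" and opening the address it prints. That command is standard TensorBoard usage rather than something shown in the snippets themselves.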