Example #1
    def load_data(self):
        # TODO: make configurable
        self.data_dir = "/data/WMT15/"

        print("Preparing WMT data in %s" % self.data_dir)
        en_train, fr_train, en_dev, fr_dev, _, _ = data_utils.prepare_wmt_data(
            self.data_dir, self.en_vocab_size, self.fr_vocab_size)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)." %
              self.max_train_data_size)
        self.dev_set = self.read_data(en_dev, fr_dev)
        self.train_set = self.read_data(en_train, fr_train,
                                        self.max_train_data_size)
        train_bucket_sizes = [
            len(self.train_set[b]) for b in xrange(len(self._buckets))
        ]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
        # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
        # the size of the i-th training bucket, as used later.
        self.train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]
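
Example #1 only builds train_buckets_scale; the later examples consume it to pick a bucket with probability proportional to its size. A minimal, self-contained sketch of that sampling step, using made-up bucket fractions (the variable names simply mirror the snippets in this listing):

import numpy as np

# Cumulative fractions, e.g. three buckets holding 20%, 30% and 50% of the data.
train_buckets_scale = [0.2, 0.5, 1.0]

# Draw a number in [0, 1) and take the first bucket whose cumulative fraction
# exceeds it; larger buckets are therefore selected more often.
random_number_01 = np.random.random_sample()
bucket_id = min(i for i in range(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01)
print("sampled bucket:", bucket_id)
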
Example #2
def train():
  # Build the vocabularies and return the paths of the id-mapped training data files
  en_train, ch_train, _, _ = data_utils.prepare_wmt_data(400,400)

  with tf.Session() as sess:
    model = create_model(sess, False)

    dev_set = read_data(en_train, ch_train)  # data used for evaluation
    train_set = read_data(en_train, ch_train)  # returned sentences are not yet padded
    train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]  # number of sentences in each bucket
    #print (train_set[2])
    train_total_size = float(sum(train_bucket_sizes))  # total number of training sentences


    # Cumulative fractions used to pick a bucket with probability proportional to its size
    train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                           for i in xrange(len(train_bucket_sizes))]

    # Start the training loop
    print('Start training')
    while True:
      # Each step, pick one bucket at random from all buckets (buckets with more sentences are more likely to be chosen),
      # then sample a batch of sentences from the chosen bucket for training.
      random_number_01 = np.random.random_sample()
      bucket_id = min([i for i in xrange(len(train_buckets_scale))
                       if train_buckets_scale[i] > random_number_01])
      print (bucket_id)
      # Get a batch of training data
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          train_set, bucket_id)
      print (encoder_inputs)
      print (decoder_inputs)
      _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,target_weights, bucket_id, False)




      # Evaluation phase: every n training steps, run one evaluation and print the results
      '''if current_step % FLAGS.steps_per_checkpoint == 0:
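
Example #2's comments point out that read_data returns sentences that are not yet padded; the padding happens inside model.get_batch, which is not shown in this listing. The following is only a simplified sketch of the padding performed in the standard TensorFlow translate tutorial, not the model's actual method; PAD_ID, GO_ID, the helper name and the sample data are assumptions:

import random

PAD_ID, GO_ID = 0, 1  # assumed special token ids

def get_batch_sketch(bucket_data, bucket_size, batch_size):
    """Sample a batch from one bucket and pad it to fixed encoder/decoder lengths."""
    encoder_size, decoder_size = bucket_size
    encoder_inputs, decoder_inputs = [], []
    for _ in range(batch_size):
        source_ids, target_ids = random.choice(bucket_data)
        # The encoder input is padded and then reversed, so padding ends up in front.
        encoder_pad = [PAD_ID] * (encoder_size - len(source_ids))
        encoder_inputs.append(list(reversed(source_ids + encoder_pad)))
        # The decoder input starts with GO and is back-padded to decoder_size.
        decoder_pad = [PAD_ID] * (decoder_size - len(target_ids) - 1)
        decoder_inputs.append([GO_ID] + target_ids + decoder_pad)
    return encoder_inputs, decoder_inputs

enc, dec = get_batch_sketch([([3, 4, 5], [7, 8])], (5, 10), batch_size=2)
print(enc)
print(dec)
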
Example #3
def prepare_data():
    from_train = None
    to_train = None
    from_dev = None
    to_dev = None
    if FLAGS.from_train_data and FLAGS.to_train_data:
        from_train_data = FLAGS.from_train_data
        to_train_data = FLAGS.to_train_data
        from_dev_data = from_train_data
        to_dev_data = to_train_data
        if FLAGS.from_dev_data and FLAGS.to_dev_data:
            from_dev_data = FLAGS.from_dev_data
            to_dev_data = FLAGS.to_dev_data
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_data(
            FLAGS.data_dir, from_train_data, to_train_data, from_dev_data,
            to_dev_data, FLAGS.from_vocab_size, FLAGS.to_vocab_size,
            data_utils.char_tokenizer)
    else:
        # Prepare WMT data.
        print("Preparing WMT data in %s" % FLAGS.data_dir)
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_wmt_data(
            FLAGS.data_dir, FLAGS.from_vocab_size, FLAGS.to_vocab_size)
    return from_train, to_train, from_dev, to_dev
Example #4
def train():
  """Train a en->fr translation model using WMT data."""
  # Prepare WMT data.
  print("Preparing WMT data in %s" % FLAGS.data_dir)
  en_train, fr_train, en_dev, fr_dev, _, _ = data_utils.prepare_wmt_data(
      FLAGS.data_dir, FLAGS.en_vocab_size, FLAGS.fr_vocab_size)

  with tf.Session() as sess:
    # Create model.
    print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
    model = create_model(sess, False)

    # Read data into buckets and compute their sizes.
    print ("Reading development and training data (limit: %d)."
           % FLAGS.max_train_data_size)
    dev_set = read_data(en_dev, fr_dev)
    train_set = read_data(en_train, fr_train, FLAGS.max_train_data_size)
    train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
    train_total_size = float(sum(train_bucket_sizes))

    # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
    # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
    # the size of the i-th training bucket, as used later.
    train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                           for i in xrange(len(train_bucket_sizes))]

    # This is the training loop.
    step_time, loss = 0.0, 0.0
    current_step = 0
    previous_losses = []
    while True:
      # Choose a bucket according to data distribution. We pick a random number
      # in [0, 1] and use the corresponding interval in train_buckets_scale.
      random_number_01 = np.random.random_sample()
      bucket_id = min([i for i in xrange(len(train_buckets_scale))
                       if train_buckets_scale[i] > random_number_01])

      # Get a batch and make a step.
      start_time = time.time()
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          train_set, bucket_id)
      _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                   target_weights, bucket_id, False)
      step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
      loss += step_loss / FLAGS.steps_per_checkpoint
      current_step += 1

      # Once in a while, we save checkpoint, print statistics, and run evals.
      if current_step % FLAGS.steps_per_checkpoint == 0:
        # Print statistics for the previous epoch.
        perplexity = math.exp(float(loss)) if loss < 300 else float("inf")
        print ("global step %d learning rate %.4f step-time %.2f perplexity "
               "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
                         step_time, perplexity))
        # Decrease learning rate if no improvement was seen over last 3 times.
        if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
          sess.run(model.learning_rate_decay_op)
        previous_losses.append(loss)
        # Save checkpoint and zero timer and loss.
        checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)
        step_time, loss = 0.0, 0.0
        # Run evals on development set and print their perplexity.
        for bucket_id in xrange(len(_buckets)):
          if len(dev_set[bucket_id]) == 0:
            print("  eval: empty bucket %d" % (bucket_id))
            continue
          encoder_inputs, decoder_inputs, target_weights = model.get_batch(
              dev_set, bucket_id)
          _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                       target_weights, bucket_id, True)
          eval_ppx = math.exp(float(eval_loss)) if eval_loss < 300 else float(
              "inf")
          print("  eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
        sys.stdout.flush()
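
Every example above calls a read_data helper that is not shown in this listing. In the original TensorFlow translate tutorial it reads the token-id files line by line and drops each sentence pair into the smallest bucket it fits; pairs that fit no bucket are discarded. A sketch along those lines is below; the bucket sizes and the EOS id are assumptions, not values taken from these examples:

_buckets = [(5, 10), (10, 15), (20, 25), (40, 50)]  # assumed bucket shapes
EOS_ID = 2  # assumed end-of-sequence id

def read_data(source_path, target_path, max_size=None):
    """Read parallel id files into one list of (source_ids, target_ids) per bucket."""
    data_set = [[] for _ in _buckets]
    with open(source_path) as source_file, open(target_path) as target_file:
        source, target = source_file.readline(), target_file.readline()
        counter = 0
        while source and target and (not max_size or counter < max_size):
            counter += 1
            source_ids = [int(x) for x in source.split()]
            target_ids = [int(x) for x in target.split()] + [EOS_ID]
            for bucket_id, (source_size, target_size) in enumerate(_buckets):
                if len(source_ids) < source_size and len(target_ids) < target_size:
                    data_set[bucket_id].append([source_ids, target_ids])
                    break
            source, target = source_file.readline(), target_file.readline()
    return data_set
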
Example #5
def train():
  """Train a en->fr translation model using WMT data."""
  from_train = None
  to_train = None
  from_dev = None
  to_dev = None
  if FLAGS.from_train_data and FLAGS.to_train_data:
    from_train_data = FLAGS.from_train_data
    to_train_data = FLAGS.to_train_data
    from_dev_data = from_train_data
    to_dev_data = to_train_data
    if FLAGS.from_dev_data and FLAGS.to_dev_data:
      from_dev_data = FLAGS.from_dev_data
      to_dev_data = FLAGS.to_dev_data
    from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_data(
        FLAGS.data_dir,
        from_train_data,
        to_train_data,
        from_dev_data,
        to_dev_data,
        FLAGS.from_vocab_size,
        FLAGS.to_vocab_size)
  else:
      # Prepare WMT data.
      print("Preparing WMT data in %s" % FLAGS.data_dir)
      from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_wmt_data(
          FLAGS.data_dir, FLAGS.from_vocab_size, FLAGS.to_vocab_size)
  
  with tf.Session() as sess:
    # Create model.
    print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
    model = create_model(sess,True)

    # Read data into buckets and compute their sizes.
    print ("Reading development and training data (limit: %d)."
           % FLAGS.max_train_data_size)
    dev_set = read_data(from_dev, to_dev)
    train_set = read_data(from_train, to_train, FLAGS.max_train_data_size)
    train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
    train_total_size = float(sum(train_bucket_sizes))

    # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
    # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
    # the size of the i-th training bucket, as used later.
    train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                           for i in xrange(len(train_bucket_sizes))]

    Output_first = open('first_layer_states.txt','wb',1000)
    Output_second = open('second_layer_states.txt','wb',1000)
 
    en_vocab_path = os.path.join(FLAGS.data_dir,
                                 "vocab%d.from" % FLAGS.from_vocab_size)
    fr_vocab_path = os.path.join(FLAGS.data_dir,
                                 "vocab%d.to" % FLAGS.to_vocab_size)
    en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path)
    _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path)
   
    model.batch_size=1
    with gfile.GFile(FLAGS.test_data, mode="rb") as f:
     for sentence in f:
    #sentence = sys.stdin.readline()
    #while sentence:
      print(sentence)
      # Get token-ids for the input sentence.
      token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), en_vocab)
      # Which bucket does it belong to?
      bucket_id = len(_buckets) - 1
      for i, bucket in enumerate(_buckets):
        if bucket[0] >= len(token_ids):
          bucket_id = i
          break
      else:
        logging.warning("Sentence truncated: %s", sentence)

      # Get a 1-element batch to feed the sentence to the model.
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          {bucket_id: [(token_ids, [])]}, bucket_id)
      # Get output logits for the sentence.
      _, _, output_logits,enc_last_state,_ = model.step(sess, encoder_inputs, decoder_inputs,
                                       target_weights, bucket_id, True,0)
      
      
      first_layer = np.array(enc_last_state[0])
      mat_first_layer = np.matrix(first_layer)
      for line in mat_first_layer:
         np.savetxt(Output_first, line, fmt='%.2f')

      second_layer = np.array(enc_last_state[1])
      mat_second_layer = np.matrix(second_layer)
      for line in mat_second_layer:
         np.savetxt(Output_second, line, fmt='%.2f')
Example #6
def train():
    """Train a en->fr translation model using WMT data."""
    from_train = None
    to_train = None
    from_dev = None
    to_dev = None
    if FLAGS.from_train_data and FLAGS.to_train_data:
        from_train_data = FLAGS.from_train_data
        to_train_data = FLAGS.to_train_data
        from_dev_data = from_train_data
        to_dev_data = to_train_data
        if FLAGS.from_dev_data and FLAGS.to_dev_data:
            from_dev_data = FLAGS.from_dev_data
            to_dev_data = FLAGS.to_dev_data
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_data(
            FLAGS.data_dir, from_train_data, to_train_data, from_dev_data,
            to_dev_data, FLAGS.from_vocab_size, FLAGS.to_vocab_size)
    else:
        # Prepare WMT data.
        print("Preparing WMT data in %s" % FLAGS.data_dir)
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_wmt_data(
            FLAGS.data_dir, FLAGS.from_vocab_size, FLAGS.to_vocab_size)

    # Merge all summaries and write them out
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train')

    with tf.Session() as sess:
        # Create model.
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)
        train_writer.add_graph(sess.graph)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)." %
              FLAGS.max_train_data_size)
        dev_set = read_data(from_dev, to_dev)
        train_set = read_data(from_train, to_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        while True:
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() -
                          start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            if current_step % FLAGS.steps_per_checkpoint == 0:
                perplexity = math.exp(
                    float(loss)) if loss < 300 else float("inf")
                print(
                    "global step %d learning rate %.4f step-time %.2f perplexity "
                    "%.2f" %
                    (model.global_step.eval(), model.learning_rate.eval(),
                     step_time, perplexity))
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(
                        previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir,
                                               "translate.ckpt")
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                # Run evals on development set and print their perplexity.
                for bucket_id in xrange(len(_buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        print("  eval: empty bucket %d" % (bucket_id))
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs,
                                                 decoder_inputs,
                                                 target_weights, bucket_id,
                                                 True)
                    eval_ppx = math.exp(
                        float(eval_loss)) if eval_loss < 300 else float("inf")
                    print("  eval: bucket %d perplexity %.2f" %
                          (bucket_id, eval_ppx))
                sys.stdout.flush()

    train_writer.close()
Example #7
def train():
    """Train a en->fr translation model using WMT data."""
    # Prepare WMT data.
    print("Preparing data in %s" % FLAGS.data_dir)
    en_train, fr_train, en_dev, fr_dev, _, _ = data_utils.prepare_wmt_data(
        FLAGS.data_dir, FLAGS.en_vocab_size, FLAGS.fr_vocab_size)

    print("train_fm: " + en_train)
    print("train_to: " + fr_train)
    print("fm_dev  : " + en_dev)
    print("to_dev  : " + fr_dev)

    # exit()

    with tf.Session() as sess:
        # Create model.
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)." %
              FLAGS.max_train_data_size)
        dev_set = read_data(en_dev, fr_dev)
        train_set = read_data(en_train, fr_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
        # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
        # the size of the i-th training bucket, as used later.
        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        ckpt_cnt = 4
        previous_losses = []
        while True:
            # Choose a bucket according to data distribution. We pick a random number
            # in [0, 1] and use the corresponding interval in train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() -
                          start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            if ((FLAGS.steps_per_checkpoint < 100) or
                (current_step % int(FLAGS.steps_per_checkpoint / 100)) == 0):
                print('.', end='')
                sys.stdout.flush()

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                if current_step % (FLAGS.steps_per_checkpoint * 10) == 0:
                    ckpt_cnt += 1
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(
                        previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(
                    FLAGS.train_dir, "translate.%04d.ckpt" % ckpt_cnt)
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
                accum, cnts = 0.0, 0.0
                # Run evals on development set and print their perplexity.
                for bucket_id in xrange(len(_buckets)):
                    if len(dev_set[bucket_id]):
                        encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                            dev_set, bucket_id)
                        _, eval_loss, _ = model.step(sess, encoder_inputs,
                                                     decoder_inputs,
                                                     target_weights, bucket_id,
                                                     True)
                        eval_ppx = math.exp(
                            eval_loss) if eval_loss < 300 else float('inf')
                        cnts += len(dev_set[bucket_id])
                        accum += len(dev_set[bucket_id]) * eval_ppx
                mean_eval_ppx = accum / cnts
                print(
                    "\nGlobal step %d learning rate %.4f step-time %.2f perplexity "
                    "%.2f %.2f" %
                    (model.global_step.eval(), model.learning_rate.eval(),
                     step_time, perplexity, mean_eval_ppx))
                sys.stdout.flush()
                step_time, loss = 0.0, 0.0
Example #8
def train():

    print("Preparing data in %s" % FLAGS.data_dir)
    in_train, out_train, in_dev, out_dev, _, _ = data_utils.prepare_wmt_data(
        FLAGS.data_dir, FLAGS.in_vocab_size, FLAGS.out_vocab_size)

    with tf.Session() as sess:

        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        print("Reading development and training data (limit: %d)." %
              FLAGS.max_train_data_size)
        dev_set = read_data(in_dev, out_dev)
        train_set = read_data(in_train, out_train, FLAGS.max_train_data_size)

        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        while True:

            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)

            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() -
                          start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            if current_step % FLAGS.steps_per_checkpoint == 0:

                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print(
                    "global step %d learning rate %.4f step-time %.2f perplexity "
                    "%.2f" %
                    (model.global_step.eval(), model.learning_rate.eval(),
                     step_time, perplexity))

                if len(previous_losses) > 2 and loss > max(
                        previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)

                checkpoint_path = os.path.join(FLAGS.train_dir,
                                               "translate.ckpt")
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0

                for bucket_id in xrange(len(_buckets)):
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs,
                                                 decoder_inputs,
                                                 target_weights, bucket_id,
                                                 True)
                    eval_ppx = math.exp(
                        eval_loss) if eval_loss < 300 else float('inf')
                    print("  eval: bucket %d perplexity %.2f" %
                          (bucket_id, eval_ppx))
                sys.stdout.flush()
Example #9
def train_early_stop():
  """Train a en->fr translation model using AMR data with early stopping."""
  # Prepare data.
  print("Preparing WMT data in %s" % FLAGS.data_dir)
  en_train, fr_train, en_dev, fr_dev, _, _ = data_utils.prepare_wmt_data(
      FLAGS.data_dir, FLAGS.en_vocab_size, FLAGS.fr_vocab_size, FLAGS.amrseq_version)

  with tf.Session() as sess:
    # Create model.
    print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
    model = create_model(sess, False)

    # Read data into buckets and compute their sizes.
    print ("Reading development and training data (limit: %d)."
           % FLAGS.max_train_data_size)
    dev_set = read_data(en_dev, fr_dev)
    train_set = read_data(en_train, fr_train, FLAGS.max_train_data_size)
    train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
    train_total_size = float(sum(train_bucket_sizes))

    # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
    # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
    # the size of the i-th training bucket, as used later.
    train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                           for i in xrange(len(train_bucket_sizes))]

    # This is the training loop.
    step_time, loss = 0.0, 0.0
    current_step = 0
    previous_losses = []

    done_looping = False
    improvement_threshold = 0.995

    best_eval_total_ppx = np.inf
    best_step = 0
    patience = int(train_total_size / FLAGS.batch_size)  # go over at least this many steps (batches) regardless
    patience_increase = 2
    
    while model.global_step.eval() < FLAGS.max_steps and (not done_looping):

      # Choose a bucket according to data distribution. We pick a random number
      # in [0, 1] and use the corresponding interval in train_buckets_scale.
      random_number_01 = np.random.random_sample()
      bucket_id = min([i for i in xrange(len(train_buckets_scale))
                       if train_buckets_scale[i] > random_number_01])

      # Get a batch and make a step.
      start_time = time.time()
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          train_set, bucket_id)
      _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                   target_weights, bucket_id, False)
      step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
      loss += step_loss / FLAGS.steps_per_checkpoint
      current_step += 1

      # Once in a while, we save checkpoint, print statistics, and run evals.
      if current_step % FLAGS.steps_per_checkpoint == 0:
        # Print statistics for the previous epoch.
        perplexity = math.exp(loss) if loss < 300 else float('inf')
        print ("Current step %d, global step %d learning rate %.4f step-time %.2f perplexity "
               "%.2f" % (current_step, model.global_step.eval(), model.learning_rate.eval(),
                         step_time, perplexity))
        # Decrease learning rate if no improvement was seen over last 3 times.
        if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
          sess.run(model.learning_rate_decay_op)
        previous_losses.append(loss)

        
        # Save checkpoint and zero timer and loss.
        #checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
        #model.saver.save(sess, checkpoint_path, global_step=model.global_step)
        step_time, loss = 0.0, 0.0
        
        # Run evals on development set and print their perplexity.
        eval_total_ppx = 0.0 # total perplexity in all validation buckets
        tmp_batch_size = model.batch_size
        for bucket_id in xrange(len(_buckets)):
          model.batch_size = len(dev_set[bucket_id]) # eval the whole bucket 
          encoder_inputs, decoder_inputs, target_weights = model.get_batch(
              dev_set, bucket_id)
          _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                       target_weights, bucket_id, True)
          eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
          eval_total_ppx += eval_ppx
          print("  eval: bucket %d size:%d perplexity %.2f" % (bucket_id, model.batch_size, eval_ppx))
        model.batch_size = tmp_batch_size
        if eval_total_ppx < best_eval_total_ppx:
          if (eval_total_ppx < best_eval_total_ppx * improvement_threshold): # the improvement is good enough
            patience = max(patience, model.global_step.eval() * patience_increase)
          best_eval_total_ppx = eval_total_ppx
          best_step = model.global_step.eval()

          # save the current checkpoint
          checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
          model.saver.save(sess, checkpoint_path, global_step=model.global_step)

        if patience <= model.global_step.eval():
          done_looping = True
        sys.stdout.flush()

    print("Optimization complete. Best total validation perplexity %f obtained at global step %d." % (best_eval_total_ppx, best_step))
Example #10
def train():
  """Train a en->fr translation model using WMT data."""
  # Prepare WMT data.
  print("Preparing WMT data in %s" % FLAGS.data_dir)
  en_train, fr_train, en_dev, fr_dev, _, _ = data_utils.prepare_wmt_data(
      FLAGS.data_dir, FLAGS.en_vocab_size, FLAGS.fr_vocab_size)
  fr_vocab_path = os.path.join(FLAGS.data_dir,
                                 "vocab%d.en" % FLAGS.fr_vocab_size)
  #en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path)
  _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path)

  gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1)
  with tf.Session(config=tf.ConfigProto(device_count={'GPU':1}, gpu_options = gpu_options)) as sess:
    # Create model.
    print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
    model = create_model(sess, False)

    # Read data into buckets and compute their sizes.
    print ("Reading development and training data (limit: %d)."
           % FLAGS.max_train_data_size)
    dev_set = read_data(en_dev, fr_dev)
    train_set = read_data(en_train, fr_train, FLAGS.max_train_data_size)
    #embed()
    train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
    train_total_size = float(sum(train_bucket_sizes))

    # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
    # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
    # the size of the i-th training bucket, as used later.
    train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                           for i in xrange(len(train_bucket_sizes))]

    # This is the training loop.
    step_time, loss = 0.0, 0.0
    current_step = 0
    previous_losses = []
    while True:
      # Choose a bucket according to data distribution. We pick a random number
      # in [0, 1] and use the corresponding interval in train_buckets_scale.
      random_number_01 = np.random.random_sample()
      bucket_id = min([i for i in xrange(len(train_buckets_scale))
                       if train_buckets_scale[i] > random_number_01])

      # Get a batch and make a step.
      start_time = time.time()
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          train_set, bucket_id)
      _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                   target_weights, bucket_id, False)
      step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
      loss += step_loss / FLAGS.steps_per_checkpoint
      current_step += 1

      # Once in a while, we save checkpoint, print statistics, and run evals.
      if current_step % FLAGS.steps_per_checkpoint == 0:
        # Print statistics for the previous epoch.
        perplexity = math.exp(loss) if loss < 300 else float('inf')
        print ("global step %d learning rate %.4f step-time %.2f perplexity "
               "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
                         step_time, perplexity))
        print ("step loss:%.4f", step_loss)
        # Decrease learning rate if no improvement was seen over last 3 times.
        if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
          sess.run(model.learning_rate_decay_op)
        previous_losses.append(loss)
        # Save checkpoint and zero timer and loss.
        checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt"+str(loss))
        model.saver = tf.train.Saver(tf.all_variables(), max_to_keep=0)
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)
        step_time, loss = 0.0, 0.0
Example #11
def train():
    """Train a src->trg translation model."""
    print("Preparing training and dev data in %s" % FLAGS.data_dir)
    src_train, trg_train, src_dev, trg_dev, src_vocab_path, trg_vocab_path = data_utils.prepare_wmt_data(
            FLAGS.data_dir, FLAGS.src_vocab_size, FLAGS.trg_vocab_size)

    src_vocab, rev_src_vocab = data_utils.initialize_vocabulary(src_vocab_path)
    trg_vocab, rev_trg_vocab = data_utils.initialize_vocabulary(trg_vocab_path)

    if FLAGS.src_vocab_size > len(src_vocab):
        FLAGS.src_vocab_size = len(src_vocab)
    if FLAGS.trg_vocab_size > len(trg_vocab):
        FLAGS.trg_vocab_size = len(trg_vocab)

    with tf.Session() as sess:
        # Create model.
        print("Creating %d layers of %d units with word embedding %d."
              % (FLAGS.num_layers, FLAGS.hidden_units, FLAGS.hidden_edim))
        model = create_model(sess, False)
        dev_set = read_data(src_dev, trg_dev)
        train_set = read_data(src_train, trg_train)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
        # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
        # the size of the i-th training bucket, as used later.
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        while True:
            # Choose a bucket according to data distribution. We pick a random number
            # in [0, 1] and use the corresponding interval in train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, encoder_mask, decoder_inputs, target_weights = model.get_batch(
                    train_set, bucket_id)

            _, step_loss, _ = model.step(sess, encoder_inputs, encoder_mask, decoder_inputs,
                                         target_weights, bucket_id, False)

            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d learning rate %.8f step-time %.2f perplexity "
                      "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
                                step_time, perplexity))

                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                # Run evals on development set and print their perplexity.
                for bucket_id in xrange(len(_buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        print("  eval: empty bucket %d" % (bucket_id))
                        continue
                    encoder_inputs, encoder_mask, decoder_inputs, target_weights = model.get_batch(dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs, encoder_mask, decoder_inputs,
                                                 target_weights, bucket_id, True)
                    eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
                    print("  eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
                sys.stdout.flush()
Example #12
def train():
    """Train a en->fr translation model using WMT data."""
    # Prepare WMT data.
    print("Preparing WMT data in %s" % FLAGS.data_dir)
    en_train, fr_train, en_dev, fr_dev, _, _ = data_utils.prepare_wmt_data(
        FLAGS.data_dir, FLAGS.en_vocab_size, FLAGS.fr_vocab_size)

    # Only allocate 2/3 of the gpu memory to allow for running gpu-based predictions while training:
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.666)
    config = tf.ConfigProto(gpu_options=gpu_options)
    config.gpu_options.allocator_type = 'BFC'

    with tf.Session(config=config) as sess:
        # Create model.
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)." %
              FLAGS.max_train_data_size)
        dev_set = read_data(en_dev, fr_dev)
        train_set = read_data(en_train, fr_train, FLAGS.max_train_data_size)
        #    for bucket_id, (source_size, target_size) in enumerate(_buckets):
        #      print("data set index %d count: %d" % (bucket_id, len(train_set[bucket_id])))
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
        # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
        # the size of the i-th training bucket, as used later.
        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        best_eval_ppx = float('inf')
        while True:
            # Choose a bucket according to data distribution. We pick a random number
            # in [0, 1] and use the corresponding interval in train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() -
                          start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print(
                    "global step %d learning rate %.4f step-time %.2f perplexity "
                    "%.2f" %
                    (model.global_step.eval(), model.learning_rate.eval(),
                     step_time, perplexity))
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(
                        previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir,
                                               "translate.ckpt")
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                # Run evals on development set and print their perplexity.
                eval_ppx_list = []
                for bucket_id in xrange(len(_buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        print("  eval: empty bucket %d" % (bucket_id))
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs,
                                                 decoder_inputs,
                                                 target_weights, bucket_id,
                                                 True)
                    eval_ppx = math.exp(
                        eval_loss) if eval_loss < 300 else float('inf')
                    eval_ppx_list.append(eval_ppx)
                    print("  eval: bucket %d perplexity %.2f" %
                          (bucket_id, eval_ppx))
                sys.stdout.flush()

                mean_eval_ppx = np.mean(eval_ppx_list)
                if mean_eval_ppx < best_eval_ppx:
                    best_eval_ppx = mean_eval_ppx
Example #13
def train():
    """Train a en->fr translation model using WMT data."""
    # Prepare WMT data.
    logging.debug("Preparing WMT data in %s" % FLAGS.data_dir)
    en_train, fr_train, en_dev, fr_dev, _, _ = data_utils.prepare_wmt_data(
        FLAGS.data_dir, FLAGS.en_vocab_size, FLAGS.fr_vocab_size)

    # Beam search is disabled during training and used at inference.
    beam_search = False
    beam_size = 5
    attention = FLAGS.attention
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        # Create model.
        logging.debug("Creating %d layers of %d units." %
                      (FLAGS.num_layers, FLAGS.size))
        #model = create_model(sess, False)
        model = create_model(sess,
                             False,
                             beam_search=beam_search,
                             beam_size=beam_size,
                             attention=attention)

        # Read data into buckets and compute their sizes.
        logging.debug("Reading development and training data (limit: %d)." %
                      FLAGS.max_train_data_size)
        dev_set = read_data(en_dev, fr_dev)
        logging.debug("Finish reading data")
        train_set = read_data(en_train, fr_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
        # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
        # the size of the i-th training bucket, as used later.
        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        logging.debug('Started training')
        while True:
            # Choose a bucket according to data distribution. We pick a random number
            # in [0, 1] and use the corresponding interval in train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            #_, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
            #                             target_weights, bucket_id, False)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False,
                                         beam_search)

            step_time += (time.time() -
                          start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1
            # Printing perplexity every 10 iterations for plotting
            #if current_step % 10 == 0:
            #perplexity10 = math.exp(float(loss)) if loss < 300 else float("inf")
            #logging.debug("Plot: global step %d learning rate %.4f step-time %.2f perplexity "
            #      "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
            #               step_time, perplexity10))

            # Once in a while, we save a checkpoint, log statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(
                    float(loss)) if loss < 300 else float("inf")
                logging.debug(
                    "global step %d learning rate %.4f step-time %.2f perplexity "
                    "%.2f" %
                    (model.global_step.eval(), model.learning_rate.eval(),
                     step_time, perplexity))
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(
                        previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "ama.ckpt")
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                # Run evals on the development set and log their perplexity.
                for bucket_id in xrange(len(_buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        logging.debug("  eval: empty bucket %d" % (bucket_id))
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs,
                                                 decoder_inputs,
                                                 target_weights, bucket_id,
                                                 True)
                    eval_ppx = math.exp(
                        float(eval_loss)) if eval_loss < 300 else float("inf")
                    logging.debug("  eval: bucket %d perplexity %.2f" %
                                  (bucket_id, eval_ppx))
                sys.stdout.flush()
Example #14
def train():
    """Train a en->fr translation model using WMT data."""
    from_train = None
    to_train = None
    from_dev = None
    to_dev = None
    if FLAGS.from_train_data and FLAGS.to_train_data:
        from_train_data = FLAGS.from_train_data
        to_train_data = FLAGS.to_train_data
        from_dev_data = from_train_data
        to_dev_data = to_train_data
        if FLAGS.from_dev_data and FLAGS.to_dev_data:
            from_dev_data = FLAGS.from_dev_data
            to_dev_data = FLAGS.to_dev_data
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_data(
            FLAGS.data_dir, from_train_data, to_train_data, from_dev_data,
            to_dev_data, FLAGS.from_vocab_size, FLAGS.to_vocab_size)
    else:
        # Prepare WMT data.
        print("Preparing data in %s" % FLAGS.data_dir)
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_wmt_data(
            FLAGS.data_dir, FLAGS.from_vocab_size, FLAGS.to_vocab_size)

    with tf.Session() as sess:
        # Create model.
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)." %
              FLAGS.max_train_data_size)
        dev_set = read_data(from_dev, to_dev)
        train_set = read_data(from_train, to_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
        # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
        # the size of the i-th training bucket, as used later.
        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        eval_ppx_history = []
        #while current_step < FLAGS.max_num_steps:
        while True:
            # Choose a bucket according to data distribution. We pick a random number
            # in [0, 1] and use the corresponding interval in train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _, _ = model.step(
                sess,
                encoder_inputs,
                decoder_inputs,  ###, _
                target_weights,
                bucket_id,
                False)
            step_time += (time.time() -
                          start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(
                    float(loss)) if loss < 300 else float("inf")
                print(
                    "global step %d learning rate %.4f step-time %.2f perplexity "
                    "%.4f" %
                    (model.global_step.eval(), model.learning_rate.eval(),
                     step_time, perplexity))
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(
                        previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir,
                                               "translate.ckpt")
                if not os.path.isabs(checkpoint_path):
                    checkpoint_path = os.path.abspath(
                        os.path.join(os.getcwd(), checkpoint_path))
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                eval_ppx = np.zeros(len(_buckets), dtype=np.float32)
                for bucket_id in xrange(len(_buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        print("  eval: empty bucket %d" % (bucket_id))
                        continue
                    # 0717 newly modified
                    num_buckets = int(
                        math.ceil(1.0 * len(dev_set[bucket_id]) /
                                  FLAGS.batch_size))
                    eval_loss = np.zeros(num_buckets, dtype=np.float32)
                    for idx in range(num_buckets):
                        encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                            dev_set, bucket_id)
                        _, eval_loss[idx], _, eval_lasthidden = model.step(
                            sess,
                            encoder_inputs,
                            decoder_inputs,  ###
                            target_weights,
                            bucket_id,
                            True)
                    eval_ppx[bucket_id] = math.exp(np.mean(
                        eval_loss)) if eval_loss.mean() < 300 else float("inf")
                    print("  eval: bucket %d perplexity %.4f" %
                          (bucket_id, eval_ppx[bucket_id]))
                # 0717 newly added: stopping criterion, stop at the validation-perplexity minimum after passing 400 epochs
                population = np.array([
                    len(dev_set[bucket_id])
                    for bucket_id in xrange(len(_buckets))
                ])
                total_eval_ppx = np.sum(eval_ppx * population)
                print("  totsl eval perplexity %.4f" % total_eval_ppx)
                if len(eval_ppx_history) == 0:
                    eval_ppx_history.append(total_eval_ppx)
                    sys.stdout.flush()
                    continue
                if total_eval_ppx > eval_ppx_history[0]:
                    eval_ppx_history.append(total_eval_ppx)
                    if total_eval_ppx > eval_ppx_history[-1]:
                        sess.run(model.learning_rate_decay_op)
                    if len(eval_ppx_history) == 5:
                        sys.stdout.flush()
                        break
                else:
                    eval_ppx_history = [total_eval_ppx]
                sys.stdout.flush()
Example #15
def train():
    """Train a en->fr translation model using WMT data."""
    from_train = None
    to_train = None
    from_dev = None
    to_dev = None
    if FLAGS.from_train_data and FLAGS.to_train_data:
        from_train_data = FLAGS.from_train_data
        to_train_data = FLAGS.to_train_data
        from_dev_data = from_train_data
        to_dev_data = to_train_data
        if FLAGS.from_dev_data and FLAGS.to_dev_data:
            from_dev_data = FLAGS.from_dev_data
            to_dev_data = FLAGS.to_dev_data
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_data(
            FLAGS.data_dir, from_train_data, to_train_data, from_dev_data,
            to_dev_data, FLAGS.from_vocab_size, FLAGS.to_vocab_size)
    else:
        # Prepare WMT data.
        handleInfo(str("Preparing WMT data in : " + str(FLAGS.data_dir)))
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_wmt_data(
            FLAGS.data_dir, FLAGS.from_vocab_size, FLAGS.to_vocab_size)

    with tf.Session() as sess:
        # Create model.
        handleInfo(str("Creating " + str(FLAGS.num_layers) + " layers of " + str(FLAGS.size) + " units."))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        handleInfo(
            str("Reading development and training data (limit: " +
                str(FLAGS.max_train_data_size) + ")."))
        dev_set = read_data(from_dev, to_dev)
        train_set = read_data(from_train, to_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        while True:
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            if current_step % FLAGS.steps_per_checkpoint == 0:
                perplexity = math.exp(float(loss)) if loss < 300 else float("inf")

                perplexityRound = round(perplexity, 1)
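                # Early-stopping heuristic: once perplexity drops below 9.9,
                # remember the lowest rounded value seen so far and count how
                # many checkpoints it recurs; stop after the same minimum has
                # recurred more than nine times.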

                if perplexityRound < 9.9:
                    if not _lowestPerplexity:
                        _lowestPerplexity.append(perplexityRound)
                        _lowestPerplexity.append(0)

                    if perplexityRound == _lowestPerplexity[0]:
                        if _lowestPerplexity[1] > 9:
                            break
                        else:
                            count = _lowestPerplexity[1]
                            count = count + 1
                            _lowestPerplexity[1] = count
                    else:
                        if perplexityRound < _lowestPerplexity[0]:
                            _lowestPerplexity[0] = perplexityRound
                            _lowestPerplexity[1] = 1

                    handleInfo("Lowest Perplexity List--" + str(_lowestPerplexity))

                message = "global step " + str(
                    model.global_step.eval()) + " learning rate " + str(
                        model.learning_rate.eval()) + " step-time " + str(
                            step_time) + " perplexity " + str(perplexity)
                handleInfo(message)
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
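                # e.g. with previous_losses [4.1, 4.3, 4.2] a new loss of 4.4
                # exceeds the maximum of the last three, so the decay op is run
                # (illustrative values only).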
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir,"translate.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                # Run evals on development set and print their perplexity.
                for bucket_id in xrange(len(_buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        handleInfo(str("Eval: empty bucket : " + str(bucket_id)))
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, True)
                    eval_ppx = math.exp(float(eval_loss)) if eval_loss < 300 else float("inf")
                    handleInfo(str("Eval: bucket " + str(bucket_id) + " perplexity : " + str(eval_ppx)))
                sys.stdout.flush()
Beispiel #16
0
def train():
    """Train a en->fr translation model using WMT data."""
    from_train = None
    to_train = None
    from_dev = None
    to_dev = None
    if FLAGS.from_train_data and FLAGS.to_train_data:
        from_train_data = FLAGS.from_train_data
        to_train_data = FLAGS.to_train_data
        from_dev_data = from_train_data
        to_dev_data = to_train_data
        if FLAGS.from_dev_data and FLAGS.to_dev_data:
            from_dev_data = FLAGS.from_dev_data
            to_dev_data = FLAGS.to_dev_data
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_data(
            FLAGS.data_dir, from_train_data, to_train_data, from_dev_data,
            to_dev_data, FLAGS.from_vocab_size, FLAGS.to_vocab_size)

        en_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.from" % FLAGS.from_vocab_size)
        _, rev_fr_vocab = data_utils.initialize_vocabulary(en_vocab_path)
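        # Note: this loads the reversed *from* vocabulary; the eval loop below
        # maps predicted indices back to source-side tokens (pointer-style), so
        # the name rev_fr_vocab is misleading but the choice appears intentional.
        # The WMT branch below never defines rev_fr_vocab.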
    else:
        # Prepare WMT data.
        print("Preparing WMT data in %s" % FLAGS.data_dir)
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_wmt_data(
            FLAGS.data_dir, FLAGS.from_vocab_size, FLAGS.to_vocab_size)

    train_graph = tf.Graph()
    eval_graph = tf.Graph()
    train_sess = tf.Session(graph=train_graph)
    eval_sess = tf.Session(graph=eval_graph)
    #eval_sess = tf_debug.LocalCLIDebugWrapperSession(eval_sess)
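    # Two graphs/sessions are used: the training model is built with
    # forward_only=False, while the eval model is built with forward_only=True
    # and restored from the latest training checkpoint before each eval pass.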

    with train_graph.as_default():
        # Create train model.
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.size))
        train_model = create_model(train_sess, False)

    with eval_graph.as_default():
        # Create eval model.
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.size))
        eval_model = create_model(eval_sess, True)

    #with tf.Session() as sess:

    # Read data into buckets and compute their sizes.
    print("Reading development and training data (limit: %d)." %
          FLAGS.max_train_data_size)
    dev_set = read_data(from_dev, to_dev)
    train_set = read_data(from_train, to_train, FLAGS.max_train_data_size)
    train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
    train_total_size = float(sum(train_bucket_sizes))

    # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
    # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
    # the size of the i-th training bucket, as used later.
    train_buckets_scale = [
        sum(train_bucket_sizes[:i + 1]) / train_total_size
        for i in xrange(len(train_bucket_sizes))
    ]
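    # For example, with bucket sizes [100, 300, 600] the cumulative scale is
    # [0.1, 0.4, 1.0]; a uniform draw of 0.25 falls in the second interval, so
    # larger buckets are sampled proportionally more often (illustrative
    # numbers only).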

    # This is the training loop.
    step_time, loss = 0.0, 0.0
    current_step = 0
    previous_losses = []
    while True:
        # Choose a bucket according to data distribution. We pick a random number
        # in [0, 1] and use the corresponding interval in train_buckets_scale.
        random_number_01 = np.random.random_sample()
        bucket_id = min([
            i for i in xrange(len(train_buckets_scale))
            if train_buckets_scale[i] > random_number_01
        ])

        # Get a batch and make a step.
        start_time = time.time()
        encoder_inputs, decoder_inputs, target_weights, target_inputs, sent_ids = train_model.get_batch(
            train_set, bucket_id)
        _, step_loss, _ = train_model.step(train_sess, encoder_inputs,
                                           decoder_inputs, target_weights,
                                           target_inputs, bucket_id, False)
        step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
        loss += step_loss / FLAGS.steps_per_checkpoint
        current_step += 1

        # Once in a while, we save checkpoint, print statistics, and run evals.
        if current_step % FLAGS.steps_per_checkpoint == 0:
            # Print statistics for the previous epoch.
            perplexity = math.exp(float(loss)) if loss < 300 else float("inf")
            print(
                "global step %d learning rate %.4f step-time %.2f perplexity "
                "%.2f" % (train_model.global_step.eval(session=train_sess),
                          train_model.learning_rate.eval(session=train_sess),
                          step_time, perplexity))
            # Decrease learning rate if no improvement was seen over last 3 times.
            if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                train_sess.run(train_model.learning_rate_decay_op)
            previous_losses.append(loss)
            # Save checkpoint and zero timer and loss.
            checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
            ckpt_path = train_model.saver.save(
                train_sess,
                checkpoint_path,
                global_step=train_model.global_step)
            eval_model.saver.restore(eval_sess, ckpt_path)
            step_time, loss = 0.0, 0.0
            # Run evals on development set and print their perplexity.
            print("run evals")
            ft = open('tmp.eval.ids', 'w')
            for bucket_id in xrange(len(_buckets)):
                if len(dev_set[bucket_id]) == 0:
                    print("  eval: empty bucket %d" % (bucket_id))
                    continue
                all_encoder_inputs, all_decoder_inputs, all_target_weights, all_target_inputs, all_sent_ids = eval_model.get_all_batch(
                    dev_set, bucket_id)
                #ipdb.set_trace()
                for idx in xrange(len(all_encoder_inputs)):
                    _, eval_loss, output_logits = eval_model.step(
                        eval_sess, all_encoder_inputs[idx],
                        all_decoder_inputs[idx], all_target_weights[idx],
                        all_target_inputs[idx], bucket_id, True)
                    batch_ids = all_sent_ids[idx]
                    #eval_ppx = math.exp(float(eval_loss)) if eval_loss < 300 else float(
                    #    "inf")
                    #print("  eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
                    #ipdb.set_trace()
                    swap_inputs = np.array(all_encoder_inputs[idx])
                    swap_inputs = swap_inputs.swapaxes(0, 1)

                    outputs = [
                        np.argmax(logit, axis=1) for logit in output_logits
                    ]
                    swap_outputs = np.array(outputs)
                    swap_outputs = swap_outputs.swapaxes(0, 1)
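                    # The decoder logits appear to score positions in the
                    # source sequence (pointer-style), so each predicted index
                    # is mapped back to the corresponding encoder input token.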

                    out_ids = []
                    for batch_id in xrange(len(swap_outputs)):
                        out_ids.append(
                            swap_inputs[batch_id][swap_outputs[batch_id]])
                    #if data_utils.EOS_ID in outputs:
                    #  t = [m[:m.index(data_utils.EOS_ID)] for m in t]

                    for batch_id in xrange(len(swap_outputs)):
                        #print(" ".join([tf.compat.as_str(rev_fr_vocab[o]) for o in m]))
                        ft.write(" ".join([
                            tf.compat.as_str(rev_fr_vocab[o])
                            for o in out_ids[batch_id].tolist()
                        ]) + "|" + str(batch_ids[batch_id]) + '\n')
            ft.close()
            print("converting output...")
            subprocess.call(
                "python convert_to_json.py --din tmp.eval.ids --dout out.json --dsource /users1/ybsun/seq2sql/WikiSQL/annotated/dev.jsonl",
                shell=True)
            print("running evaluation script...")
            subprocess.call(
                "python evaluate.py ../WikiSQL/data/dev.jsonl ../WikiSQL/data/dev.db  ./out.json",
                shell=True)

            print("finish evals")
            sys.stdout.flush()
Beispiel #17
0
def train():
  print("Preparing data in %s" % FLAGS.data_dir)
  in_train, out_train, in_dev, out_dev, _, _ = data_utils.prepare_wmt_data(
      FLAGS.data_dir, FLAGS.in_vocab_size, FLAGS.out_vocab_size)

  with tf.Session() as sess:
    print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
    model = create_model(sess, False)

    print("Reading development and training data (limit: %d)."
          % FLAGS.max_train_data_size)
    dev_set = read_data(in_dev, out_dev)
    train_set = read_data(in_train, out_train, FLAGS.max_train_data_size)

    train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
    train_total_size = float(sum(train_bucket_sizes))

    train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                           for i in xrange(len(train_bucket_sizes))]

    step_time, loss = 0.0, 0.0
    current_step = 0
    previous_losses = []
    while True:
     
      random_number_01 = np.random.random_sample()                     
      bucket_id = min([i for i in xrange(len(train_buckets_scale))      
                       if train_buckets_scale[i] > random_number_01])
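      # A bucket is sampled with probability proportional to the number of
      # training pairs it contains, using the cumulative scale computed above.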

      start_time = time.time()
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(   
          train_set, bucket_id)                                           
                                                                          
      _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs, 
                                   target_weights, bucket_id, False)      
      step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
      loss += step_loss / FLAGS.steps_per_checkpoint
      current_step += 1

      
      if current_step % FLAGS.steps_per_checkpoint == 0:
     
        perplexity = math.exp(loss) if loss < 300 else float('inf')
        print ("global step %d learning rate %.4f step-time %.2f perplexity "
               "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
                         step_time, perplexity))


        if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
          sess.run(model.learning_rate_decay_op)
        previous_losses.append(loss)

        checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)
        step_time, loss = 0.0, 0.0

        for bucket_id in xrange(len(_buckets)):
          if len(dev_set[bucket_id]) == 0:
            print("  eval: empty bucket %d" % bucket_id)
            continue
          encoder_inputs, decoder_inputs, target_weights = model.get_batch(
              dev_set, bucket_id)
          _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                       target_weights, bucket_id, True)
          eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
          print("  eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
        sys.stdout.flush()
Beispiel #18
0
def train():
  """Train a en->fr translation model using WMT data."""
  from_train = None
  to_train = None
  from_dev = None
  to_dev = None
  if FLAGS.from_train_data and FLAGS.to_train_data:
    from_train_data = FLAGS.from_train_data
    to_train_data = FLAGS.to_train_data
    from_dev_data = from_train_data
    to_dev_data = to_train_data
    if FLAGS.from_dev_data and FLAGS.to_dev_data:
      from_dev_data = FLAGS.from_dev_data
      to_dev_data = FLAGS.to_dev_data
    from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_data(
        FLAGS.data_dir,
        from_train_data,
        to_train_data,
        from_dev_data,
        to_dev_data,
        FLAGS.from_vocab_size,
        FLAGS.to_vocab_size)
  else:
      # Prepare WMT data.
      print("Preparing WMT data in %s" % FLAGS.data_dir)
      from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_wmt_data(
          FLAGS.data_dir, FLAGS.from_vocab_size, FLAGS.to_vocab_size)
  
  with tf.Session() as sess:
    # Create model.
    print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
    model = create_model(sess,True)

    # Read data into buckets and compute their sizes.
    print ("Reading development and training data (limit: %d)."
           % FLAGS.max_train_data_size)
    dev_set = read_data(from_dev, to_dev)
    train_set = read_data(from_train, to_train, FLAGS.max_train_data_size)
    train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
    train_total_size = float(sum(train_bucket_sizes))

    # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
    # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
    # the size of the i-th training bucket, as used later.
    train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                           for i in xrange(len(train_bucket_sizes))]

    # This is the training loop.
    step_time, loss = 0.0, 0.0
    current_step = 0
    previous_losses = []
    while current_step<FLAGS.num_train_step:
      # Choose a bucket according to data distribution. We pick a random number
      # in [0, 1] and use the corresponding interval in train_buckets_scale.
      random_number_01 = np.random.random_sample()
      bucket_id = min([i for i in xrange(len(train_buckets_scale))
                       if train_buckets_scale[i] > random_number_01])

      # Get a batch and make a step.
      start_time = time.time()
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          train_set, bucket_id)
      _, step_loss, _, enc_init_states, enc_all_outputs = model.step(  # MK change
          sess, encoder_inputs, decoder_inputs,
          target_weights, bucket_id, True, 1)
      step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
      loss += step_loss / FLAGS.steps_per_checkpoint
      current_step += 1

      # Once in a while, we save checkpoint, print statistics, and run evals.
      if current_step % FLAGS.steps_per_checkpoint == 0:
        # Save the encoder's layer states from the last step to text files for
        # offline inspection, then print statistics for the previous period.
        first_layer = np.array(enc_init_states[0])
        mat_first_layer = np.matrix(first_layer)
        with open('first_layer_states.txt','wb') as f:
          for line in mat_first_layer:
            np.savetxt(f, line, fmt='%.2f')

        second_layer = np.array(enc_init_states[1])
        mat_second_layer = np.matrix(second_layer)
        with open('second_layer_states.txt','wb') as f:
          for line in mat_second_layer:
            np.savetxt(f, line, fmt='%.2f')


        perplexity = math.exp(float(loss)) if loss < 300 else float("inf")
        print ("global step %d learning rate %.4f step-time %.5f perplexity "
               "%.5f" % (model.global_step.eval(), model.learning_rate.eval(),
                         step_time, perplexity))
        # Decrease learning rate if no improvement was seen over last 3 times.
        if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
          sess.run(model.learning_rate_decay_op)
        previous_losses.append(loss)
        # Save checkpoint and zero timer and loss.
        checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)
        step_time, loss = 0.0, 0.0
        # Run evals on development set and print their perplexity.
        for bucket_id in xrange(len(_buckets)):
          if len(dev_set[bucket_id]) == 0:
            print("  eval: empty bucket %d" % (bucket_id))
            continue
          encoder_inputs, decoder_inputs, target_weights = model.get_batch(
              dev_set, bucket_id)
          _, eval_loss, _,_,_ = model.step(sess, encoder_inputs, decoder_inputs,
                                       target_weights, bucket_id, True,0)
          eval_ppx = math.exp(float(eval_loss)) if eval_loss < 300 else float(
              "inf")
          print("  eval: bucket %d perplexity %.5f" % (bucket_id, eval_ppx))
        sys.stdout.flush()


    
    en_vocab_path = os.path.join(FLAGS.data_dir,
                                 "vocab%d.from" % FLAGS.from_vocab_size)
    fr_vocab_path = os.path.join(FLAGS.data_dir,
                                 "vocab%d.to" % FLAGS.to_vocab_size)
    en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path)
    _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path)
   
    max_iter=100 
    count=0
    model.batch_size=1
    with gfile.GFile(FLAGS.from_train_data, mode="rb") as f:
     for sentence in f:
      count += 1
      if max_iter < count:
        break
      # sentence = sys.stdin.readline()
      # while sentence:
      print(sentence)
      # Get token-ids for the input sentence.
      token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), en_vocab)
      # Which bucket does it belong to?
      bucket_id = len(_buckets) - 1
      for i, bucket in enumerate(_buckets):
        if bucket[0] >= len(token_ids):
          bucket_id = i
          break
      else:
        logging.warning("Sentence truncated: %s", sentence)
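      # For instance, with the stock tutorial buckets [(5, 10), (10, 15),
      # (20, 25), (40, 50)] a 7-token input falls into bucket 1; inputs longer
      # than 40 tokens keep the last bucket and trigger the warning above.
      # (Illustrative values; this file's _buckets may differ.)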

      # Get a 1-element batch to feed the sentence to the model.
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          {bucket_id: [(token_ids, [])]}, bucket_id)
      # Get output logits for the sentence.
      _, _, output_logits,enc_all_state,_ = model.step(sess, encoder_inputs, decoder_inputs,
                                       target_weights, bucket_id, True,0)
      quit()
Beispiel #19
0
def train():
    """Train a en->fr translation model using WMT data."""
    # Prepare WMT data.
    print("Preparing WMT data in %s" % FLAGS.data_dir)
    en_train, fr_train, en_dev, fr_dev, _, _ = data_utils.prepare_wmt_data(
        FLAGS.data_dir, FLAGS.eng_vocab_size, FLAGS.hin_vocab_size)

    save_path = os.path.join(FLAGS.train_dir, "summary/")
    with tf.Session() as sess:
        # Create model.
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)
        test_writer = tf.train.SummaryWriter(os.path.join(save_path, 'test'),
                                             graph=sess.graph)
        train_writer = tf.train.SummaryWriter(os.path.join(save_path, 'train'),
                                              graph=sess.graph)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)." %
              FLAGS.max_train_data_size)
        dev_set = read_data(en_dev, fr_dev)
        train_set = read_data(en_train, fr_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
        # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
        # the size of the i-th training bucket, as used later.
        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        perplexity_eval_summary = tf.Summary()
        perplexity_train_summary = tf.Summary()
        eps1 = exp_decay(float("inf"))
        #print(eps1)
        decode1 = sampling(eps1, float("inf"), _buckets[-1][1] + 1)
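        # exp_decay/sampling appear to implement scheduled sampling: eps is the
        # probability of feeding ground-truth tokens, and decode holds the
        # per-position sampling decisions. The float("inf") variant here is the
        # fully-decayed setting used later for the dev-set evals.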

        while current_step < 80001:
            # Choose a bucket according to data distribution. We pick a random number
            # in [0, 1] and use the corresponding interval in train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])
            eps = exp_decay(current_step * 1.0)
            #print(eps)
            decode = sampling(eps, current_step * 1.0, _buckets[-1][1] + 1)
            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _, _ = model.step(sess, encoder_inputs,
                                            decoder_inputs, target_weights,
                                            bucket_id, decode, False)
            step_time += (time.time() -
                          start_time) / FLAGS.steps_per_checkpoint
            #loss += step_loss / FLAGS.steps_per_checkpoint
            loss = step_loss
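            # Note: the running average above is commented out, so the
            # perplexity reported below reflects only the most recent step.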
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(
                    float(loss)) if loss < 300 else float("inf")
                bucket_trainvalue = perplexity_train_summary.value.add()
                bucket_trainvalue.tag = "perplexity_trainbucket_%d" % bucket_id
                bucket_trainvalue.simple_value = perplexity
                train_writer.add_summary(perplexity_train_summary,
                                         model.global_step.eval())
                print(
                    "global step %d learning rate %.4f step-time %.2f perplexity "
                    "%.2f bucketid: %d epsilon: %f" %
                    (model.global_step.eval(), model.learning_rate.eval(),
                     step_time, perplexity, bucket_id, eps))
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(
                        previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                # Summarize the learning rate itself; summarizing the decay op
                # would decay the rate as a side effect of running the summary.
                learning_rate = tf.scalar_summary('learning_rate',
                                                  model.learning_rate)
                learning_str = sess.run(learning_rate)
                train_writer.add_summary(learning_str,
                                         model.global_step.eval())
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir,
                                               "translate.ckpt")
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                # Run evals on development set and print their perplexity.
                for bucket_id in xrange(len(_buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        print("  eval: empty bucket %d" % (bucket_id))
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _, _ = model.step(sess, encoder_inputs,
                                                    decoder_inputs,
                                                    target_weights, bucket_id,
                                                    decode1, True)
                    eval_ppx = math.exp(
                        float(eval_loss)) if eval_loss < 300 else float("inf")
                    bucket_value = perplexity_eval_summary.value.add()
                    bucket_value.tag = "perplexity_evalbucket_%d" % bucket_id
                    bucket_value.simple_value = eval_ppx
                    test_writer.add_summary(perplexity_eval_summary,
                                            model.global_step.eval())
                    print("  eval: bucket %d perplexity %.2f" %
                          (bucket_id, eval_ppx))
                sys.stdout.flush()
Beispiel #20
0
def train():
    """Train a en->fr translation model using WMT data."""
    from_train = None
    to_train = None
    from_dev = None
    to_dev = None
    print(FLAGS.data_dir)
    print(FLAGS.to_dev_data)
    if FLAGS.from_train_data and FLAGS.to_train_data:
        from_train_data = FLAGS.from_train_data
        to_train_data = FLAGS.to_train_data
        from_dev_data = from_train_data
        to_dev_data = to_train_data
        if FLAGS.from_dev_data and FLAGS.to_dev_data:
            from_dev_data = FLAGS.from_dev_data
            to_dev_data = FLAGS.to_dev_data
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_data(
            FLAGS.data_dir, from_train_data, to_train_data, from_dev_data,
            to_dev_data, FLAGS.from_vocab_size, FLAGS.to_vocab_size)
    else:
        # Prepare WMT data.
        print("Preparing WMT data in %s" % FLAGS.data_dir)
        from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_wmt_data(
            FLAGS.data_dir, FLAGS.from_vocab_size, FLAGS.to_vocab_size)

    with tf.Session(config=config) as sess:
        # Create model.
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)." %
              FLAGS.max_train_data_size)
        dev_set = read_data(from_dev, to_dev)
        train_set = read_data(from_train, to_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
        # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
        # the size of the i-th training bucket, as used later.
        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        while True:
            # Choose a bucket according to data distribution. We pick a random number
            # in [0, 1] and use the corresponding interval in train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() -
                          start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(
                    float(loss)) if loss < 300 else float("inf")
                print(
                    "global step %d learning rate %.4f step-time %.2f perplexity "
                    "%.2f" %
                    (model.global_step.eval(), model.learning_rate.eval(),
                     step_time, perplexity))
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(
                        previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir,
                                               "translate.ckpt")
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                # Run evals on development set and print their perplexity.
                for bucket_id in xrange(len(_buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        print("  eval: empty bucket %d" % (bucket_id))
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs,
                                                 decoder_inputs,
                                                 target_weights, bucket_id,
                                                 True)
                    eval_ppx = math.exp(
                        float(eval_loss)) if eval_loss < 300 else float("inf")
                    print("  eval: bucket %d perplexity %.2f" %
                          (bucket_id, eval_ppx))
                sys.stdout.flush()
Beispiel #21
0
def train():
    """Train a en->fr translation model using WMT data."""
    np.set_printoptions(suppress=True)
    # Prepare WMT data.
    print("Preparing WMT data in %s" % env.config.get("model", "data_dir"))
    en_train, fr_train, type_train, en_dev, fr_dev, type_dev, en_test, fr_test, type_test, _, _ = data_utils.prepare_wmt_data(
        env.config.get("model", "data_dir"),
        env.config.getint("model", "en_vocab_size"),
        latent=True,
        n_sense=env.config.getint("model", "num_z"))

    with tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                          allow_soft_placement=True)) as sess:

        # Create model.
        print("Creating %d layers of %d units." % (env.config.getint(
            "model", "num_layers"), env.config.getint("model", "size")))
        model = create_model(sess, False)

        show_all_variables()

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)." %
              env.config.getint("model", "max_train_data_size"))
        dev_set, _ = read_data(en_dev, fr_dev, type_dev)
        #dev_set, _ = read_data(en_test, fr_test, type_test)
        train_set, train_order = read_data(en_train,
                                           fr_train,
                                           type_train,
                                           max_size=None)
        # test_set = read_data(en_test, fr_test, type_test, env.config.getint("model", "max_train_data_size"))
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = int(sum(train_bucket_sizes))

        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        dev_bucket_sizes = [len(dev_set[b]) for b in xrange(len(_buckets))]
        dev_total_size = int(sum(dev_bucket_sizes))

        # Derive per-epoch and per-dev-pass step counts and the checkpoint
        # interval from the data and batch sizes.

        batch_size = env.config.getint("model", "batch_size")
        num_z = env.config.getint("model", "num_z")
        n_epoch = env.config.getint("model", "n_epoch")
        steps_per_epoch = int(train_total_size / batch_size)
        steps_per_dev = int(dev_total_size / batch_size)

        steps_per_checkpoint = steps_per_dev * 4
        total_steps = steps_per_epoch * n_epoch
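        # For example (illustrative numbers only): 200,000 training pairs with
        # batch_size 64 give ~3,125 steps per epoch; a 6,400-sentence dev set
        # gives 100 dev steps, i.e. a checkpoint every 400 steps.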

        # reports
        print(_buckets)
        print("Train:")
        print("total: {}".format(train_total_size))
        print("buckets: ", train_bucket_sizes)
        print("Dev:")
        print("total: {}".format(dev_total_size))
        print("buckets: ", dev_bucket_sizes)
        print()
        print("Steps_per_epoch:", steps_per_epoch)
        print("Total_steps:", total_steps)
        print("Steps_per_checkpoint:", steps_per_checkpoint)

        with_labeled_data = True
        isSGD = False

        # This is the training loop
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        his = []
        local_alpha = 0.05
        low_ppx = 10000000
        low_ppx_step = 0

        dite = DataIterator(model, train_set, len(train_buckets_scale), num_z,
                            batch_size, train_buckets_scale, train_order)

        iteType = env.config.getint('model', 'iteType')
        if iteType == 0:
            print("withRandom")
            ite = dite.next_random()
        elif iteType == 1:
            print("withSequence")
            ite = dite.next_sequence()
        elif iteType == 2:
            print("withOrder")
            assert (batch_size == 1)
            ite = dite.next_sequence_continous()

        while current_step < total_steps:

            # for training data
            if with_labeled_data:

                start_time = time.time()

                encoder_inputs, decoder_inputs, target_weights, hiddens, bucket_id = ite.next(
                )

                print(len(encoder_inputs))

                _, _, _, L, norm, Q = model.batch_step(
                    sess,
                    encoder_inputs,
                    decoder_inputs,
                    target_weights,
                    bucket_id,
                    labeled=True,
                    true_hidden_inputs=hiddens)
                step_time += (time.time() - start_time) / steps_per_checkpoint
                loss += (-L) / steps_per_checkpoint / batch_size
                current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % steps_per_checkpoint == 0:
                print("--------------------", "TRAIN", current_step,
                      "-------------------")
                # Print statistics for the previous epoch.
                perplexity = math.exp(
                    float(loss)) if loss < 300 else float("inf")
                print(
                    "global step %d learning rate %.4f step-time %.2f perplexity "
                    "%.2f" %
                    (model.global_step.eval(), model.learning_rate.eval(),
                     step_time, perplexity))
                train_ppx = perplexity
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(
                    env.config.get("model", "train_dir"), "translate.ckpt")
                if env.config.getboolean('model', "saveCheckpoint"):
                    print("Saving model....")
                    model.saver.save(sess,
                                     checkpoint_path,
                                     global_step=model.global_step)
                step_time, loss = 0.0, 0.0

                # dev data
                print("--------------------", "DEV", current_step,
                      "-------------------")
                Q, L, cost, accuracy, eval_ppx = evaluate(sess,
                                                          model,
                                                          dev_set,
                                                          _buckets,
                                                          name="dev",
                                                          show_stat=True,
                                                          show_basic=True,
                                                          show_sample=True)

                his.append(
                    [current_step, Q, L, cost, accuracy, train_ppx, eval_ppx])
                if eval_ppx < low_ppx:
                    low_ppx = eval_ppx
                    low_ppx_step = current_step

                sys.stdout.flush()
                # Stop early if the current eval perplexity is worse than all
                # of the last five checkpoints (learning-rate decay is disabled
                # here).
                if len(previous_losses) > 5 and eval_ppx > max(
                        previous_losses[-5:]):
                    break
                    #sess.run(model.learning_rate_decay_op)
                previous_losses.append(eval_ppx)

                # increase alpha
                if env.config.getboolean("model", "withAlpha"):
                    if local_alpha + 0.1 <= 1.0:
                        local_alpha += 0.1
                        with tf.variable_scope('', reuse=True) as scope:
                            alpha = tf.get_variable(
                                "embedding_rnn_seq2seq_latent/alpha")
                            sess.run(alpha.assign([local_alpha]))
                    print("alpha", local_alpha)
                    print()

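    # After training, pick the checkpoint with the lowest dev perplexity
    # (the last column of each history record) and report it.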
    low_index = 0
    low_ppx = 1000000000
    for i in xrange(len(his)):
        ep = his[i][-1]
        if low_ppx > ep:
            low_ppx = ep
            low_index = i

    theone = his[low_index]
    print(theone[0], "{:2f}/{:2f}".format(theone[-2], theone[-1]), theone[-3])

    df = pd.DataFrame(his)
    df.columns = [
        "step", "Q", "L", "cost", "Accuracy", "Train_ppx", "Eval_ppx"
    ]
    df.to_csv(os.path.join(env.config.get("model", "train_dir"), "log.csv"))