Example #1
def train():
    # Prepare Headline data.
    print("Preparing Headline data in %s" % FLAGS.data_dir)
    src_train, dest_train, src_dev, dest_dev, _, _ = data_utils.prepare_headline_data(
        FLAGS.data_dir, FLAGS.vocab_size)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)
    session_conf = tf.ConfigProto(gpu_options=gpu_options)
    with tf.Session(config=session_conf) as sess:
        # Create model.
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)." %
              FLAGS.max_train_data_size)
        dev_set = read_data(src_dev, dest_dev)
        train_set = read_data(src_train, dest_train, FLAGS.max_train_data_size)
        # List of how many sentences each bucket holds (the sentences are already vectorized), e.g. [21, 43, 56].
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
        # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
        # the size of the i-th training bucket, as used later.
        trainbuckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []

        num_epochs = 300
        for epoch in range(num_epochs):
            # Randomly select a bucket in proportion to its size: pick a random number
            # in [0, 1] and use the corresponding interval in trainbuckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(trainbuckets_scale))
                if trainbuckets_scale[i] > random_number_01
            ])

            # Get a batch and make a step.
            start_time = time.time()

            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            #print('*'*80)
            #print(encoder_inputs)
            # During training, forward_only=False, so parameters are updated.
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() -
                          start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(
                    float(loss)) if loss < 300 else float("inf")
                print(
                    "global step %d learning rate %.4f step-time %.2f perplexity "
                    "%.2f" %
                    (model.global_step.eval(), model.learning_rate.eval(),
                     step_time, perplexity))
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(
                        previous_losses[-3:]):
                    # self.learning_rate_decay_op = self.learning_rate.assign(
                    #     self.learning_rate * learning_rate_decay_factor)
                    # i.e. running this op updates learning_rate in place.
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir,
                                               "headline_large.ckpt")
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                # Run evals on development set and print their perplexity.
                for bucket_id in xrange(len(buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        print("  eval: empty bucket %d" % (bucket_id))
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    # During evaluation, forward_only=True, so parameters are not updated.
                    _, eval_loss, _ = model.step(sess, encoder_inputs,
                                                 decoder_inputs,
                                                 target_weights, bucket_id,
                                                 True)
                    eval_ppx = math.exp(
                        float(eval_loss)) if eval_loss < 300 else float("inf")
                    print("  eval: bucket %d perplexity %.2f" %
                          (bucket_id, eval_ppx))
                sys.stdout.flush()
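Note on the bucket selection above: trainbuckets_scale is a cumulative distribution over the bucket sizes, so a bucket is drawn with probability proportional to how many sentence pairs it holds. A minimal standalone sketch of that sampling, using hypothetical bucket sizes and no model code:

import numpy as np

train_bucket_sizes = [21, 43, 56]  # hypothetical sentence counts per bucket
train_total_size = float(sum(train_bucket_sizes))

# Cumulative fractions in (0, 1]; each interval's width is proportional
# to the corresponding bucket's share of the data.
trainbuckets_scale = [
    sum(train_bucket_sizes[:i + 1]) / train_total_size
    for i in range(len(train_bucket_sizes))
]  # -> [0.175, 0.533..., 1.0]

random_number_01 = np.random.random_sample()
bucket_id = min(i for i in range(len(trainbuckets_scale))
                if trainbuckets_scale[i] > random_number_01)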
Example #2
def train():
    # Prepare Headline data.
    print("Preparing Headline data in %s" % FLAGS.data_dir)
    src_train, dest_train, src_dev, dest_dev, _, _ = data_utils.prepare_headline_data(
        FLAGS.data_dir, FLAGS.vocab_size)

    # device config for CPU usage
    config = tf.ConfigProto(
        device_count={"CPU": 4},  # limit to 4 CPU usage
        inter_op_parallelism_threads=1,
        intra_op_parallelism_threads=2)  # n threads parallel for ops

    with tf.Session(config=config) as sess:
        # Create model.
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)." %
              FLAGS.max_train_data_size)
        dev_set = read_data(src_dev, dest_dev)
        train_set = read_data(src_train, dest_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
        # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
        # the size of the i-th training bucket, as used later.
        trainbuckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        while True:
            # Choose a bucket according to data distribution. We pick a random number
            # in [0, 1] and use the corresponding interval in trainbuckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(trainbuckets_scale))
                if trainbuckets_scale[i] > random_number_01
            ])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() -
                          start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(
                    float(loss)) if loss < 300 else float("inf")
                print(
                    "global step %d learning rate %.4f step-time %.2f perplexity "
                    "%.2f" %
                    (model.global_step.eval(), model.learning_rate.eval(),
                     step_time, perplexity))
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(
                        previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir,
                                               "headline_large.ckpt")
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                # Run evals on development set and print their perplexity.
                for bucket_id in xrange(len(buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        print("  eval: empty bucket %d" % (bucket_id))
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs,
                                                 decoder_inputs,
                                                 target_weights, bucket_id,
                                                 True)
                    eval_ppx = math.exp(
                        float(eval_loss)) if eval_loss < 300 else float("inf")
                    print("  eval: bucket %d perplexity %.2f" %
                          (bucket_id, eval_ppx))
                sys.stdout.flush()
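Both examples above lower the learning rate when the loss has not improved over the last three checkpoints. A minimal sketch of the decay op those checks trigger, based on the assignment shown in the commented-out lines of Example #1; the initial rate of 0.5 and decay factor of 0.99 are illustrative assumptions, not values from the examples:

import tensorflow as tf

learning_rate = tf.Variable(0.5, trainable=False, dtype=tf.float32)
learning_rate_decay_factor = 0.99
learning_rate_decay_op = learning_rate.assign(
    learning_rate * learning_rate_decay_factor)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(learning_rate_decay_op)   # learning_rate becomes 0.5 * 0.99
    print(sess.run(learning_rate))     # 0.495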
Example #3
def train():
    # Prepare Headline data.
    print("Preparing Headline data in %s" % FLAGS.data_dir)
    src_train, dest_train, src_dev, dest_dev, _, _ = data_utils.prepare_headline_data(
        FLAGS.data_dir, FLAGS.vocab_size)

    # device config for CPU usage
    # config = tf.ConfigProto(device_count={"CPU": 4}, # limit to 4 CPU usage
    #                  inter_op_parallelism_threads=1,
    #                  intra_op_parallelism_threads=2) # n threads parallel for ops

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # Create model.
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)." %
              FLAGS.max_train_data_size)
        dev_set = read_data(src_dev, dest_dev)
        train_set = read_data(src_train, dest_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
        # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
        # the size of the i-th training bucket, as used later.
        trainbuckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        # Format string for the progress/timing display.
        metrics = '  '.join(
            ['\r{:.1f}%', '{}/{}', 'loss={:.3f}', 'gradients={:.3f}', '{}/{}'])
        bars_max = 20

        for current_step in range(FLAGS.num_epoch):
            print("\n")
            print('Epoch {}:'.format(current_step))
            epoch_trained = 0
            batch_loss = []
            batch_gradients = []
            time_start = time.time()
            # index_sum = 0
            while True:
                # Choose a bucket according to data distribution. We pick a random number
                # in [0, 1] and use the corresponding interval in trainbuckets_scale.
                random_number_01 = np.random.random_sample()
                bucket_id = min([
                    i for i in xrange(len(trainbuckets_scale))
                    if trainbuckets_scale[i] > random_number_01
                ])

                # Get a batch and make a step.
                encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                    train_set, bucket_id)

                step_gradients, step_loss, _ = model.step(
                    sess, encoder_inputs, decoder_inputs, target_weights,
                    bucket_id, False)
                epoch_trained += FLAGS.batch_size
                batch_loss.append(step_loss)
                batch_gradients.append(step_gradients)
                time_now = time.time()
                time_spend = time_now - time_start
                time_estimate = time_spend / (epoch_trained /
                                              FLAGS.num_per_epoch)
                percent = min(100.0, 100.0 * epoch_trained / FLAGS.num_per_epoch)
                # bars = math.floor(percent / 100 * bars_max)
                sys.stdout.write(
                    metrics.format(
                        percent,
                        epoch_trained,
                        FLAGS.num_per_epoch,
                        # Average over the batch losses.
                        np.mean(batch_loss),
                        np.mean(batch_gradients),
                        data_utils.time(time_spend),
                        data_utils.time(time_estimate)))
                print("\n")
                sys.stdout.flush()
                # index_sum += 1
                # if index_sum > 4:
                #     sys.exit()

                if FLAGS.num_per_epoch < epoch_trained:
                    break

            # Once in a while, save a checkpoint.
            if current_step % FLAGS.steps_per_checkpoint == 0:

                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir,
                                               "headline_large.ckpt")
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
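The three examples differ mainly in how the tf.Session is configured. A small side-by-side sketch of the TF 1.x options they use, with the values taken from the examples above:

import tensorflow as tf

# Example #1: cap GPU memory at 70% of the device.
config_gpu_fraction = tf.ConfigProto(
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.7))

# Example #2: CPU-only limits on devices and threading.
config_cpu = tf.ConfigProto(device_count={"CPU": 4},
                            inter_op_parallelism_threads=1,
                            intra_op_parallelism_threads=2)

# Example #3: let GPU memory grow on demand instead of pre-allocating.
config_grow = tf.ConfigProto()
config_grow.gpu_options.allow_growth = True

with tf.Session(config=config_grow) as sess:
    pass  # create_model(sess, False), training loop, etc.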