def train():
    # Prepare Headline data.
    print("Preparing Headline data in %s" % FLAGS.data_dir)
    src_train, dest_train, src_dev, dest_dev, _, _ = data_utils.prepare_headline_data(
        FLAGS.data_dir, FLAGS.vocab_size)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)
    session_conf = tf.ConfigProto(gpu_options=gpu_options)
    with tf.Session(config=session_conf) as sess:
        # Create model.
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)."
              % FLAGS.max_train_data_size)
        dev_set = read_data(src_dev, dest_dev)
        train_set = read_data(src_train, dest_train, FLAGS.max_train_data_size)

        # List of how many (already vectorized) sentences each bucket holds, e.g. [21, 43, 56].
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
        # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
        # the size of the i-th training bucket, as used later.
        trainbuckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        Epochs = 300
        for i in range(Epochs):
            # Choose a bucket according to the data distribution, i.e. with probability
            # proportional to its size. We pick a random number in [0, 1] and use the
            # corresponding interval in trainbuckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(trainbuckets_scale))
                if trainbuckets_scale[i] > random_number_01
            ])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            # print('*' * 80)
            # print(encoder_inputs)
            # During training, forward_only=False means the parameters are updated.
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(float(loss)) if loss < 300 else float("inf")
                print("global step %d learning rate %.4f step-time %.2f perplexity "
                      "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
                                step_time, perplexity))
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    # learning_rate_decay_op was defined as
                    #   self.learning_rate.assign(self.learning_rate * learning_rate_decay_factor),
                    # so running it effectively updates learning_rate.
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "headline_large.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                # Run evals on development set and print their perplexity.
                for bucket_id in xrange(len(buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        print("  eval: empty bucket %d" % (bucket_id))
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    # During evaluation, forward_only=True means the parameters are not updated.
                    _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                                 target_weights, bucket_id, True)
                    eval_ppx = math.exp(float(eval_loss)) if eval_loss < 300 else float("inf")
                    print("  eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
                sys.stdout.flush()
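# A minimal, standalone sketch (not part of train() above) of the proportional
# bucket sampling used in the training loop: a bucket is drawn with probability
# proportional to its size. The helper name and bucket sizes are hypothetical.
def _bucket_sampling_demo():
    import numpy as np

    train_bucket_sizes = [21, 43, 56]  # e.g. number of sentence pairs per bucket
    train_total_size = float(sum(train_bucket_sizes))
    # Cumulative fractions in (0, 1]; interval i has width proportional to bucket i.
    buckets_scale = [
        sum(train_bucket_sizes[:i + 1]) / train_total_size
        for i in range(len(train_bucket_sizes))
    ]
    # Draw a number in [0, 1) and take the first interval whose upper bound exceeds it.
    random_number_01 = np.random.random_sample()
    bucket_id = min(i for i in range(len(buckets_scale))
                    if buckets_scale[i] > random_number_01)
    return bucket_id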
def train():
    # Prepare Headline data.
    print("Preparing Headline data in %s" % FLAGS.data_dir)
    src_train, dest_train, src_dev, dest_dev, _, _ = data_utils.prepare_headline_data(
        FLAGS.data_dir, FLAGS.vocab_size)

    # Device config for CPU usage.
    config = tf.ConfigProto(
        device_count={"CPU": 4},           # limit to 4 CPU devices
        inter_op_parallelism_threads=1,
        intra_op_parallelism_threads=2)    # number of threads used to parallelize ops
    with tf.Session(config=config) as sess:
        # Create model.
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)."
              % FLAGS.max_train_data_size)
        dev_set = read_data(src_dev, dest_dev)
        train_set = read_data(src_train, dest_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
        # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
        # the size of the i-th training bucket, as used later.
        trainbuckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        while True:
            # Choose a bucket according to data distribution. We pick a random number
            # in [0, 1] and use the corresponding interval in trainbuckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(trainbuckets_scale))
                if trainbuckets_scale[i] > random_number_01
            ])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(float(loss)) if loss < 300 else float("inf")
                print("global step %d learning rate %.4f step-time %.2f perplexity "
                      "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
                                step_time, perplexity))
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "headline_large.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                # Run evals on development set and print their perplexity.
                for bucket_id in xrange(len(buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        print("  eval: empty bucket %d" % (bucket_id))
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                                 target_weights, bucket_id, True)
                    eval_ppx = math.exp(float(eval_loss)) if eval_loss < 300 else float("inf")
                    print("  eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
                sys.stdout.flush()
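# A minimal sketch of the learning-rate decay rule applied at each checkpoint
# above: if the current checkpoint loss is no better than the worst of the last
# three checkpoints, shrink the learning rate. The function name and decay
# factor below are illustrative, not part of the original model code.
def _maybe_decay_learning_rate(learning_rate, loss, previous_losses,
                               decay_factor=0.99):
    if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
        # Mirrors sess.run(model.learning_rate_decay_op), which assigns
        # learning_rate * learning_rate_decay_factor back to learning_rate.
        learning_rate *= decay_factor
    previous_losses.append(loss)
    return learning_rate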
def train():
    # Prepare Headline data.
    print("Preparing Headline data in %s" % FLAGS.data_dir)
    src_train, dest_train, src_dev, dest_dev, _, _ = data_utils.prepare_headline_data(
        FLAGS.data_dir, FLAGS.vocab_size)

    # Device config for CPU usage.
    # config = tf.ConfigProto(device_count={"CPU": 4},         # limit to 4 CPU devices
    #                         inter_op_parallelism_threads=1,
    #                         intra_op_parallelism_threads=2)  # number of threads used to parallelize ops
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # Create model.
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)."
              % FLAGS.max_train_data_size)
        dev_set = read_data(src_dev, dest_dev)
        train_set = read_data(src_train, dest_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
        # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
        # the size of the i-th training bucket, as used later.
        trainbuckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        # Template used for the progress/time display.
        metrics = ' '.join(
            ['\r{:.1f}%', '{}/{}', 'loss={:.3f}', 'gradients={:.3f}', '{}/{}'])
        bars_max = 20
        for current_step in range(FLAGS.num_epoch):
            print("\n")
            print('Epoch {}:'.format(current_step))
            epoch_trained = 0
            batch_loss = []
            batch_gradients = []
            time_start = time.time()
            # index_sum = 0
            while True:
                # Choose a bucket according to data distribution. We pick a random number
                # in [0, 1] and use the corresponding interval in trainbuckets_scale.
                random_number_01 = np.random.random_sample()
                bucket_id = min([
                    i for i in xrange(len(trainbuckets_scale))
                    if trainbuckets_scale[i] > random_number_01
                ])

                # Get a batch and make a step.
                encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                    train_set, bucket_id)
                step_gradients, step_loss, _ = model.step(
                    sess, encoder_inputs, decoder_inputs, target_weights,
                    bucket_id, False)
                epoch_trained += FLAGS.batch_size
                batch_loss.append(step_loss)
                batch_gradients.append(step_gradients)
                time_now = time.time()
                time_spend = time_now - time_start
                # Extrapolate the total epoch time from the fraction processed so far.
                time_estimate = time_spend / (epoch_trained / float(FLAGS.num_per_epoch))
                percent = min(100, epoch_trained / float(FLAGS.num_per_epoch) * 100)
                # bars = math.floor(percent / 100 * bars_max)
                sys.stdout.write(
                    metrics.format(
                        percent,
                        epoch_trained,
                        FLAGS.num_per_epoch,
                        # Average the batch losses and gradient norms.
                        np.mean(batch_loss),
                        np.mean(batch_gradients),
                        data_utils.time(time_spend),
                        data_utils.time(time_estimate)))
                print("\n")
                sys.stdout.flush()
                # index_sum += 1
                # if index_sum > 4:
                #     sys.exit()
                if FLAGS.num_per_epoch < epoch_trained:
                    break

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "headline_large.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
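# A minimal sketch (with a hypothetical helper name) of the progress/ETA
# arithmetic used in the epoch loop above: the elapsed time is extrapolated by
# the fraction of the epoch processed so far.
def _progress_estimate(epoch_trained, num_per_epoch, time_spend):
    fraction_done = epoch_trained / float(num_per_epoch)
    percent = min(100.0, fraction_done * 100)
    # If 25% of the epoch took time_spend seconds, the whole epoch is
    # estimated to take time_spend / 0.25 seconds.
    time_estimate = time_spend / fraction_done
    return percent, time_estimate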