def train(epochs=200, predict=False):
    """Build the multi-GPU policy-rollout training graph and run the train loop.

    Resumes epoch/global-step counters from ``train_dir/param.json``, builds one
    model tower per GPU (weights shared via ``reuse_variables``), averages the
    tower gradients on the CPU, then feeds mini-batches drawn from ``corpus``
    until ``epochs`` additional epochs have completed.

    Args:
        epochs: number of additional epochs to run on top of the persisted count.
        predict: when True, stop after building the graph and restoring weights
            and return ``(sess, saver)`` for inference instead of training.
    """
    param_file = "%s/param.json" % train_dir
    # Resume counters persisted by a previous run (0/0 on first run).
    params = param_unserierlize(param_file, init_params={"epoch": 0, "global_step": 0})
    global_epoch, global_step_val = int(params["epoch"]), int(params["global_step"])
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Create a variable to count the number of train() calls. This equals the
        # number of batches processed * FLAGS.num_gpus.
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(global_step_val), trainable=False)
        # Calculate the learning rate schedule.
        num_batchs_per_epochs = int(corpus.num_batchs_per_epochs(BATCH_SIZE))
        print("num_batches_per_epoch: %d" % num_batchs_per_epochs)
        decay_steps = int(num_batchs_per_epochs / gpu_num * NUM_EPOCHS_PER_DECAY)
        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                        global_step,
                                        decay_steps,
                                        LEARNING_RATE_DECAY_FACTOR,
                                        staircase=True)
        # Create an optimizer that performs gradient descent.
        # opt = tf.train.GradientDescentOptimizer(lr)
        opt = tf.train.AdamOptimizer(lr)
        # Calculate the gradients for each model tower.
        tower_grads = []
        tower_acc = []
        tower_feeds = []  # per-tower (input, labels) placeholders, fed each step
        for i in xrange(gpu_num):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
                    # all towers.
                    # Input is a flattened 15x15 board with `planes` feature
                    # planes per point — presumably a Gomoku/Renju board state;
                    # confirm against corpus sample encoding.
                    batch_input = tf.placeholder(tf.float32, [None, 15*15*planes])
                    batch_labels = tf.placeholder(tf.float32, shape=[None])
                    tower_feeds.append((batch_input, batch_labels))
                    loss = tower_loss(scope, batch_input, batch_labels)
                    # all accuracy
                    tower_acc.append(tf.get_collection('accuracy', scope)[0])
                    # Reuse variables for the next tower.
                    tf.get_variable_scope().reuse_variables()
                    # Retain the summaries from the final tower.
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
                    # Calculate the gradients for the batch of data on this CIFAR tower.
                    grads = opt.compute_gradients(loss)
                    # Keep track of the gradients across all towers.
                    tower_grads.append(grads)
        # average accuracy
        accuracy = tf.add_n(tower_acc) / len(tower_acc)
        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = average_gradients(tower_grads)
        # Add a summary to track the learning rate.
        summaries.append(tf.scalar_summary('learning_rate', lr))
        # Add histograms for gradients.
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.histogram_summary(var.op.name + '/gradients', grad))
        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            summaries.append(tf.histogram_summary(var.op.name, var))
        # Track the moving averages of all trainable variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())
        # Group all updates to into a single train op.
        train_op = tf.group(apply_gradient_op, variables_averages_op)
        # Create a saver.
        # saver = tf.train.Saver(tf.all_variables())
        saver = tf.train.Saver()
        # Build the summary operation from the last tower summaries.
        summary_op = tf.merge_summary(summaries)
        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()
        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU
        # implementations.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=GPU_MEMERY_ALLOCATE)
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False,
            gpu_options=gpu_options)
        )
        sess.run(init)
        # restore model
        restore_model(sess, train_dir, saver)
        if predict:
            # Caller wants an inference-ready session, not a training run.
            return sess, saver
        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        graph_def = sess.graph.as_graph_def(add_shapes=True)
        summary_writer = tf.train.SummaryWriter(train_dir, graph_def=graph_def)
        avg_loss, avg_acc = [], []
        epochs_step = global_epoch + 1
        step = 0
        while epochs_step <= (global_epoch + epochs):
            # One sess.run drives all towers, so `step` advances by gpu_num batches.
            step += gpu_num
            start_time = time.time()
            # _, loss_value, acc_value, global_step_val = sess.run([train_op, loss, accuracy, global_step])
            feeds = {}
            for idx in xrange(gpu_num):
                samples = corpus.next_fetch_rows(BATCH_SIZE)
                feature = np.array([sample[0].get_states(flatten=True) for sample in samples], dtype=np.float32)
                labels = np.array([sample[1] for sample in samples], dtype=np.float32)
                feeds[tower_feeds[idx][0]] = feature
                feeds[tower_feeds[idx][1]] = labels
            # NOTE(review): `loss` is the LAST tower's loss only (loop variable),
            # while `accuracy` is averaged across towers — confirm this asymmetry
            # in the logged metrics is intended.
            _, loss_value, acc_value, global_step_val, learn_rating = sess.run(
                [train_op, loss, accuracy, global_step, lr], feed_dict=feeds)
            elapsed_time = int((time.time() - start_time) * 1000)
            avg_loss.append(loss_value)
            avg_acc.append(acc_value)
            global_step_val = int(global_step_val)
            if global_step_val % 10 == 0:
                logger.info(
                    "train policy rollout multi_GPU network, epoch=%d, step=%d, loss=%.6f, acc=%.6f, lr=%.6f, time=%d(ms)" % (
                        epochs_step, step, loss_value, acc_value, learn_rating, elapsed_time))
            # if global_step_val % 100 == 0:
            #     summary_str = sess.run(summary_op)
            #     summary_writer.add_summary(summary_str, step)
            if step > num_batchs_per_epochs:
                # Epoch boundary: wrap the step counter and report epoch averages.
                step = step % num_batchs_per_epochs
                epochs_step += 1
                average_loss = sum(avg_loss) / len(avg_loss)
                average_acc = sum(avg_acc) / len(avg_acc)
                avg_loss, avg_acc = [], []
                logger.info("train policy rollout multi_GPU network, epochs=%d, average_loss=%.7f, average_acc=%.7f" % (epochs_step, average_loss, average_acc))
                # Save the model checkpoint periodically.
                if epochs_step % 5 == 0:
                    param_serierlize(param_file, {"epoch": int(epochs_step), "global_step": int(global_step_val)})
                    filename = save_model(sess, train_dir, saver,
                                          "policy_rollout_epoch_%d" % epochs_step,
                                          global_step=global_step_val)
                    logger.info("save policy rollout multi_GPU model: %s" % filename)
def train(epochs=200):
    """Build the distributed (ps/worker) training graph and run the train loop.

    Variables are pinned to the parameter server ('/job:ps/task:0/cpu:0'); one
    model tower is placed on each worker listed in CLUSTER_CONFIG["worker_hosts"],
    and a single grpc session pointed at the first worker drives the averaged
    gradient update.

    NOTE(review): this source defines `train` more than once; if the definitions
    share a module, the later one shadows this one — verify the file layout.

    Args:
        epochs: number of additional epochs to run on top of the persisted count.
    """
    param_file = "%s/param.json" % train_dir
    # Resume counters persisted by a previous run (0/0 on first run).
    params = param_unserierlize(param_file, init_params={"epoch": 0, "global_step": 0})
    global_epoch, global_step_val = int(params["epoch"]), int(params["global_step"])
    with tf.Graph().as_default(), tf.device('/job:ps/task:0/cpu:0'):
        # Create a variable to count the number of train() calls. This equals the
        # number of batches processed * FLAGS.num_gpus.
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(global_step_val), trainable=False)
        # Calculate the learning rate schedule.
        num_batchs_per_epochs = corpus.num_batchs_per_epochs(BATCH_SIZE)
        print("num_batches_per_epoch: %d" % num_batchs_per_epochs)
        decay_steps = int(num_batchs_per_epochs * NUM_EPOCHS_PER_DECAY)
        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                        global_step,
                                        decay_steps,
                                        LEARNING_RATE_DECAY_FACTOR,
                                        staircase=True)
        # Create an optimizer that performs gradient descent.
        opt = tf.train.GradientDescentOptimizer(lr)
        # Calculate the gradients for each model tower.
        tower_grads = []
        tower_acc = []
        for i in xrange(len(CLUSTER_CONFIG["worker_hosts"])):
            # worker_hosts[i] appears to be (grpc_address, device_string,
            # tower_loss argument) — TODO confirm against CLUSTER_CONFIG.
            gpu_device = CLUSTER_CONFIG["worker_hosts"][i][1]
            with tf.device('/job:worker/task:%d/%s' % (i, gpu_device)):
                with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
                    # all towers.
                    loss = tower_loss(scope, CLUSTER_CONFIG["worker_hosts"][i][2])
                    # all accuracy
                    tower_acc.append(tf.get_collection('accuracy', scope)[0])
                    # Reuse variables for the next tower.
                    tf.get_variable_scope().reuse_variables()
                    # Retain the summaries from the final tower.
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
                    # Calculate the gradients for the batch of data on this CIFAR tower.
                    grads = opt.compute_gradients(loss)
                    # Keep track of the gradients across all towers.
                    tower_grads.append(grads)
        # average accuracy
        accuracy = tf.add_n(tower_acc) / len(tower_acc)
        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = average_gradients(tower_grads)
        # Add a summary to track the learning rate.
        summaries.append(tf.scalar_summary('learning_rate', lr))
        # Add histograms for gradients.
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.histogram_summary(var.op.name + '/gradients', grad))
        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            summaries.append(tf.histogram_summary(var.op.name, var))
        # Track the moving averages of all trainable variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())
        # Group all updates to into a single train op.
        train_op = tf.group(apply_gradient_op, variables_averages_op)
        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())
        # Build the summary operation from the last tower summaries.
        summary_op = tf.merge_summary(summaries)
        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()
        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU
        # implementations.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
        # Session is created against the first worker's grpc endpoint.
        sess = tf.Session("grpc://" + CLUSTER_CONFIG["worker_hosts"][0][0],
                          config=tf.ConfigProto(
                              allow_soft_placement=True,
                              log_device_placement=True,
                              gpu_options=gpu_options))
        sess.run(init)
        # restore model
        restore_model(sess, train_dir, saver)
        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        graph_def = sess.graph.as_graph_def(add_shapes=True)
        summary_writer = tf.train.SummaryWriter(train_dir, graph_def=graph_def)
        # Ring buffers holding one epoch of per-step metrics; pre-filled with
        # zeros, so the first epoch's reported average is biased low until full.
        avg_loss, avg_acc = [0] * num_batchs_per_epochs, [0] * num_batchs_per_epochs
        epochs_step = global_epoch + 1
        step = 0
        while epochs_step <= (global_epoch + epochs):
            step += 1
            start_time = time.time()
            _, loss_value, acc_value, global_step_val = sess.run([train_op, loss, accuracy, global_step])
            elapsed_time = int((time.time() - start_time) * 1000)
            avg_loss[step % num_batchs_per_epochs] = loss_value
            avg_acc[step % num_batchs_per_epochs] = acc_value
            global_step_val = int(global_step_val)
            if global_step_val % 2 == 0:
                logger.info("train policy dl dist network, epoch=%d, step=%d, loss=%.6f, acc=%.6f, time=%d(ms)" % (
                    epochs_step, step, loss_value, acc_value, elapsed_time))
            if global_step_val % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)
            if step > num_batchs_per_epochs:
                # Epoch boundary: wrap the step counter and report epoch averages.
                step = step % num_batchs_per_epochs
                epochs_step += 1
                average_loss = sum(avg_loss) / len(avg_loss)
                average_acc = sum(avg_acc) / len(avg_acc)
                logger.info("train policy dl dist network, epochs=%d, average_loss=%.7f, average_acc=%.7f" % (epochs_step, average_loss, average_acc))
            # Save the model checkpoint periodically.
            # NOTE(review): placed at loop level so it fires on the last step of
            # an epoch (step == num_batchs_per_epochs) every 20 epochs — confirm
            # this nesting matches the original intent.
            if step % num_batchs_per_epochs == 0 and epochs_step % 20 == 0:
                param_serierlize(param_file, {"epoch": int(epochs_step), "global_step": int(global_step_val)})
                filename = save_model(sess, train_dir, saver,
                                      "policy_dl_epoch_%d" % epochs_step,
                                      global_step=global_step_val)
                logger.info("save policy dl dist model: %s" % filename)
def network(epochs=200, predict=False):
    """Build the multi-GPU network graph and return a ready session plus handles.

    Unlike train(), this function only constructs the tower graph, creates and
    restores the session, and hands the pieces (ops, placeholders, writer) back
    to an external training loop.

    Args:
        epochs: unused in this function (never referenced) — apparently kept for
            signature parity with train(); TODO confirm before removing.
        predict: when True return only ``(sess, saver)`` after restoring.

    Returns:
        (sess, saver, summary_writer, train_op, loss, accuracy, global_step, lr,
        tower_feeds, tower_logits), or (sess, saver) when ``predict`` is True.
        Note ``loss`` is the last tower's loss op.
    """
    param_file = "%s/param.json" % train_dir
    # Resume the global step persisted by a previous run (0 on first run).
    params = param_unserierlize(param_file, init_params={"global_step": 0})
    global_step_val = int(params["global_step"])
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Create a variable to count the number of train() calls. This equals the
        # number of batches processed * FLAGS.num_gpus.
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(global_step_val), trainable=False)
        # Calculate the learning rate schedule.
        num_batchs_per_epochs = corpus.num_batchs_per_epochs(BATCH_SIZE)
        print("num_batches_per_epoch: %d" % num_batchs_per_epochs)
        decay_steps = int(num_batchs_per_epochs / gpu_num * NUM_EPOCHS_PER_DECAY)
        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                        global_step,
                                        decay_steps,
                                        LEARNING_RATE_DECAY_FACTOR,
                                        staircase=True)
        # Create an optimizer that performs gradient descent.
        # opt = tf.train.GradientDescentOptimizer(lr)
        opt = tf.train.AdamOptimizer(lr)
        # Calculate the gradients for each model tower.
        tower_grads = []
        tower_acc = []
        tower_feeds = []   # per-tower (input, labels, target) placeholders
        tower_logits = []  # per-tower logits op, for external inference/eval
        for i in xrange(gpu_num):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
                    # all towers.
                    # Input keeps the board's 2-D layout: (batch, board_size,
                    # board_size, planes).
                    batch_input = tf.placeholder(tf.float32, [None, board_size, board_size, planes])
                    batch_labels = tf.placeholder(tf.float32, shape=[None])
                    batch_target = tf.placeholder(tf.float32, shape=[None])
                    tower_feeds.append((batch_input, batch_labels, batch_target))
                    loss, logits = tower_loss(scope, batch_input, batch_labels, batch_target)
                    tower_logits.append(logits)
                    # all accuracy
                    tower_acc.append(tf.get_collection('accuracy', scope)[0])
                    # Reuse variables for the next tower.
                    tf.get_variable_scope().reuse_variables()
                    # Retain the summaries from the final tower.
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
                    # Calculate the gradients for the batch of data on this CIFAR tower.
                    grads = opt.compute_gradients(loss)
                    # Keep track of the gradients across all towers.
                    tower_grads.append(grads)
        # average accuracy
        accuracy = tf.add_n(tower_acc) / len(tower_acc)
        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = average_gradients(tower_grads)
        # Add a summary to track the learning rate.
        summaries.append(tf.scalar_summary('learning_rate', lr))
        # Add histograms for gradients.
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.histogram_summary(var.op.name + '/gradients', grad))
        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            summaries.append(tf.histogram_summary(var.op.name, var))
        # Track the moving averages of all trainable variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())
        # Group all updates to into a single train op.
        train_op = tf.group(apply_gradient_op, variables_averages_op)
        # Create a saver.
        # saver = tf.train.Saver(tf.all_variables())
        saver = tf.train.Saver()
        # Build the summary operation from the last tower summaries.
        summary_op = tf.merge_summary(summaries)
        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()
        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU
        # implementations.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=GPU_MEMERY_ALLOCATE)
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False,
            gpu_options=gpu_options)
        )
        sess.run(init)
        # restore model
        restore_model(sess, train_dir, saver)
        if predict:
            # Caller wants an inference-ready session, not the training handles.
            return sess, saver
        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        graph_def = sess.graph.as_graph_def(add_shapes=True)
        summary_writer = tf.train.SummaryWriter(train_dir, graph_def=graph_def)
        return sess, saver, summary_writer, train_op, loss, accuracy, global_step, lr, tower_feeds, tower_logits
def network(epochs=200, predict=False):
    """Build the multi-GPU network graph and return a ready session plus handles.

    This constructs the tower graph, creates and restores the session, and hands
    the pieces (ops, placeholders, writer) back to an external training loop.

    NOTE(review): this source contains an identical earlier definition of
    ``network``; if both live in the same module, this later one shadows it —
    verify whether the duplication is intentional.

    Args:
        epochs: unused in this function (never referenced) — apparently kept for
            signature parity with train(); TODO confirm before removing.
        predict: when True return only ``(sess, saver)`` after restoring.

    Returns:
        (sess, saver, summary_writer, train_op, loss, accuracy, global_step, lr,
        tower_feeds, tower_logits), or (sess, saver) when ``predict`` is True.
        Note ``loss`` is the last tower's loss op.
    """
    param_file = "%s/param.json" % train_dir
    # Resume the global step persisted by a previous run (0 on first run).
    params = param_unserierlize(param_file, init_params={"global_step": 0})
    global_step_val = int(params["global_step"])
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Create a variable to count the number of train() calls. This equals the
        # number of batches processed * FLAGS.num_gpus.
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(global_step_val), trainable=False)
        # Calculate the learning rate schedule.
        num_batchs_per_epochs = corpus.num_batchs_per_epochs(BATCH_SIZE)
        print("num_batches_per_epoch: %d" % num_batchs_per_epochs)
        decay_steps = int(num_batchs_per_epochs / gpu_num * NUM_EPOCHS_PER_DECAY)
        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                        global_step,
                                        decay_steps,
                                        LEARNING_RATE_DECAY_FACTOR,
                                        staircase=True)
        # Create an optimizer that performs gradient descent.
        # opt = tf.train.GradientDescentOptimizer(lr)
        opt = tf.train.AdamOptimizer(lr)
        # Calculate the gradients for each model tower.
        tower_grads = []
        tower_acc = []
        tower_feeds = []   # per-tower (input, labels, target) placeholders
        tower_logits = []  # per-tower logits op, for external inference/eval
        for i in xrange(gpu_num):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
                    # all towers.
                    # Input keeps the board's 2-D layout: (batch, board_size,
                    # board_size, planes).
                    batch_input = tf.placeholder(
                        tf.float32, [None, board_size, board_size, planes])
                    batch_labels = tf.placeholder(tf.float32, shape=[None])
                    batch_target = tf.placeholder(tf.float32, shape=[None])
                    tower_feeds.append(
                        (batch_input, batch_labels, batch_target))
                    loss, logits = tower_loss(scope, batch_input, batch_labels, batch_target)
                    tower_logits.append(logits)
                    # all accuracy
                    tower_acc.append(tf.get_collection('accuracy', scope)[0])
                    # Reuse variables for the next tower.
                    tf.get_variable_scope().reuse_variables()
                    # Retain the summaries from the final tower.
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
                    # Calculate the gradients for the batch of data on this CIFAR tower.
                    grads = opt.compute_gradients(loss)
                    # Keep track of the gradients across all towers.
                    tower_grads.append(grads)
        # average accuracy
        accuracy = tf.add_n(tower_acc) / len(tower_acc)
        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = average_gradients(tower_grads)
        # Add a summary to track the learning rate.
        summaries.append(tf.scalar_summary('learning_rate', lr))
        # Add histograms for gradients.
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.histogram_summary(var.op.name + '/gradients', grad))
        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            summaries.append(tf.histogram_summary(var.op.name, var))
        # Track the moving averages of all trainable variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())
        # Group all updates to into a single train op.
        train_op = tf.group(apply_gradient_op, variables_averages_op)
        # Create a saver.
        # saver = tf.train.Saver(tf.all_variables())
        saver = tf.train.Saver()
        # Build the summary operation from the last tower summaries.
        summary_op = tf.merge_summary(summaries)
        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()
        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU
        # implementations.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=GPU_MEMERY_ALLOCATE)
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                                log_device_placement=False,
                                                gpu_options=gpu_options))
        sess.run(init)
        # restore model
        restore_model(sess, train_dir, saver)
        if predict:
            # Caller wants an inference-ready session, not the training handles.
            return sess, saver
        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        graph_def = sess.graph.as_graph_def(add_shapes=True)
        summary_writer = tf.train.SummaryWriter(train_dir, graph_def=graph_def)
        return sess, saver, summary_writer, train_op, loss, accuracy, global_step, lr, tower_feeds, tower_logits
def train(epochs=200):
    """Build the distributed (ps/worker) training graph and run the train loop.

    Variables are pinned to the parameter server ('/job:ps/task:0/cpu:0'); one
    model tower is placed on each worker listed in CLUSTER_CONFIG["worker_hosts"],
    and a single grpc session pointed at the first worker drives the averaged
    gradient update.

    NOTE(review): this appears to be a reformatted duplicate of an earlier
    `train` definition in this source; if they share a module, this later one
    shadows the earlier — verify whether the duplication is intentional.

    Args:
        epochs: number of additional epochs to run on top of the persisted count.
    """
    param_file = "%s/param.json" % train_dir
    # Resume counters persisted by a previous run (0/0 on first run).
    params = param_unserierlize(param_file, init_params={
        "epoch": 0,
        "global_step": 0
    })
    global_epoch, global_step_val = int(params["epoch"]), int(
        params["global_step"])
    with tf.Graph().as_default(), tf.device('/job:ps/task:0/cpu:0'):
        # Create a variable to count the number of train() calls. This equals the
        # number of batches processed * FLAGS.num_gpus.
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(global_step_val),
            trainable=False)
        # Calculate the learning rate schedule.
        num_batchs_per_epochs = corpus.num_batchs_per_epochs(BATCH_SIZE)
        print("num_batches_per_epoch: %d" % num_batchs_per_epochs)
        decay_steps = int(num_batchs_per_epochs * NUM_EPOCHS_PER_DECAY)
        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                        global_step,
                                        decay_steps,
                                        LEARNING_RATE_DECAY_FACTOR,
                                        staircase=True)
        # Create an optimizer that performs gradient descent.
        opt = tf.train.GradientDescentOptimizer(lr)
        # Calculate the gradients for each model tower.
        tower_grads = []
        tower_acc = []
        for i in xrange(len(CLUSTER_CONFIG["worker_hosts"])):
            # worker_hosts[i] appears to be (grpc_address, device_string,
            # tower_loss argument) — TODO confirm against CLUSTER_CONFIG.
            gpu_device = CLUSTER_CONFIG["worker_hosts"][i][1]
            with tf.device('/job:worker/task:%d/%s' % (i, gpu_device)):
                with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
                    # all towers.
                    loss = tower_loss(scope, CLUSTER_CONFIG["worker_hosts"][i][2])
                    # all accuracy
                    tower_acc.append(tf.get_collection('accuracy', scope)[0])
                    # Reuse variables for the next tower.
                    tf.get_variable_scope().reuse_variables()
                    # Retain the summaries from the final tower.
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
                    # Calculate the gradients for the batch of data on this CIFAR tower.
                    grads = opt.compute_gradients(loss)
                    # Keep track of the gradients across all towers.
                    tower_grads.append(grads)
        # average accuracy
        accuracy = tf.add_n(tower_acc) / len(tower_acc)
        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = average_gradients(tower_grads)
        # Add a summary to track the learning rate.
        summaries.append(tf.scalar_summary('learning_rate', lr))
        # Add histograms for gradients.
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.histogram_summary(var.op.name + '/gradients', grad))
        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            summaries.append(tf.histogram_summary(var.op.name, var))
        # Track the moving averages of all trainable variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())
        # Group all updates to into a single train op.
        train_op = tf.group(apply_gradient_op, variables_averages_op)
        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())
        # Build the summary operation from the last tower summaries.
        summary_op = tf.merge_summary(summaries)
        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()
        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU
        # implementations.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
        # Session is created against the first worker's grpc endpoint.
        sess = tf.Session("grpc://" + CLUSTER_CONFIG["worker_hosts"][0][0],
                          config=tf.ConfigProto(allow_soft_placement=True,
                                                log_device_placement=True,
                                                gpu_options=gpu_options))
        sess.run(init)
        # restore model
        restore_model(sess, train_dir, saver)
        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        graph_def = sess.graph.as_graph_def(add_shapes=True)
        summary_writer = tf.train.SummaryWriter(train_dir, graph_def=graph_def)
        # Ring buffers holding one epoch of per-step metrics; pre-filled with
        # zeros, so the first epoch's reported average is biased low until full.
        avg_loss, avg_acc = [0] * num_batchs_per_epochs, [
            0
        ] * num_batchs_per_epochs
        epochs_step = global_epoch + 1
        step = 0
        while epochs_step <= (global_epoch + epochs):
            step += 1
            start_time = time.time()
            _, loss_value, acc_value, global_step_val = sess.run(
                [train_op, loss, accuracy, global_step])
            elapsed_time = int((time.time() - start_time) * 1000)
            avg_loss[step % num_batchs_per_epochs] = loss_value
            avg_acc[step % num_batchs_per_epochs] = acc_value
            global_step_val = int(global_step_val)
            if global_step_val % 2 == 0:
                logger.info(
                    "train policy dl dist network, epoch=%d, step=%d, loss=%.6f, acc=%.6f, time=%d(ms)"
                    % (epochs_step, step, loss_value, acc_value, elapsed_time))
            if global_step_val % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)
            if step > num_batchs_per_epochs:
                # Epoch boundary: wrap the step counter and report epoch averages.
                step = step % num_batchs_per_epochs
                epochs_step += 1
                average_loss = sum(avg_loss) / len(avg_loss)
                average_acc = sum(avg_acc) / len(avg_acc)
                logger.info(
                    "train policy dl dist network, epochs=%d, average_loss=%.7f, average_acc=%.7f"
                    % (epochs_step, average_loss, average_acc))
            # Save the model checkpoint periodically.
            # NOTE(review): placed at loop level so it fires on the last step of
            # an epoch (step == num_batchs_per_epochs) every 20 epochs — confirm
            # this nesting matches the original intent.
            if step % num_batchs_per_epochs == 0 and epochs_step % 20 == 0:
                param_serierlize(param_file, {
                    "epoch": int(epochs_step),
                    "global_step": int(global_step_val)
                })
                filename = save_model(sess,
                                      train_dir,
                                      saver,
                                      "policy_dl_epoch_%d" % epochs_step,
                                      global_step=global_step_val)
                logger.info("save policy dl dist model: %s" % filename)