def train(epochs=200, predict=False):
    """Train for a number of steps."""
    param_file = "%s/param.json" % train_dir
    params = param_unserierlize(param_file, init_params={"epoch": 0, "global_step": 0})
    global_epoch, global_step_val = int(params["epoch"]), int(params["global_step"])
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Create a variable to count the number of train() calls. This equals the
        # number of batches processed * the number of towers.
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(global_step_val), trainable=False)

        # Calculate the learning rate schedule.
        num_batchs_per_epochs = int(corpus.num_batchs_per_epochs(BATCH_SIZE))
        print("num_batches_per_epoch: %d" % num_batchs_per_epochs)
        decay_steps = int(num_batchs_per_epochs / gpu_num * NUM_EPOCHS_PER_DECAY)

        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                        global_step,
                                        decay_steps,
                                        LEARNING_RATE_DECAY_FACTOR,
                                        staircase=True)

        # Create an optimizer that performs gradient descent.
        # opt = tf.train.GradientDescentOptimizer(lr)
        opt = tf.train.AdamOptimizer(lr)

        # Calculate the gradients for each model tower.
        tower_grads = []
        tower_acc = []
        tower_feeds = []
        for i in xrange(gpu_num):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
                    # all towers.
                    batch_input = tf.placeholder(tf.float32, [None, 15*15*planes])
                    batch_labels = tf.placeholder(tf.float32, shape=[None])
                    tower_feeds.append((batch_input, batch_labels))
                    loss = tower_loss(scope, batch_input, batch_labels)

                    # all accuracy
                    tower_acc.append(tf.get_collection('accuracy', scope)[0])

                    # Reuse variables for the next tower.
                    tf.get_variable_scope().reuse_variables()

                    # Retain the summaries from the final tower.
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)

                    # Calculate the gradients for the batch of data on this tower.
                    grads = opt.compute_gradients(loss)

                    # Keep track of the gradients across all towers.
                    tower_grads.append(grads)

        # average accuracy
        accuracy = tf.add_n(tower_acc) / len(tower_acc)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = average_gradients(tower_grads)


        # Add a summary to track the learning rate.
        summaries.append(tf.scalar_summary('learning_rate', lr))

        # Add histograms for gradients.
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.histogram_summary(var.op.name + '/gradients', grad))

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            summaries.append(tf.histogram_summary(var.op.name, var))

        # Track the moving averages of all trainable variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())

        # Group all updates into a single train op.
        train_op = tf.group(apply_gradient_op, variables_averages_op)

        # Create a saver.
        # saver = tf.train.Saver(tf.all_variables())
        saver = tf.train.Saver()

        # Build the summary operation from the last tower summaries.
        summary_op = tf.merge_summary(summaries)

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU
        # implementations.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=GPU_MEMERY_ALLOCATE)
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False,
            gpu_options=gpu_options)
        )
        sess.run(init)

        # restore model
        restore_model(sess, train_dir, saver)
        if predict:
            return sess, saver

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        graph_def = sess.graph.as_graph_def(add_shapes=True)
        summary_writer = tf.train.SummaryWriter(train_dir,
                                                graph_def=graph_def)

        avg_loss, avg_acc = [], []
        epochs_step = global_epoch + 1
        step = 0
        while epochs_step <= (global_epoch + epochs):
            step += gpu_num
            start_time = time.time()
            # _, loss_value, acc_value, global_step_val = sess.run([train_op, loss, accuracy, global_step])
            # Build one feed batch per tower: flattened board states go into the
            # tower's input placeholder, paired with the corresponding labels.
            feeds = {}
            for idx in xrange(gpu_num):
                samples = corpus.next_fetch_rows(BATCH_SIZE)
                feature = np.array([sample[0].get_states(flatten=True) for sample in samples], dtype=np.float32)
                labels = np.array([sample[1] for sample in samples], dtype=np.float32)
                feeds[tower_feeds[idx][0]] = feature
                feeds[tower_feeds[idx][1]] = labels
            _, loss_value, acc_value, global_step_val, learn_rating = sess.run(
                [train_op, loss, accuracy, global_step, lr],
                feed_dict=feeds)
            elapsed_time = int((time.time() - start_time) * 1000)

            avg_loss.append(loss_value)
            avg_acc.append(acc_value)

            global_step_val = int(global_step_val)
            if global_step_val % 10 == 0:
                logger.info(
                    "train policy rollout multi_GPU network, epoch=%d, step=%d, loss=%.6f, acc=%.6f, lr=%.6f, time=%d(ms)" % (
                        epochs_step, step, loss_value, acc_value, learn_rating, elapsed_time))

            # if global_step_val % 100 == 0:
            #     summary_str = sess.run(summary_op)
            #     summary_writer.add_summary(summary_str, step)
            if step > num_batchs_per_epochs:
                step = step % num_batchs_per_epochs
                epochs_step += 1
                average_loss = sum(avg_loss) / len(avg_loss)
                average_acc = sum(avg_acc) / len(avg_acc)
                avg_loss, avg_acc = [], []

                logger.info("train policy rollout multi_GPU network, epochs=%d, average_loss=%.7f, average_acc=%.7f" %
                            (epochs_step, average_loss, average_acc))
                # Save the model checkpoint periodically.
                if epochs_step % 5 == 0:
                    param_serierlize(param_file, {"epoch": int(epochs_step), "global_step": int(global_step_val)})
                    filename = save_model(sess, train_dir, saver,
                                          "policy_rollout_epoch_%d" % epochs_step,
                                          global_step=global_step_val)
                    logger.info("save policy rollout multi_GPU model: %s" % filename)
Example 2
def train(epochs=200):
    """Train for a number of steps."""
    param_file = "%s/param.json" % train_dir
    params = param_unserierlize(param_file, init_params={"epoch": 0, "global_step": 0})
    global_epoch, global_step_val = int(params["epoch"]), int(params["global_step"])
    with tf.Graph().as_default(), tf.device('/job:ps/task:0/cpu:0'):
        # Create a variable to count the number of train() calls. This equals the
        # number of batches processed * the number of towers.
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(global_step_val), trainable=False)

        # Calculate the learning rate schedule.
        num_batchs_per_epochs = corpus.num_batchs_per_epochs(BATCH_SIZE)
        print("num_batches_per_epoch: %d" % num_batchs_per_epochs)
        decay_steps = int(num_batchs_per_epochs * NUM_EPOCHS_PER_DECAY)

        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                        global_step,
                                        decay_steps,
                                        LEARNING_RATE_DECAY_FACTOR,
                                        staircase=True)

        # Create an optimizer that performs gradient descent.
        opt = tf.train.GradientDescentOptimizer(lr)

        # Calculate the gradients for each model tower.
        tower_grads = []
        tower_acc = []
        for i in xrange(len(CLUSTER_CONFIG["worker_hosts"])):
            gpu_device = CLUSTER_CONFIG["worker_hosts"][i][1]
            with tf.device('/job:worker/task:%d/%s' % (i, gpu_device)):
                with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
                    # all towers.
                    loss = tower_loss(scope, CLUSTER_CONFIG["worker_hosts"][i][2])

                    # all accuracy
                    tower_acc.append(tf.get_collection('accuracy', scope)[0])

                    # Reuse variables for the next tower.
                    tf.get_variable_scope().reuse_variables()

                    # Retain the summaries from the final tower.
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)

                    # Calculate the gradients for the batch of data on this tower.
                    grads = opt.compute_gradients(loss)

                    # Keep track of the gradients across all towers.
                    tower_grads.append(grads)

        # average accuracy
        accuracy = tf.add_n(tower_acc) / len(tower_acc)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = average_gradients(tower_grads)


        # Add a summary to track the learning rate.
        summaries.append(tf.scalar_summary('learning_rate', lr))

        # Add histograms for gradients.
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.histogram_summary(var.op.name + '/gradients', grad))

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            summaries.append(tf.histogram_summary(var.op.name, var))

        # Track the moving averages of all trainable variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())

        # Group all updates into a single train op.
        train_op = tf.group(apply_gradient_op, variables_averages_op)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Build the summary operation from the last tower summaries.
        summary_op = tf.merge_summary(summaries)

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU
        # implementations.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
        sess = tf.Session("grpc://" + CLUSTER_CONFIG["worker_hosts"][0][0], config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=True,
            gpu_options=gpu_options))
        sess.run(init)

        # restore model
        restore_model(sess, train_dir, saver)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        graph_def = sess.graph.as_graph_def(add_shapes=True)
        summary_writer = tf.train.SummaryWriter(train_dir,
                                                graph_def=graph_def)

        avg_loss, avg_acc = [0] * num_batchs_per_epochs, [0] * num_batchs_per_epochs
        epochs_step = global_epoch + 1
        step = 0
        while epochs_step <= (global_epoch + epochs):
            step += 1
            start_time = time.time()
            _, loss_value, acc_value, global_step_val = sess.run([train_op, loss, accuracy, global_step])
            elapsed_time = int((time.time() - start_time) * 1000)

            avg_loss[step % num_batchs_per_epochs] = loss_value
            avg_acc[step % num_batchs_per_epochs] = acc_value

            global_step_val = int(global_step_val)
            if global_step_val % 2 == 0:
                logger.info("train policy dl dist network, epoch=%d, step=%d, loss=%.6f, acc=%.6f, time=%d(ms)" % (
                    epochs_step, step, loss_value, acc_value, elapsed_time))

            if global_step_val % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)
            if step > num_batchs_per_epochs:
                step = step % num_batchs_per_epochs
                epochs_step += 1
                average_loss = sum(avg_loss) / len(avg_loss)
                average_acc = sum(avg_acc) / len(avg_acc)

                logger.info("train policy dl dist network, epochs=%d, average_loss=%.7f, average_acc=%.7f" %
                            (epochs_step, average_loss, average_acc))
            # Save the model checkpoint periodically.
            if step % num_batchs_per_epochs == 0 and epochs_step % 20 == 0:
                param_serierlize(param_file, {"epoch": int(epochs_step), "global_step": int(global_step_val)})
                filename = save_model(sess, train_dir, saver,
                                      "policy_dl_epoch_%d" % epochs_step,
                                      global_step=global_step_val)
                logger.info("save policy dl dist model: %s" % filename)
Example 3
def network(epochs=200, predict=False):
    """Build the multi-GPU training graph and return its session and op handles."""
    param_file = "%s/param.json" % train_dir
    params = param_unserierlize(param_file, init_params={"global_step": 0})
    global_step_val = int(params["global_step"])
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Create a variable to count the number of train() calls. This equals the
        # number of batches processed * the number of towers.
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(global_step_val), trainable=False)

        # Calculate the learning rate schedule.
        num_batchs_per_epochs = corpus.num_batchs_per_epochs(BATCH_SIZE)
        print("num_batches_per_epoch: %d" % num_batchs_per_epochs)
        decay_steps = int(num_batchs_per_epochs / gpu_num * NUM_EPOCHS_PER_DECAY)

        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                        global_step,
                                        decay_steps,
                                        LEARNING_RATE_DECAY_FACTOR,
                                        staircase=True)

        # Create an optimizer that performs gradient descent.
        # opt = tf.train.GradientDescentOptimizer(lr)
        opt = tf.train.AdamOptimizer(lr)

        # Calculate the gradients for each model tower.
        tower_grads = []
        tower_acc = []
        tower_feeds = []
        tower_logits = []
        for i in xrange(gpu_num):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
                    # all towers.
                    batch_input = tf.placeholder(tf.float32, [None, board_size, board_size, planes])
                    batch_labels = tf.placeholder(tf.float32, shape=[None])
                    batch_target = tf.placeholder(tf.float32, shape=[None])
                    tower_feeds.append((batch_input, batch_labels, batch_target))
                    loss, logits = tower_loss(scope, batch_input, batch_labels, batch_target)
                    tower_logits.append(logits)

                    # all accuracy
                    tower_acc.append(tf.get_collection('accuracy', scope)[0])

                    # Reuse variables for the next tower.
                    tf.get_variable_scope().reuse_variables()

                    # Retain the summaries from the final tower.
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)

                    # Calculate the gradients for the batch of data on this tower.
                    grads = opt.compute_gradients(loss)

                    # Keep track of the gradients across all towers.
                    tower_grads.append(grads)

        # average accuracy
        accuracy = tf.add_n(tower_acc) / len(tower_acc)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = average_gradients(tower_grads)


        # Add a summary to track the learning rate.
        summaries.append(tf.scalar_summary('learning_rate', lr))

        # Add histograms for gradients.
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.histogram_summary(var.op.name + '/gradients', grad))

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            summaries.append(tf.histogram_summary(var.op.name, var))

        # Track the moving averages of all trainable variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())

        # Group all updates into a single train op.
        train_op = tf.group(apply_gradient_op, variables_averages_op)

        # Create a saver.
        # saver = tf.train.Saver(tf.all_variables())
        saver = tf.train.Saver()

        # Build the summary operation from the last tower summaries.
        summary_op = tf.merge_summary(summaries)

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU
        # implementations.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=GPU_MEMERY_ALLOCATE)
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False,
            gpu_options=gpu_options)
        )
        sess.run(init)

        # restore model
        restore_model(sess, train_dir, saver)
        if predict:
            return sess, saver

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        graph_def = sess.graph.as_graph_def(add_shapes=True)
        summary_writer = tf.train.SummaryWriter(train_dir,
                                                graph_def=graph_def)
        return sess, saver, summary_writer, train_op, loss, accuracy, global_step, lr, tower_feeds, tower_logits
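
One way a caller might drive training with the handles returned by network(), mirroring the feed-construction loop of the first example. This is a usage sketch only: the third feed (batch_target), the unflattened board layout, and the (state, label, target) sample layout returned by corpus.next_fetch_rows are assumptions inferred from the placeholders above.

# Usage sketch; the sample layout and get_states(flatten=False) are assumptions.
(sess, saver, summary_writer, train_op, loss, accuracy,
 global_step, lr, tower_feeds, tower_logits) = network()

feeds = {}
for idx in xrange(gpu_num):
    samples = corpus.next_fetch_rows(BATCH_SIZE)
    feeds[tower_feeds[idx][0]] = np.array(
        [sample[0].get_states(flatten=False) for sample in samples], dtype=np.float32)
    feeds[tower_feeds[idx][1]] = np.array(
        [sample[1] for sample in samples], dtype=np.float32)
    feeds[tower_feeds[idx][2]] = np.array(
        [sample[2] for sample in samples], dtype=np.float32)
_, loss_value, acc_value = sess.run([train_op, loss, accuracy], feed_dict=feeds)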