    def get_heading_loss(self, heading_scores, heading_res_norm,
                         heading_class_label, heading_res_norm_label):

        heading_class_loss = F.cross_entropy(heading_scores,
                                             heading_class_label)

        # b, NUM_HEADING_BIN -> b, 1
        heading_res_norm_select = torch.gather(heading_res_norm, 1,
                                               heading_class_label.view(-1, 1))

        heading_res_norm_loss = huber_loss(heading_res_norm_select.squeeze(1) -
                                           heading_res_norm_label,
                                           delta=1.0)

        return heading_class_loss, heading_res_norm_loss

    def get_size_loss(self, size_scores, size_res_norm, size_class_label,
                      size_res_label_norm):
        batch_size = size_scores.shape[0]
        size_class_loss = F.cross_entropy(size_scores, size_class_label)

        # b, NUM_SIZE_CLUSTER, 3 -> b, 1, 3
        size_res_norm_select = torch.gather(
            size_res_norm, 1,
            size_class_label.view(batch_size, 1, 1).expand(batch_size, 1, 3))

        size_norm_dist = torch.norm(size_res_label_norm -
                                    size_res_norm_select.squeeze(1),
                                    2,
                                    dim=-1)

        size_res_norm_loss = huber_loss(size_norm_dist, delta=1.0)

        return size_class_loss, size_res_norm_loss

    def get_corner_loss(self, preds, gts):

        center_label, heading_label, size_label = gts
        center_preds, heading_preds, size_preds = preds

        corners_3d_gt = get_box3d_corners_helper(center_label, heading_label,
                                                 size_label)
        corners_3d_gt_flip = get_box3d_corners_helper(center_label,
                                                      heading_label + np.pi,
                                                      size_label)

        corners_3d_pred = get_box3d_corners_helper(center_preds, heading_preds,
                                                   size_preds)

        # N, 8, 3
        corners_dist = torch.min(
            torch.norm(corners_3d_pred - corners_3d_gt, 2, dim=-1).mean(-1),
            torch.norm(corners_3d_pred - corners_3d_gt_flip, 2,
                       dim=-1).mean(-1))
        # corners_dist = torch.norm(corners_3d_pred - corners_3d_gt, 2, dim=-1)
        corners_loss = huber_loss(corners_dist, delta=1.0)

        return corners_loss, corners_3d_gt
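The helpers huber_loss and get_box3d_corners_helper are not part of this snippet. A minimal sketch of what the methods above appear to assume (a smooth-L1 penalty averaged over the batch, and the usual 8-corner construction of a heading-rotated 3D box with sizes ordered as length, width, height) could look like the following; the original implementations may differ in reduction and axis conventions.

import torch


def huber_loss(error, delta=1.0):
    # quadratic for |error| <= delta, linear beyond, averaged over all elements
    abs_error = torch.abs(error)
    quadratic = torch.clamp(abs_error, max=delta)
    linear = abs_error - quadratic
    return torch.mean(0.5 * quadratic ** 2 + delta * linear)


def get_box3d_corners_helper(centers, headings, sizes):
    # centers: (N, 3), headings: (N,), sizes: (N, 3) as (l, w, h)
    N = centers.shape[0]
    l, w, h = sizes[:, 0], sizes[:, 1], sizes[:, 2]
    # 8 corners in the box frame
    x_corners = torch.stack([l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2], dim=1)
    y_corners = torch.stack([h / 2, h / 2, h / 2, h / 2, -h / 2, -h / 2, -h / 2, -h / 2], dim=1)
    z_corners = torch.stack([w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2], dim=1)
    corners = torch.stack([x_corners, y_corners, z_corners], dim=1)  # (N, 3, 8)
    # rotate about the vertical axis by the heading angle, then translate to the center
    c, s = torch.cos(headings), torch.sin(headings)
    ones, zeros = torch.ones_like(c), torch.zeros_like(c)
    R = torch.stack([c, zeros, s,
                     zeros, ones, zeros,
                     -s, zeros, c], dim=1).view(N, 3, 3)
    corners_3d = torch.bmm(R, corners) + centers.unsqueeze(-1)  # (N, 3, 8)
    return corners_3d.transpose(1, 2)  # (N, 8, 3)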
Example #4
    def get_loss(self):
        end_points = self.end_points
        cls_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(\
            logits=end_points['cls_logits'], labels=self.placeholders['class_labels']))
        tf.summary.scalar('classification loss', cls_loss)
        # is_obj_mask = tf.to_float(tf.not_equal(self.placeholders['class_labels'], 0))
        train_reg_mask = tf.to_float(self.placeholders['train_regression'])
        center_x_cls_loss = tf.reduce_mean(train_reg_mask*tf.nn.sparse_softmax_cross_entropy_with_logits(\
           logits=end_points['center_x_scores'], labels=self.placeholders['center_bin_x_labels']))
        center_z_cls_loss = tf.reduce_mean(train_reg_mask*tf.nn.sparse_softmax_cross_entropy_with_logits(\
           logits=end_points['center_z_scores'], labels=self.placeholders['center_bin_z_labels']))
        bin_x_onehot = tf.one_hot(self.placeholders['center_bin_x_labels'],
            depth=NUM_CENTER_BIN,
            on_value=1, off_value=0, axis=-1) # BxNUM_CENTER_BIN
        # NOTICE: labels['center_x_residuals'] is already normalized
        center_x_residuals_normalized = tf.reduce_sum(end_points['center_x_residuals_normalized']*tf.to_float(bin_x_onehot), axis=-1) # B
        center_x_residuals_dist = tf.norm(self.placeholders['center_x_res_labels'] - center_x_residuals_normalized, axis=-1)
        center_x_res_loss = huber_loss(train_reg_mask*center_x_residuals_dist, delta=1.0)
        bin_z_onehot = tf.one_hot(self.placeholders['center_bin_z_labels'],
            depth=NUM_CENTER_BIN,
            on_value=1, off_value=0, axis=-1) # BxNUM_CENTER_BIN
        center_z_residuals_normalized = tf.reduce_sum(end_points['center_z_residuals_normalized']*tf.to_float(bin_z_onehot), axis=-1) # B
        center_z_residuals_dist = tf.norm(self.placeholders['center_z_res_labels'] - center_z_residuals_normalized, axis=-1)
        center_z_res_loss = huber_loss(train_reg_mask*center_z_residuals_dist, delta=1.0)
        # y is directly regressed
        center_y_residuals_dist = tf.norm(self.placeholders['center_y_res_labels'] - tf.gather(end_points['center_y_residuals'], 0, axis=-1), axis=-1)
        center_y_res_loss = huber_loss(train_reg_mask*center_y_residuals_dist, delta=1.0)
        tf.summary.scalar('center_x class loss', center_x_cls_loss)
        tf.summary.scalar('center_z class loss', center_z_cls_loss)
        tf.summary.scalar('center_x residual loss', center_x_res_loss)
        tf.summary.scalar('center_y residual loss', center_y_res_loss)
        tf.summary.scalar('center_z residual loss', center_z_res_loss)
        # Heading loss
        heading_class_loss = tf.reduce_mean( \
            train_reg_mask*tf.nn.sparse_softmax_cross_entropy_with_logits( \
            logits=end_points['heading_scores'], labels=self.placeholders['heading_bin_labels']))
        hcls_onehot = tf.one_hot(self.placeholders['heading_bin_labels'],
            depth=NUM_HEADING_BIN,
            on_value=1, off_value=0, axis=-1) # BxNxNUM_HEADING_BIN
        heading_residual_normalized_label = self.placeholders['heading_res_labels']
        heading_res_dist = tf.norm(tf.reduce_sum( \
            end_points['heading_residuals_normalized']*tf.to_float(hcls_onehot), axis=-1) - \
            heading_residual_normalized_label)
        heading_res_loss = huber_loss(train_reg_mask*heading_res_dist, delta=1.0)
        tf.summary.scalar('heading class loss', heading_class_loss)
        tf.summary.scalar('heading residual loss', heading_res_loss)
        # Size loss
        size_class_loss = tf.reduce_mean( \
            train_reg_mask*tf.nn.sparse_softmax_cross_entropy_with_logits( \
            logits=end_points['size_scores'], labels=self.placeholders['size_class_labels']))

        scls_onehot = tf.one_hot(self.placeholders['size_class_labels'],
            depth=NUM_SIZE_CLUSTER,
            on_value=1, off_value=0, axis=-1) # BxNUM_SIZE_CLUSTER
        scls_onehot_tiled = tf.tile(tf.expand_dims( \
            tf.to_float(scls_onehot), -1), [1,1,3]) # BxNUM_SIZE_CLUSTERx3
        predicted_size_residual_normalized = tf.reduce_sum( \
            end_points['size_residuals_normalized']*scls_onehot_tiled, axis=1) # Bx3

        size_residual_label_normalized = self.placeholders['size_res_labels'] # Bx3

        size_dist = tf.norm(size_residual_label_normalized - predicted_size_residual_normalized, axis=-1)
        size_res_loss = huber_loss(train_reg_mask*size_dist, delta=1.0)
        tf.summary.scalar('size class loss', size_class_loss)
        tf.summary.scalar('size residual loss', size_res_loss)

        obj_cls_weight = 1
        cls_weight = 1
        res_weight = 1
        total_loss = obj_cls_weight * cls_loss + \
            cls_weight * (center_x_cls_loss + center_z_cls_loss + heading_class_loss + size_class_loss) + \
            res_weight * (center_x_res_loss + center_z_res_loss + center_y_res_loss + heading_res_loss + size_res_loss)

        loss_endpoints = {
            #'size_class_loss': size_class_loss,
            'size_res_loss': size_res_loss,
            #'heading_class_loss': heading_class_loss,
            #'heading_res_loss': heading_res_loss,
            #'center_x_cls_loss': center_x_cls_loss,
            #'center_z_cls_loss': center_z_cls_loss,
            #'center_x_res_loss': center_x_res_loss,
            #'center_z_res_loss': center_z_res_loss,
            #'center_y_res_loss': center_y_res_loss,
            #'mask_loss': cls_loss
            #'mean_size_label': mean_size_label,
            'size_residuals_normalized': end_points['size_residuals_normalized']
        }
        return total_loss, loss_endpoints
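The huber_loss called by this TensorFlow example is likewise not shown. A minimal TF1-style sketch under the same assumption (quadratic inside delta, linear outside, reduced to a scalar) might be:

import tensorflow as tf


def huber_loss(error, delta=1.0):
    # quadratic for |error| <= delta, linear beyond, reduced to a scalar mean
    abs_error = tf.abs(error)
    quadratic = tf.minimum(abs_error, delta)
    linear = abs_error - quadratic
    return tf.reduce_mean(0.5 * tf.square(quadratic) + delta * linear)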
Example #5
    def get_loss(self):
        pls = self.placeholders
        end_points = self.end_points
        batch_size = self.batch_size
        # 3D Segmentation loss
        mask_loss = focal_loss(
            end_points['foreground_logits'],
            tf.one_hot(pls['seg_labels'], NUM_SEG_CLASSES, axis=-1))
        tf.summary.scalar('mask loss', mask_loss)
        #return mask_loss, {}
        # gather box estimation labels of foreground points
        labels_fg = {}
        for k in pls.keys():
            if k not in [
                    'center_bin_x_labels',
                    'center_bin_z_labels',
                    'center_x_residuals_labels',
                    'center_z_residuals_labels',
                    'center_y_residuals_labels',
                    'heading_bin_labels',
                    'heading_residuals_labels',
                    'size_class_labels',
                    'size_residuals_labels',
            ]:
                continue
            labels_fg[k] = tf.gather_nd(pls[k], end_points['fg_point_indices'])
            if k == 'size_residuals_labels':
                labels_fg[k].set_shape([batch_size, NUM_FG_POINT, 3])
            else:
                labels_fg[k].set_shape([batch_size, NUM_FG_POINT])
        # Center loss
        center_x_cls_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(\
           logits=end_points['center_x_scores'], labels=labels_fg['center_bin_x_labels']))
        center_z_cls_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(\
           logits=end_points['center_z_scores'], labels=labels_fg['center_bin_z_labels']))
        bin_x_onehot = tf.one_hot(labels_fg['center_bin_x_labels'],
                                  depth=NUM_CENTER_BIN,
                                  on_value=1,
                                  off_value=0,
                                  axis=-1)  # BxNxNUM_CENTER_BIN
        # NOTICE: labels['center_x_residuals'] is already normalized
        center_x_residuals_normalized = tf.reduce_sum(
            end_points['center_x_residuals_normalized'] *
            tf.to_float(bin_x_onehot),
            axis=2)  # BxN
        center_x_residuals_dist = tf.norm(
            labels_fg['center_x_residuals_labels'] -
            center_x_residuals_normalized,
            axis=-1)
        center_x_res_loss = huber_loss(center_x_residuals_dist, delta=2.0)
        bin_z_onehot = tf.one_hot(labels_fg['center_bin_z_labels'],
                                  depth=NUM_CENTER_BIN,
                                  on_value=1,
                                  off_value=0,
                                  axis=-1)  # BxNxNUM_CENTER_BIN
        center_z_residuals_normalized = tf.reduce_sum(
            end_points['center_z_residuals_normalized'] *
            tf.to_float(bin_z_onehot),
            axis=2)  # BxN
        center_z_residuals_dist = tf.norm(
            labels_fg['center_z_residuals_labels'] -
            center_z_residuals_normalized,
            axis=-1)
        center_z_res_loss = huber_loss(center_z_residuals_dist, delta=2.0)
        # y is directly regressed
        center_y_residuals_dist = tf.norm(
            labels_fg['center_y_residuals_labels'] -
            tf.gather(end_points['center_y_residuals'], 0, axis=-1),
            axis=-1)
        center_y_res_loss = huber_loss(center_y_residuals_dist, delta=2.0)
        tf.summary.scalar('center_x class loss', center_x_cls_loss)
        tf.summary.scalar('center_z class loss', center_z_cls_loss)
        tf.summary.scalar('center_x residual loss', center_x_res_loss)
        tf.summary.scalar('center_y residual loss', center_y_res_loss)
        tf.summary.scalar('center_z residual loss', center_z_res_loss)
        # Heading loss
        heading_class_loss = tf.reduce_mean( \
            tf.nn.sparse_softmax_cross_entropy_with_logits( \
            logits=end_points['heading_scores'], labels=labels_fg['heading_bin_labels']))
        hcls_onehot = tf.one_hot(labels_fg['heading_bin_labels'],
                                 depth=NUM_HEADING_BIN,
                                 on_value=1,
                                 off_value=0,
                                 axis=-1)  # BxNxNUM_HEADING_BIN
        heading_residual_normalized_label = labels_fg[
            'heading_residuals_labels']
        heading_res_dist = tf.norm(heading_residual_normalized_label - tf.reduce_sum( \
            end_points['heading_residuals_normalized']*tf.to_float(hcls_onehot), axis=2))
        heading_res_loss = huber_loss(heading_res_dist, delta=1.0)
        tf.summary.scalar('heading class loss', heading_class_loss)
        tf.summary.scalar('heading residual loss', heading_res_loss)
        # Size loss
        size_class_loss = tf.reduce_mean( \
            tf.nn.sparse_softmax_cross_entropy_with_logits( \
            logits=end_points['size_scores'], labels=labels_fg['size_class_labels']))

        scls_onehot = tf.one_hot(labels_fg['size_class_labels'],
                                 depth=NUM_SIZE_CLUSTER,
                                 on_value=1,
                                 off_value=0,
                                 axis=-1)  # BxNxNUM_SIZE_CLUSTER
        scls_onehot_tiled = tf.tile(tf.expand_dims( \
            tf.to_float(scls_onehot), -1), [1,1,1,3]) # BxNxNUM_SIZE_CLUSTERx3
        predicted_size_residual_normalized = tf.reduce_sum( \
            end_points['size_residuals_normalized']*scls_onehot_tiled, axis=2) # BxNx3

        size_residual_label_normalized = labels_fg[
            'size_residuals_labels']  # BxNx3

        size_dist = tf.norm(size_residual_label_normalized -
                            predicted_size_residual_normalized,
                            axis=-1)
        size_res_loss = huber_loss(size_dist, delta=1.0)
        tf.summary.scalar('size class loss', size_class_loss)
        tf.summary.scalar('size residual loss', size_res_loss)

        seg_weight = 0.1
        cls_weight = 10
        res_weight = 10
        total_loss = seg_weight * mask_loss + \
            cls_weight * (center_x_cls_loss + center_z_cls_loss + heading_class_loss + size_class_loss) + \
            res_weight * (center_x_res_loss + center_z_res_loss + center_y_res_loss + heading_res_loss + size_res_loss)
        loss_endpoints = {
            'size_class_loss': size_class_loss,
            'size_res_loss': size_res_loss,
            'heading_class_loss': heading_class_loss,
            'heading_res_loss': heading_res_loss,
            'center_x_cls_loss': center_x_cls_loss,
            'center_z_cls_loss': center_z_cls_loss,
            'center_x_res_loss': center_x_res_loss,
            'center_z_res_loss': center_z_res_loss,
            'center_y_res_loss': center_y_res_loss,
            'mask_loss': mask_loss
        }

        return total_loss, loss_endpoints

    def get_center_loss(self, pred_offsets, gt_offsets):

        center_dist = torch.norm(gt_offsets - pred_offsets, 2, dim=-1)
        center_loss = huber_loss(center_dist, delta=3.0)

        return center_loss
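The focal_loss applied to the segmentation logits in Example #5 above is also undefined here. A common formulation (Lin et al., 2017) on softmax probabilities is sketched below; the gamma and alpha values and the exact reduction are assumptions, not the original settings.

import tensorflow as tf


def focal_loss(logits, onehot_labels, gamma=2.0, alpha=0.25):
    # down-weights well-classified points: FL = -alpha * (1 - p)^gamma * log(p)
    probs = tf.clip_by_value(tf.nn.softmax(logits), 1e-8, 1.0)
    cross_entropy = -onehot_labels * tf.log(probs)
    modulator = alpha * tf.pow(1.0 - probs, gamma)
    # sum over classes, average over batch and points
    return tf.reduce_mean(tf.reduce_sum(modulator * cross_entropy, axis=-1))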
def train_network(input_train_hdf5, input_val_hdf5, gpu, pre_trained_checkpoint, epochs, batch_size, logs_path, save_dir):

    # Create log directory if it does not exist
    if not os.path.exists(logs_path):
        os.makedirs(logs_path)

    # Set environment variable to select the GPU to use
    if gpu != -1:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
    else:
        print('Set tensorflow on CPU')
        os.environ["CUDA_VISIBLE_DEVICES"] = ""

    # Define number of epochs and batch size, where to save logs, etc...
    iter_disp = 10
    start_lr = args.learning_rate

    # Avoid allocating the whole memory
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))

    # Regularization value
    L2NormConst = 0.001

    # Build model and get references to placeholders
    driving_model = model.DrivingModelAutoEncoder(training_mode=True)
    model_in = driving_model.input
    model_out = driving_model.output
    model_drop = driving_model.dropout_control

    # Add input image/steering angle on summary
    tf.summary.image("input_image", model_in, 10)
    tf.summary.image("output_image", model_out, 10)

    # Reconstruction loss (the active loss below is a huber loss;
    # earlier cross-entropy and L2 attempts are left commented out)
    # Get all model "parameters" that are trainable
    train_vars = tf.trainable_variables()
    with tf.name_scope("L2_LOSS"):
        #cross_entropy = -1. * model_in * tf.log(model_out) - (1. - model_in) * tf.log(1. - model_out)
        #loss = tf.reduce_mean(cross_entropy)
        #loss = tf.nn.l2_loss(model_in-model_out)
        loss = tf.reduce_mean(util.huber_loss(model_out, model_in))

    # Solver configuration
    # Get ops to update moving_mean and moving_variance from batch_norm
    # Reference: https://www.tensorflow.org/api_docs/python/tf/contrib/layers/batch_norm
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.name_scope("Solver"):
        global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = start_lr
        # decay every 5000 steps with a base of 0.9
        learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                                   5000, 0.9, staircase=True)

        # Basically update the batch_norm moving averages before the training step
        # http://ruishu.io/2016/12/27/batchnorm/
        with tf.control_dependencies(update_ops):
            train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)

    # Initialize all random variables (Weights/Bias)
    sess.run(tf.global_variables_initializer())

    # Load checkpoint if needed
    if pre_trained_checkpoint:
        # Load tensorflow model
        print("Loading pre-trained model: %s" % args.checkpoint_dir)
        # Create saver object to save/load training checkpoint
        saver = tf.train.Saver(max_to_keep=None)
        saver.restore(sess, pre_trained_checkpoint)
    else:
        # Just create saver for saving checkpoints
        saver = tf.train.Saver(max_to_keep=None)

    # Monitor loss, learning_rate, global_step, etc...
    tf.summary.scalar("loss_train", loss)
    tf.summary.scalar("learning_rate", learning_rate)
    tf.summary.scalar("global_step", global_step)
    # merge all summaries into a single op
    merged_summary_op = tf.summary.merge_all()

    # Configure where to save the logs for tensorboard
    summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

    data = HandleData(path=input_train_hdf5, path_val=input_val_hdf5)
    num_images_epoch = int(data.get_num_images() / batch_size)
    print('Num samples', data.get_num_images(), 'Iterations per epoch:', num_images_epoch, 'batch size:', batch_size)

    # For each epoch
    for epoch in range(epochs):
        for i in range(int(data.get_num_images() / batch_size)):
            # Get training batch
            xs_train, ys_train = data.LoadTrainBatch(batch_size, should_augment=False)

            # Send training batch to tensorflow graph (Dropout enabled)
            train_step.run(feed_dict={model_in: xs_train, model_drop: 0.8})

            # write logs at every iteration
            summary = merged_summary_op.eval(feed_dict={model_in: xs_train, model_drop: 1.0})
            summary_writer.add_summary(summary, epoch * num_images_epoch + i)

        # Save checkpoint after each epoch
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        checkpoint_path = os.path.join(save_dir, "model")
        filename = saver.save(sess, checkpoint_path, global_step=epoch)
        print("Model saved in file: %s" % filename)

        # Shuffle data at each epoch end
        print("Shuffle data")
        data.shuffleData()

    print("Run the command line:\n" \
          "--> tensorboard --logdir=./logs " \
          "\nThen open http://0.0.0.0:6006/ into your web browser")
def train_network(input_list, input_val_hdf5, gpu, pre_trained_checkpoint, epochs, batch_size, logs_path, save_dir, gpu_frac):

    # Create log directory if it does not exist
    if not os.path.exists(logs_path):
        os.makedirs(logs_path)

    # Set environment variable to select the GPU to use
    if gpu != -1:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
    else:
        print('Set tensorflow on CPU')
        os.environ["CUDA_VISIBLE_DEVICES"] = ""

    # Get file list
    list_tfrecord_files = HandleData.get_list_from_file(input_list)

    # Create the graph input part (Responsible to load files, do augmentations, etc...)
    images, labels = util.create_input_graph(list_tfrecord_files, epochs, batch_size, do_augment = False)

    # Build Graph
    driving_model = model.DrivingModelAutoEncoder(input=images, use_placeholder = False)
    model_out = driving_model.output

    # Add input image/steering angle on summary
    tf.summary.image("input_image", images, 6)
    tf.summary.image("output_image", model_out, 6)

    # Define number of epochs and batch size, where to save logs, etc...
    iter_disp = 10
    start_lr = args.learning_rate

    # Avoid allocating the whole memory
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_frac)
    sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))

    # Get all model "parameters" that are trainable
    train_vars = tf.trainable_variables()
    with tf.name_scope("Loss"):
        #cross_entropy = -1. * model_in * tf.log(model_out) - (1. - model_in) * tf.log(1. - model_out)
        loss = tf.reduce_mean(util.huber_loss(images, model_out))
        #loss = tf.nn.l2_loss(images - model_out)


    # Get ops to update moving_mean and moving_variance from batch_norm
    # Reference: https://www.tensorflow.org/api_docs/python/tf/contrib/layers/batch_norm
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    # Solver configuration
    with tf.name_scope("Solver"):
        #global_step = tf.Variable(0, name='global_step', trainable=False)
        global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = start_lr
        # decay every 10000 steps with a base of 0.2
        learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                                   10000, 0.2, staircase=True)

        # Basically update the batch_norm moving averages before the training step
        # http://ruishu.io/2016/12/27/batchnorm/
        with tf.control_dependencies(update_ops):
            train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)

    # Initialize all random variables (Weights/Bias)
    init_op = tf.group(tf.global_variables_initializer(),tf.local_variables_initializer())
    sess.run(init_op)

    # Start input enqueue threads.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # Load checkpoint if needed
    if pre_trained_checkpoint:
        # Load tensorflow model
        print("Loading pre-trained model: %s" % args.checkpoint_dir)
        # Create saver object to save/load training checkpoint
        saver = tf.train.Saver(max_to_keep=None)
        saver.restore(sess, pre_trained_checkpoint)

        # Reset the global step so the learning-rate schedule starts over
        assign_global_step_op = tf.assign(global_step, 0)
        sess.run(assign_global_step_op)

    else:
        # Just create saver for saving checkpoints
        saver = tf.train.Saver(max_to_keep=None)

    # Monitor loss, learning_rate, global_step, etc...
    tf.summary.scalar("loss_train", loss)
    tf.summary.scalar("learning_rate", learning_rate)
    tf.summary.scalar("global_step", global_step)
    # merge all summaries into a single op
    merged_summary_op = tf.summary.merge_all()

    # Configure where to save the logs for tensorboard
    summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

    try:
        step = 0
        count_model = 0
        while not coord.should_stop():
            start_time = time.time()

            # Run one step of the model.  The return values are
            # the activations from the `train_op` (which is
            # discarded) and the `loss` op.  To inspect the values
            # of your ops or variables, you may include them in
            # the list passed to sess.run() and the value tensors
            # will be returned in the tuple from the call.
            _, loss_value = sess.run([train_step, loss])

            duration = time.time() - start_time

            # Print an overview fairly often.
            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value,duration))

            if step % iter_disp == 0:
                # write logs
                summary = merged_summary_op.eval()
                summary_writer.add_summary(summary, step)

            # Save model
            if step % 4000 == 0:
                # Save checkpoint after each epoch
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)
                checkpoint_path = os.path.join(save_dir, "model")
                filename = saver.save(sess, checkpoint_path, global_step=count_model)
                print("Model saved in file: %s" % filename)
                count_model += 1

            step += 1

    except tf.errors.OutOfRangeError:
        # the input queue signals the end of the requested number of epochs this way
        print('Done training for %d epochs, %d steps.' % (epochs, step))
    finally:
        # When done, ask the threads to stop.
        coord.request_stop()

    # Wait for threads to finish.
    coord.join(threads)
    sess.close()
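util.create_input_graph is not shown in this example. A minimal TF1 queue-based pipeline of the kind this loop expects (string_input_producer feeding a TFRecordReader, then shuffle_batch) is sketched below; the feature names, image resolution, and label type are hypothetical and would need to match the actual TFRecord schema.

import tensorflow as tf


def create_input_graph(tfrecord_files, epochs, batch_size, do_augment=False):
    # queue of input files; OutOfRangeError is raised after `epochs` passes
    filename_queue = tf.train.string_input_producer(tfrecord_files, num_epochs=epochs)
    reader = tf.TFRecordReader()
    _, serialized = reader.read(filename_queue)
    # hypothetical feature names and shapes, the real schema is not shown here
    features = tf.parse_single_example(
        serialized,
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.float32),
        })
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    image = tf.reshape(image, [66, 200, 3])  # assumed input resolution
    image = tf.cast(image, tf.float32) / 255.0
    label = features['label']
    if do_augment:
        image = tf.image.random_flip_left_right(image)
    # shuffled mini-batches fed to the graph built by the caller
    images, labels = tf.train.shuffle_batch(
        [image, label], batch_size=batch_size,
        capacity=1000 + 3 * batch_size, min_after_dequeue=1000, num_threads=4)
    return images, labels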
Example #9
def train_network(input_train_hdf5, input_val_hdf5, gpu,
                  pre_trained_checkpoint, epochs, batch_size, logs_path,
                  save_dir):

    # Create log directory if it does not exist
    if not os.path.exists(logs_path):
        os.makedirs(logs_path)

    # Set environment variable to select the GPU to use
    if gpu != -1:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
    else:
        print('Set tensorflow on CPU')
        os.environ["CUDA_VISIBLE_DEVICES"] = ""

    # Define number of epochs and batch size, where to save logs, etc...
    iter_disp = 10
    start_lr = args.learning_rate

    # Avoid allocating the whole memory
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    sess = tf.InteractiveSession(config=tf.ConfigProto(
        gpu_options=gpu_options))

    # Regularization value
    L2NormConst = 0.001

    # Build model and get references to placeholders
    driving_model = model.DrivingModelAutoEncoder(training_mode=True)
    model_in = driving_model.input
    model_out = driving_model.output
    model_drop = driving_model.dropout_control

    # Add input image/steering angle on summary
    tf.summary.image("input_image", model_in, 10)
    tf.summary.image("output_image", model_out, 10)

    # Reconstruction loss (the active loss below is a huber loss;
    # earlier cross-entropy and L2 attempts are left commented out)
    # Get all model "parameters" that are trainable
    train_vars = tf.trainable_variables()
    with tf.name_scope("L2_LOSS"):
        #cross_entropy = -1. * model_in * tf.log(model_out) - (1. - model_in) * tf.log(1. - model_out)
        #loss = tf.reduce_mean(cross_entropy)
        #loss = tf.nn.l2_loss(model_in-model_out)
        loss = tf.reduce_mean(util.huber_loss(model_out, model_in))

    # Solver configuration
    # Get ops to update moving_mean and moving_variance from batch_norm
    # Reference: https://www.tensorflow.org/api_docs/python/tf/contrib/layers/batch_norm
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.name_scope("Solver"):
        global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = start_lr
        # decay every 5000 steps with a base of 0.9
        learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                   global_step,
                                                   5000,
                                                   0.9,
                                                   staircase=True)

        # Basically update the batch_norm moving averages before the training step
        # http://ruishu.io/2016/12/27/batchnorm/
        with tf.control_dependencies(update_ops):
            train_step = tf.train.AdamOptimizer(learning_rate).minimize(
                loss, global_step=global_step)

    # Initialize all random variables (Weights/Bias)
    sess.run(tf.global_variables_initializer())

    # Load checkpoint if needed
    if pre_trained_checkpoint:
        # Load tensorflow model
        print("Loading pre-trained model: %s" % args.checkpoint_dir)
        # Create saver object to save/load training checkpoint
        saver = tf.train.Saver(max_to_keep=None)
        saver.restore(sess, pre_trained_checkpoint)
    else:
        # Just create saver for saving checkpoints
        saver = tf.train.Saver(max_to_keep=None)

    # Monitor loss, learning_rate, global_step, etc...
    tf.summary.scalar("loss_train", loss)
    tf.summary.scalar("learning_rate", learning_rate)
    tf.summary.scalar("global_step", global_step)
    # merge all summaries into a single op
    merged_summary_op = tf.summary.merge_all()

    # Configure where to save the logs for tensorboard
    summary_writer = tf.summary.FileWriter(logs_path,
                                           graph=tf.get_default_graph())

    data = HandleData(path=input_train_hdf5, path_val=input_val_hdf5)
    num_images_epoch = int(data.get_num_images() / batch_size)
    print('Num samples', data.get_num_images(), 'Iterations per epoch:',
          num_images_epoch, 'batch size:', batch_size)

    # For each epoch
    for epoch in range(epochs):
        for i in range(int(data.get_num_images() / batch_size)):
            # Get training batch
            xs_train, ys_train = data.LoadTrainBatch(batch_size,
                                                     should_augment=False)

            # Send training batch to tensorflow graph (Dropout enabled)
            train_step.run(feed_dict={model_in: xs_train, model_drop: 0.8})

            # write logs at every iteration
            summary = merged_summary_op.eval(feed_dict={
                model_in: xs_train,
                model_drop: 1.0
            })
            summary_writer.add_summary(summary, epoch * num_images_epoch + i)

        # Save checkpoint after each epoch
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        checkpoint_path = os.path.join(save_dir, "model")
        filename = saver.save(sess, checkpoint_path, global_step=epoch)
        print("Model saved in file: %s" % filename)

        # Shuffle data at each epoch end
        print("Shuffle data")
        data.shuffleData()

    print("Run the command line:\n" \
          "--> tensorboard --logdir=./logs " \
          "\nThen open http://0.0.0.0:6006/ into your web browser")