# Shared imports for the snippets in this section (assumed from usage):
import os
import time

import numpy as np
import tensorflow as tf
import torch
import torch.nn.functional as F


def get_heading_loss(self, heading_scores, heading_res_norm,
                     heading_class_label, heading_res_norm_label):
    heading_class_loss = F.cross_entropy(heading_scores, heading_class_label)
    # b, NUM_HEADING_BIN -> b, 1: select the residual of the ground-truth bin
    heading_res_norm_select = torch.gather(
        heading_res_norm, 1, heading_class_label.view(-1, 1))
    heading_res_norm_loss = huber_loss(
        heading_res_norm_select.squeeze(1) - heading_res_norm_label, delta=1.0)
    return heading_class_loss, heading_res_norm_loss
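# The huber_loss helper called by these PyTorch losses is defined elsewhere in
# the project. A minimal sketch, assuming it maps an error tensor to a scalar
# mean (quadratic inside |error| <= delta, linear outside); an illustration,
# not necessarily the project's exact implementation:
import torch

def huber_loss(error, delta=1.0):
    abs_error = torch.abs(error)
    quadratic = torch.clamp(abs_error, max=delta)  # capped quadratic region
    linear = abs_error - quadratic                 # overflow beyond delta
    return (0.5 * quadratic ** 2 + delta * linear).mean()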
def get_size_loss(self, size_scores, size_res_norm,
                  size_class_label, size_res_label_norm):
    batch_size = size_scores.shape[0]
    size_class_loss = F.cross_entropy(size_scores, size_class_label)
    # b, NUM_SIZE_CLUSTER, 3 -> b, 1, 3
    size_res_norm_select = torch.gather(
        size_res_norm, 1,
        size_class_label.view(batch_size, 1, 1).expand(batch_size, 1, 3))
    size_norm_dist = torch.norm(
        size_res_label_norm - size_res_norm_select.squeeze(1), 2, dim=-1)
    size_res_norm_loss = huber_loss(size_norm_dist, delta=1.0)
    return size_class_loss, size_res_norm_loss
def get_corner_loss(self, preds, gts):
    center_label, heading_label, size_label = gts
    center_preds, heading_preds, size_preds = preds
    corners_3d_gt = get_box3d_corners_helper(
        center_label, heading_label, size_label)
    corners_3d_gt_flip = get_box3d_corners_helper(
        center_label, heading_label + np.pi, size_label)
    corners_3d_pred = get_box3d_corners_helper(
        center_preds, heading_preds, size_preds)  # N, 8, 3
    # Compare against both the GT box and its 180-degree flipped twin, so a
    # heading that is off by pi is not over-penalized.
    corners_dist = torch.min(
        torch.norm(corners_3d_pred - corners_3d_gt, 2, dim=-1).mean(-1),
        torch.norm(corners_3d_pred - corners_3d_gt_flip, 2, dim=-1).mean(-1))
    # corners_dist = torch.norm(corners_3d_pred - corners_3d_gt, 2, dim=-1)
    corners_loss = huber_loss(corners_dist, delta=1.0)
    return corners_loss, corners_3d_gt
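# get_box3d_corners_helper is also defined elsewhere. A hedged sketch of what
# it plausibly does (assumed convention: sizes = (l, w, h), heading rotates
# about the vertical y axis; the corner ordering is an assumption):
import torch

def get_box3d_corners_helper(centers, headings, sizes):
    # centers (N, 3), headings (N,), sizes (N, 3) -> corners (N, 8, 3)
    n = centers.shape[0]
    l, w, h = sizes[:, 0], sizes[:, 1], sizes[:, 2]
    x = torch.stack([l/2, l/2, -l/2, -l/2, l/2, l/2, -l/2, -l/2], dim=1)
    y = torch.stack([h/2, h/2, h/2, h/2, -h/2, -h/2, -h/2, -h/2], dim=1)
    z = torch.stack([w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2], dim=1)
    corners = torch.stack([x, y, z], dim=1)  # (N, 3, 8), box at the origin
    c, s = torch.cos(headings), torch.sin(headings)
    zero, one = torch.zeros_like(c), torch.ones_like(c)
    rot = torch.stack([c, zero, s,
                       zero, one, zero,
                       -s, zero, c], dim=1).view(n, 3, 3)  # rotation about y
    corners = torch.bmm(rot, corners)                      # rotate, then shift
    return corners.transpose(1, 2) + centers.unsqueeze(1)  # (N, 8, 3)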
def get_loss(self):
    end_points = self.end_points
    cls_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=end_points['cls_logits'],
        labels=self.placeholders['class_labels']))
    tf.summary.scalar('classification loss', cls_loss)
    # is_obj_mask = tf.to_float(tf.not_equal(self.placeholders['class_labels'], 0))
    train_reg_mask = tf.to_float(self.placeholders['train_regression'])
    center_x_cls_loss = tf.reduce_mean(
        train_reg_mask * tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=end_points['center_x_scores'],
            labels=self.placeholders['center_bin_x_labels']))
    center_z_cls_loss = tf.reduce_mean(
        train_reg_mask * tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=end_points['center_z_scores'],
            labels=self.placeholders['center_bin_z_labels']))
    bin_x_onehot = tf.one_hot(self.placeholders['center_bin_x_labels'],
        depth=NUM_CENTER_BIN, on_value=1, off_value=0, axis=-1)  # BxNUM_CENTER_BIN
    # NOTICE: labels['center_x_residuals'] is already normalized
    center_x_residuals_normalized = tf.reduce_sum(
        end_points['center_x_residuals_normalized'] * tf.to_float(bin_x_onehot),
        axis=-1)  # B
    center_x_residuals_dist = tf.norm(
        self.placeholders['center_x_res_labels'] - center_x_residuals_normalized,
        axis=-1)
    center_x_res_loss = huber_loss(train_reg_mask * center_x_residuals_dist, delta=1.0)
    bin_z_onehot = tf.one_hot(self.placeholders['center_bin_z_labels'],
        depth=NUM_CENTER_BIN, on_value=1, off_value=0, axis=-1)  # BxNUM_CENTER_BIN
    center_z_residuals_normalized = tf.reduce_sum(
        end_points['center_z_residuals_normalized'] * tf.to_float(bin_z_onehot),
        axis=-1)  # B
    center_z_residuals_dist = tf.norm(
        self.placeholders['center_z_res_labels'] - center_z_residuals_normalized,
        axis=-1)
    center_z_res_loss = huber_loss(train_reg_mask * center_z_residuals_dist, delta=1.0)
    # y is directly regressed
    center_y_residuals_dist = tf.norm(
        self.placeholders['center_y_res_labels'] -
        tf.gather(end_points['center_y_residuals'], 0, axis=-1), axis=-1)
    center_y_res_loss = huber_loss(train_reg_mask * center_y_residuals_dist, delta=1.0)
    tf.summary.scalar('center_x class loss', center_x_cls_loss)
    tf.summary.scalar('center_z class loss', center_z_cls_loss)
    tf.summary.scalar('center_x residual loss', center_x_res_loss)
    tf.summary.scalar('center_y residual loss', center_y_res_loss)
    tf.summary.scalar('center_z residual loss', center_z_res_loss)
    # Heading loss
    heading_class_loss = tf.reduce_mean(
        train_reg_mask * tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=end_points['heading_scores'],
            labels=self.placeholders['heading_bin_labels']))
    hcls_onehot = tf.one_hot(self.placeholders['heading_bin_labels'],
        depth=NUM_HEADING_BIN, on_value=1, off_value=0, axis=-1)  # BxNUM_HEADING_BIN
    heading_residual_normalized_label = self.placeholders['heading_res_labels']
    heading_res_dist = tf.norm(tf.reduce_sum(
        end_points['heading_residuals_normalized'] * tf.to_float(hcls_onehot),
        axis=-1) - heading_residual_normalized_label)
    heading_res_loss = huber_loss(train_reg_mask * heading_res_dist, delta=1.0)
    tf.summary.scalar('heading class loss', heading_class_loss)
    tf.summary.scalar('heading residual loss', heading_res_loss)
    # Size loss
    size_class_loss = tf.reduce_mean(
        train_reg_mask * tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=end_points['size_scores'],
            labels=self.placeholders['size_class_labels']))
    scls_onehot = tf.one_hot(self.placeholders['size_class_labels'],
        depth=NUM_SIZE_CLUSTER, on_value=1, off_value=0, axis=-1)  # BxNUM_SIZE_CLUSTER
    scls_onehot_tiled = tf.tile(tf.expand_dims(
        tf.to_float(scls_onehot), -1), [1, 1, 3])  # BxNUM_SIZE_CLUSTERx3
    predicted_size_residual_normalized = tf.reduce_sum(
        end_points['size_residuals_normalized'] * scls_onehot_tiled, axis=1)  # Bx3
    size_residual_label_normalized = self.placeholders['size_res_labels']  # Bx3
    size_dist = tf.norm(
        size_residual_label_normalized - predicted_size_residual_normalized, axis=-1)
    size_res_loss = huber_loss(train_reg_mask * size_dist, delta=1.0)
    tf.summary.scalar('size class loss', size_class_loss)
    tf.summary.scalar('size residual loss', size_res_loss)
    obj_cls_weight = 1
    cls_weight = 1
    res_weight = 1
    total_loss = obj_cls_weight * cls_loss + \
        cls_weight * (center_x_cls_loss + center_z_cls_loss +
                      heading_class_loss + size_class_loss) + \
        res_weight * (center_x_res_loss + center_z_res_loss +
                      center_y_res_loss + heading_res_loss + size_res_loss)
    loss_endpoints = {
        #'size_class_loss': size_class_loss,
        'size_res_loss': size_res_loss,
        #'heading_class_loss': heading_class_loss,
        #'heading_res_loss': heading_res_loss,
        #'center_x_cls_loss': center_x_cls_loss,
        #'center_z_cls_loss': center_z_cls_loss,
        #'center_x_res_loss': center_x_res_loss,
        #'center_z_res_loss': center_z_res_loss,
        #'center_y_res_loss': center_y_res_loss,
        #'mask_loss': cls_loss
        #'mean_size_label': mean_size_label,
        'size_residuals_normalized': end_points['size_residuals_normalized']
    }
    return total_loss, loss_endpoints
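# Both get_loss() variants consume bin-classification labels plus normalized
# residuals for the center x/z coordinates. For illustration, a hedged sketch
# of how such labels could be produced at preprocessing time (NUM_CENTER_BIN
# and the search range are assumed values, not taken from the project):
import numpy as np

NUM_CENTER_BIN = 12        # assumed
CENTER_SEARCH_RANGE = 3.0  # assumed half-range in meters

def encode_center_coord(x):
    # Map a continuous offset in [-range, range) to (bin index, residual
    # relative to the bin center, normalized by the bin size).
    bin_size = 2.0 * CENTER_SEARCH_RANGE / NUM_CENTER_BIN
    shifted = np.clip(x + CENTER_SEARCH_RANGE, 0, 2 * CENTER_SEARCH_RANGE - 1e-6)
    bin_idx = int(shifted / bin_size)
    residual = shifted - (bin_idx * bin_size + bin_size / 2.0)
    return bin_idx, residual / bin_size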
def get_loss(self):
    pls = self.placeholders
    end_points = self.end_points
    batch_size = self.batch_size
    # 3D Segmentation loss
    mask_loss = focal_loss(
        end_points['foreground_logits'],
        tf.one_hot(pls['seg_labels'], NUM_SEG_CLASSES, axis=-1))
    tf.summary.scalar('mask loss', mask_loss)
    #return mask_loss, {}
    # Gather box estimation labels of foreground points
    labels_fg = {}
    for k in pls.keys():
        if k not in [
                'center_bin_x_labels', 'center_bin_z_labels',
                'center_x_residuals_labels', 'center_z_residuals_labels',
                'center_y_residuals_labels', 'heading_bin_labels',
                'heading_residuals_labels', 'size_class_labels',
                'size_residuals_labels',
        ]:
            continue
        labels_fg[k] = tf.gather_nd(pls[k], end_points['fg_point_indices'])
        if k == 'size_residuals_labels':
            labels_fg[k].set_shape([batch_size, NUM_FG_POINT, 3])
        else:
            labels_fg[k].set_shape([batch_size, NUM_FG_POINT])
    # Center loss
    center_x_cls_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=end_points['center_x_scores'],
        labels=labels_fg['center_bin_x_labels']))
    center_z_cls_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=end_points['center_z_scores'],
        labels=labels_fg['center_bin_z_labels']))
    bin_x_onehot = tf.one_hot(labels_fg['center_bin_x_labels'],
        depth=NUM_CENTER_BIN, on_value=1, off_value=0, axis=-1)  # BxNxNUM_CENTER_BIN
    # NOTICE: labels['center_x_residuals'] is already normalized
    center_x_residuals_normalized = tf.reduce_sum(
        end_points['center_x_residuals_normalized'] * tf.to_float(bin_x_onehot),
        axis=2)  # BxN
    center_x_residuals_dist = tf.norm(
        labels_fg['center_x_residuals_labels'] - center_x_residuals_normalized,
        axis=-1)
    center_x_res_loss = huber_loss(center_x_residuals_dist, delta=2.0)
    bin_z_onehot = tf.one_hot(labels_fg['center_bin_z_labels'],
        depth=NUM_CENTER_BIN, on_value=1, off_value=0, axis=-1)  # BxNxNUM_CENTER_BIN
    center_z_residuals_normalized = tf.reduce_sum(
        end_points['center_z_residuals_normalized'] * tf.to_float(bin_z_onehot),
        axis=2)  # BxN
    center_z_residuals_dist = tf.norm(
        labels_fg['center_z_residuals_labels'] - center_z_residuals_normalized,
        axis=-1)
    center_z_res_loss = huber_loss(center_z_residuals_dist, delta=2.0)
    # y is directly regressed
    center_y_residuals_dist = tf.norm(
        labels_fg['center_y_residuals_labels'] -
        tf.gather(end_points['center_y_residuals'], 0, axis=-1), axis=-1)
    center_y_res_loss = huber_loss(center_y_residuals_dist, delta=2.0)
    tf.summary.scalar('center_x class loss', center_x_cls_loss)
    tf.summary.scalar('center_z class loss', center_z_cls_loss)
    tf.summary.scalar('center_x residual loss', center_x_res_loss)
    tf.summary.scalar('center_y residual loss', center_y_res_loss)
    tf.summary.scalar('center_z residual loss', center_z_res_loss)
    # Heading loss
    heading_class_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=end_points['heading_scores'],
            labels=labels_fg['heading_bin_labels']))
    hcls_onehot = tf.one_hot(labels_fg['heading_bin_labels'],
        depth=NUM_HEADING_BIN, on_value=1, off_value=0, axis=-1)  # BxNxNUM_HEADING_BIN
    heading_residual_normalized_label = labels_fg['heading_residuals_labels']
    heading_res_dist = tf.norm(
        heading_residual_normalized_label - tf.reduce_sum(
            end_points['heading_residuals_normalized'] * tf.to_float(hcls_onehot),
            axis=2))
    heading_res_loss = huber_loss(heading_res_dist, delta=1.0)
    tf.summary.scalar('heading class loss', heading_class_loss)
    tf.summary.scalar('heading residual loss', heading_res_loss)
    # Size loss
    size_class_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=end_points['size_scores'],
            labels=labels_fg['size_class_labels']))
    scls_onehot = tf.one_hot(labels_fg['size_class_labels'],
        depth=NUM_SIZE_CLUSTER, on_value=1, off_value=0, axis=-1)  # BxNxNUM_SIZE_CLUSTER
    scls_onehot_tiled = tf.tile(tf.expand_dims(
        tf.to_float(scls_onehot), -1), [1, 1, 1, 3])  # BxNxNUM_SIZE_CLUSTERx3
    predicted_size_residual_normalized = tf.reduce_sum(
        end_points['size_residuals_normalized'] * scls_onehot_tiled, axis=2)  # BxNx3
    size_residual_label_normalized = labels_fg['size_residuals_labels']  # BxNx3
    size_dist = tf.norm(
        size_residual_label_normalized - predicted_size_residual_normalized,
        axis=-1)
    size_res_loss = huber_loss(size_dist, delta=1.0)
    tf.summary.scalar('size class loss', size_class_loss)
    tf.summary.scalar('size residual loss', size_res_loss)
    seg_weight = 0.1
    cls_weight = 10
    res_weight = 10
    total_loss = seg_weight * mask_loss + \
        cls_weight * (center_x_cls_loss + center_z_cls_loss +
                      heading_class_loss + size_class_loss) + \
        res_weight * (center_x_res_loss + center_z_res_loss +
                      center_y_res_loss + heading_res_loss + size_res_loss)
    loss_endpoints = {
        'size_class_loss': size_class_loss,
        'size_res_loss': size_res_loss,
        'heading_class_loss': heading_class_loss,
        'heading_res_loss': heading_res_loss,
        'center_x_cls_loss': center_x_cls_loss,
        'center_z_cls_loss': center_z_cls_loss,
        'center_x_res_loss': center_x_res_loss,
        'center_z_res_loss': center_z_res_loss,
        'center_y_res_loss': center_y_res_loss,
        'mask_loss': mask_loss
    }
    return total_loss, loss_endpoints
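# The TF versions rely on huber_loss and focal_loss helpers that are not shown
# here. Minimal TF 1.x sketches consistent with how they are called; both are
# illustrations, not the project's exact code:
import tensorflow as tf

def huber_loss(error, delta=1.0):
    abs_error = tf.abs(error)
    quadratic = tf.minimum(abs_error, delta)  # capped quadratic region
    linear = abs_error - quadratic            # overflow beyond delta
    return tf.reduce_mean(0.5 * quadratic ** 2 + delta * linear)

def focal_loss(logits, onehot_labels, gamma=2.0):
    # Focal loss (Lin et al. 2017): cross-entropy with well-classified
    # examples down-weighted by (1 - p)^gamma.
    probs = tf.nn.softmax(logits)
    ce = -tf.to_float(onehot_labels) * tf.log(tf.clip_by_value(probs, 1e-8, 1.0))
    return tf.reduce_mean(tf.reduce_sum(tf.pow(1.0 - probs, gamma) * ce, axis=-1))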
def get_center_loss(self, pred_offsets, gt_offsets):
    center_dist = torch.norm(gt_offsets - pred_offsets, 2, dim=-1)
    center_loss = huber_loss(center_dist, delta=3.0)
    return center_loss
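# For completeness, a hedged sketch of how the per-component PyTorch losses
# above might be combined, mirroring the weighting of the TF get_loss()
# versions. The dict keys and the method living on `self` are assumptions:
def get_total_loss(self, preds, gts):
    center_loss = self.get_center_loss(preds['center_offsets'], gts['center_offsets'])
    heading_cls_loss, heading_res_loss = self.get_heading_loss(
        preds['heading_scores'], preds['heading_res_norm'],
        gts['heading_class'], gts['heading_res_norm'])
    size_cls_loss, size_res_loss = self.get_size_loss(
        preds['size_scores'], preds['size_res_norm'],
        gts['size_class'], gts['size_res_norm'])
    corner_loss, _ = self.get_corner_loss(
        (preds['center'], preds['heading'], preds['size']),
        (gts['center'], gts['heading'], gts['size']))
    return (center_loss + heading_cls_loss + heading_res_loss
            + size_cls_loss + size_res_loss + corner_loss)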
def train_network(input_train_hdf5, input_val_hdf5, gpu, pre_trained_checkpoint,
                  epochs, batch_size, logs_path, save_dir):
    # Create log directory if it does not exist
    if not os.path.exists(logs_path):
        os.makedirs(logs_path)

    # Set environment variable to select the GPU to use
    if gpu != -1:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
    else:
        print('Set tensorflow on CPU')
        os.environ["CUDA_VISIBLE_DEVICES"] = ""

    # Display interval and initial learning rate
    iter_disp = 10
    start_lr = args.learning_rate

    # Avoid allocating the whole GPU memory
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))

    # Regularization value (currently unused)
    L2NormConst = 0.001

    # Build model and get references to placeholders
    driving_model = model.DrivingModelAutoEncoder(training_mode=True)
    model_in = driving_model.input
    model_out = driving_model.output
    model_drop = driving_model.dropout_control

    # Add input/reconstructed images to the summary
    tf.summary.image("input_image", model_in, 10)
    tf.summary.image("output_image", model_out, 10)

    # Get all model "parameters" that are trainable
    train_vars = tf.trainable_variables()

    # Reconstruction loss (Huber); earlier cross-entropy/L2 attempts kept below
    with tf.name_scope("L2_LOSS"):
        #cross_entropy = -1. * model_in * tf.log(model_out) - (1. - model_in) * tf.log(1. - model_out)
        #loss = tf.reduce_mean(cross_entropy)
        #loss = tf.nn.l2_loss(model_in - model_out)
        loss = tf.reduce_mean(util.huber_loss(model_out, model_in))

    # Solver configuration
    # Get ops to update moving_mean and moving_variance from batch_norm
    # Reference: https://www.tensorflow.org/api_docs/python/tf/contrib/layers/batch_norm
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.name_scope("Solver"):
        global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = start_lr
        # Decay every 5000 steps with a base of 0.9
        learning_rate = tf.train.exponential_decay(
            starter_learning_rate, global_step, 5000, 0.9, staircase=True)
        # Update the batch_norm moving averages before the training step
        # http://ruishu.io/2016/12/27/batchnorm/
        with tf.control_dependencies(update_ops):
            train_step = tf.train.AdamOptimizer(learning_rate).minimize(
                loss, global_step=global_step)

    # Initialize all random variables (Weights/Bias)
    sess.run(tf.global_variables_initializer())

    # Load checkpoint if needed
    if pre_trained_checkpoint:
        print("Loading pre-trained model: %s" % pre_trained_checkpoint)
        # Create saver object to save/load training checkpoints
        saver = tf.train.Saver(max_to_keep=None)
        saver.restore(sess, pre_trained_checkpoint)
    else:
        # Just create saver for saving checkpoints
        saver = tf.train.Saver(max_to_keep=None)

    # Monitor loss, learning_rate, global_step, etc...
    tf.summary.scalar("loss_train", loss)
    tf.summary.scalar("learning_rate", learning_rate)
    tf.summary.scalar("global_step", global_step)
    # Merge all summaries into a single op
    merged_summary_op = tf.summary.merge_all()
    # Configure where to save the logs for tensorboard
    summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

    data = HandleData(path=input_train_hdf5, path_val=input_val_hdf5)
    num_images_epoch = int(data.get_num_images() / batch_size)
    print('Num samples', data.get_num_images(),
          'Iterations per epoch:', num_images_epoch, 'batch size:', batch_size)

    # For each epoch
    for epoch in range(epochs):
        for i in range(num_images_epoch):
            # Get training batch
            xs_train, ys_train = data.LoadTrainBatch(batch_size, should_augment=False)
            # Send training batch to tensorflow graph (dropout enabled)
            train_step.run(feed_dict={model_in: xs_train, model_drop: 0.8})
            # Write logs at every iteration (dropout disabled for the summary pass),
            # indexed by the global iteration count
            summary = merged_summary_op.eval(
                feed_dict={model_in: xs_train, model_drop: 1.0})
            summary_writer.add_summary(summary, epoch * num_images_epoch + i)

        # Save checkpoint after each epoch
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        checkpoint_path = os.path.join(save_dir, "model")
        filename = saver.save(sess, checkpoint_path, global_step=epoch)
        print("Model saved in file: %s" % filename)

        # Shuffle data at each epoch end
        print("Shuffle data")
        data.shuffleData()

    print("Run the command line:\n"
          "--> tensorboard --logdir=./logs "
          "\nThen open http://0.0.0.0:6006/ into your web browser")
def train_network(input_list, input_val_hdf5, gpu, pre_trained_checkpoint,
                  epochs, batch_size, logs_path, save_dir, gpu_frac):
    # Create log directory if it does not exist
    if not os.path.exists(logs_path):
        os.makedirs(logs_path)

    # Set environment variable to select the GPU to use
    if gpu != -1:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
    else:
        print('Set tensorflow on CPU')
        os.environ["CUDA_VISIBLE_DEVICES"] = ""

    # Get file list
    list_tfrecord_files = HandleData.get_list_from_file(input_list)

    # Create the graph input part (responsible for loading files, augmentations, etc...)
    images, labels = util.create_input_graph(
        list_tfrecord_files, epochs, batch_size, do_augment=False)

    # Build graph
    driving_model = model.DrivingModelAutoEncoder(input=images, use_placeholder=False)
    model_out = driving_model.output

    # Add input/reconstructed images to the summary
    tf.summary.image("input_image", images, 6)
    tf.summary.image("output_image", model_out, 6)

    # Display interval and initial learning rate
    iter_disp = 10
    start_lr = args.learning_rate

    # Avoid allocating the whole GPU memory
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_frac)
    sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))

    # Get all model "parameters" that are trainable
    train_vars = tf.trainable_variables()

    with tf.name_scope("Loss"):
        #cross_entropy = -1. * model_in * tf.log(model_out) - (1. - model_in) * tf.log(1. - model_out)
        loss = tf.reduce_mean(util.huber_loss(images, model_out))
        #loss = tf.nn.l2_loss(images - model_out)

    # Get ops to update moving_mean and moving_variance from batch_norm
    # Reference: https://www.tensorflow.org/api_docs/python/tf/contrib/layers/batch_norm
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    # Solver configuration
    with tf.name_scope("Solver"):
        #global_step = tf.Variable(0, name='global_step', trainable=False)
        global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = start_lr
        # Decay every 10000 steps with a base of 0.2
        learning_rate = tf.train.exponential_decay(
            starter_learning_rate, global_step, 10000, 0.2, staircase=True)
        # Update the batch_norm moving averages before the training step
        # http://ruishu.io/2016/12/27/batchnorm/
        with tf.control_dependencies(update_ops):
            train_step = tf.train.AdamOptimizer(learning_rate).minimize(
                loss, global_step=global_step)

    # Initialize all random variables (Weights/Bias)
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess.run(init_op)

    # Start input enqueue threads
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # Load checkpoint if needed
    if pre_trained_checkpoint:
        print("Loading pre-trained model: %s" % pre_trained_checkpoint)
        # Create saver object to save/load training checkpoints
        saver = tf.train.Saver(max_to_keep=None)
        saver.restore(sess, pre_trained_checkpoint)
        # Reset the global step so the learning-rate schedule restarts
        assign_global_step_op = tf.assign(global_step, 0)
        sess.run(assign_global_step_op)
    else:
        # Just create saver for saving checkpoints
        saver = tf.train.Saver(max_to_keep=None)

    # Monitor loss, learning_rate, global_step, etc...
    tf.summary.scalar("loss_train", loss)
    tf.summary.scalar("learning_rate", learning_rate)
    tf.summary.scalar("global_step", global_step)
    # Merge all summaries into a single op
    merged_summary_op = tf.summary.merge_all()
    # Configure where to save the logs for tensorboard
    summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

    try:
        step = 0
        count_model = 0
        while not coord.should_stop():
            start_time = time.time()
            # Run one step of the model. The `train_op` activations are
            # discarded; the `loss` value is fetched for display. To inspect
            # other ops or variables, include them in the list passed to
            # sess.run() and they will be returned in the tuple.
            _, loss_value = sess.run([train_step, loss])
            duration = time.time() - start_time

            # Print an overview fairly often
            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
            if step % iter_disp == 0:
                # Write logs, indexed by step
                summary = merged_summary_op.eval()
                summary_writer.add_summary(summary, step)

            # Save a checkpoint every 4000 steps
            if step % 4000 == 0:
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)
                checkpoint_path = os.path.join(save_dir, "model")
                filename = saver.save(sess, checkpoint_path, global_step=count_model)
                print("Model saved in file: %s" % filename)
                count_model += 1
            step += 1
    except tf.errors.OutOfRangeError:
        # Input queue exhausted: the requested number of epochs completed
        print('Done training for %d epochs, %d steps.' % (epochs, step))
    finally:
        # When done, ask the threads to stop
        coord.request_stop()

    # Wait for threads to finish
    coord.join(threads)
    sess.close()