def test_crop_data_removes_edge_data(self):
    """Cropping a (1, 18, 18) array must leave only the [8:10, 8:10] window."""
    full_array = np.arange(324.0).reshape((1, 18, 18))
    expected_window = full_array[:, 8:10, 8:10]

    result = GoData().crop_data(full_array)

    assert np.array_equal(result, expected_window)
def test_convert_mat_data_to_numpy_array_can_specify_the_number_of_images_to_extract(self):
    """With `number_of_samples=2` only two of the three stored entries are returned."""
    stored_values = np.array([[[[1]]], [[[2]]], [[[3]]]])
    mat_data_stub = Mock()
    mat_data_stub.get.return_value = stored_values
    converter = GoData()

    limited_array = converter.convert_mat_data_to_numpy_array(
        mat_data_stub, 'images', number_of_samples=2)

    expected = np.array([[[[1]]], [[[2]]]])
    assert np.array_equal(limited_array, expected)
def test_convert_mat_data_to_numpy_array_can_specify_the_number_of_images_to_extract(self):
    """Requesting two samples from a three-entry variable yields the first two entries."""
    fake_mat_data = Mock()
    fake_mat_data.get.return_value = np.array([[[[1]]], [[[2]]], [[[3]]]])

    result = GoData().convert_mat_data_to_numpy_array(
        fake_mat_data, 'fake variable', number_of_samples=2)

    first_two_entries = np.array([[[[1]]], [[[2]]]])
    assert np.array_equal(result, first_two_entries)
def test_convert_mat_file_to_numpy_file_writes_extracted_numpys_to_files(self, mock_numpy_save, h5py_file_mock):
    """The save mock must receive the images array first and the depths array second."""
    converter = GoData()
    # Cropping is made a pass-through so the stubbed conversion results (1 and 2)
    # reach the save call unchanged.
    converter.crop_data = lambda x: x
    converter.convert_mat_data_to_numpy_array = Mock(side_effect=[1, 2])

    converter.convert_mat_file_to_numpy_file('')

    expected_images_name = os.path.join('images_') + '.npy'
    expected_depths_name = os.path.join('depths_') + '.npy'
    save_calls = mock_numpy_save.call_args_list
    assert save_calls[0] == ((expected_images_name, 1),)
    assert save_calls[1] == ((expected_depths_name, 2),)
def test_convert_mat_data_to_numpy_array_extracts_and_tranposes_the_data(self):
    """The named variable is fetched via `get` and a (1, 1, 1, 3) array comes back as (1, 3, 1, 1)."""
    stored_values = np.array([[[[1, 2, 3]]]])
    mat_data_stub = Mock()
    mat_data_stub.get.return_value = stored_values

    result = GoData().convert_mat_data_to_numpy_array(mat_data_stub, 'fake variable')

    # The variable name must be forwarded as the sole positional argument.
    assert mat_data_stub.get.call_args == (('fake variable',),)
    expected = np.array([[[[1]], [[2]], [[3]]]])
    assert np.array_equal(result, expected)
def test_convert_mat_data_to_numpy_array_extracts_and_tranposes_the_data(self):
    """Reading 'images' calls `get('images')` and returns the transposed values."""
    fake_mat_data = Mock()
    fake_mat_data.get.return_value = np.array([[[[1, 2, 3]]]])
    converter = GoData()

    converted = converter.convert_mat_data_to_numpy_array(fake_mat_data, 'images')

    requested_with = fake_mat_data.get.call_args
    assert requested_with == (('images',),)
    assert np.array_equal(converted, np.array([[[[1]], [[2]], [[3]]]]))
def test_convert_mat_file_to_numpy_file_reads_the_mat_file(self, h5py_file_mock, mock_numpy_save):
    """The mat file must be opened through the h5py file mock with mode 'r'."""
    converter = GoData()
    # Both processing steps are stubbed out; only the file-open call matters here.
    converter.convert_mat_data_to_numpy_array = Mock()
    converter.crop_data = Mock()
    path_argument = 'fake name'

    converter.convert_mat_file_to_numpy_file(path_argument)

    expected_open_call = ((path_argument, 'r'),)
    assert h5py_file_mock.call_args == expected_open_call
def test_convert_mat_file_to_numpy_file_calls_extract_mat_data_to_numpy_array(self, h5py_file_mock, mock_numpy_save):
    """The opened mat data is converted once for 'images' and then once for 'depths'."""
    opened_data = 'fake mat data'
    h5py_file_mock.return_value = opened_data
    converter = GoData()
    converter.crop_data = Mock()
    converter.convert_mat_data_to_numpy_array = Mock()

    converter.convert_mat_file_to_numpy_file('')

    recorded_calls = converter.convert_mat_data_to_numpy_array.call_args_list
    # Index [0] of a recorded call holds its positional arguments.
    second_call_arguments = recorded_calls[1][0]
    assert second_call_arguments == ('fake mat data', 'depths')
    first_call_arguments = recorded_calls[0][0]
    assert first_call_arguments == ('fake mat data', 'images')
def test_convert_mat_file_to_numpy_file_passes_can_be_called_on_a_specific_number_of_images(self, mock_numpy_save, h5py_file_mock):
    """`number_of_samples` given to the file conversion is forwarded as a keyword argument."""
    conversion_stub = Mock()
    converter = GoData()
    converter.crop_data = Mock()
    converter.convert_mat_data_to_numpy_array = conversion_stub

    converter.convert_mat_file_to_numpy_file('', number_of_samples=2)

    # Index [1] of the last recorded call holds its keyword arguments.
    forwarded_keywords = conversion_stub.call_args[1]
    assert forwarded_keywords['number_of_samples'] == 2
def test_can_convert_from_mat_file_to_numpy_files(self):
    """
    Convert the micro mat file to numpy files and check known values after reloading.

    The generated files are removed before the conversion and again in a `finally`
    block, so a failing assertion cannot leave stale files behind that would let the
    existence checks of a later run pass spuriously.
    """
    # Prepare paths.
    images_numpy_file_path = os.path.join('functional_tests', 'test_data', 'images_nyud_micro.npy')
    depths_numpy_file_path = os.path.join('functional_tests', 'test_data', 'depths_nyud_micro.npy')
    mat_file_path = os.path.join('functional_tests', 'test_data', 'nyud_micro.mat')
    # Start from a clean slate so the existence checks prove this run wrote the files.
    remove_file_if_exists(images_numpy_file_path)
    remove_file_if_exists(depths_numpy_file_path)
    try:
        # Run the conversion script.
        GoData().convert_mat_file_to_numpy_file(mat_file_path)
        # Check that the files are created.
        assert os.path.isfile(images_numpy_file_path)
        assert os.path.isfile(depths_numpy_file_path)
        # Check that magic values are correct when the data is reloaded from numpy files.
        images = np.load(images_numpy_file_path)
        assert images[5, 10, 10, 1] == 91
        depths = np.load(depths_numpy_file_path)
        assert math.isclose(depths[5, 10, 10], 3.75686, abs_tol=0.001)
    finally:
        # Clean up, even when an assertion above failed.
        remove_file_if_exists(images_numpy_file_path)
        remove_file_if_exists(depths_numpy_file_path)
def test_can_convert_from_mat_to_tfrecord_and_read_tfrecord(self):
    """
    Convert the micro mat file to a TFRecords file and check known values after reading it back.

    The TFRecords file is removed before the conversion and again in an outer `finally`
    block, so a failing assertion cannot leave a stale file behind for the next run.
    """
    # Prepare paths.
    data_directory = os.path.join('functional_tests', 'test_data')
    mat_file_path = os.path.join(data_directory, 'nyud_micro.mat')
    tfrecords_file_path = os.path.join(data_directory, 'nyud_micro.tfrecords')
    # Start from a clean slate so the existence check proves this run wrote the file.
    remove_file_if_exists(tfrecords_file_path)
    try:
        # Run the conversion script.
        go_data = GoData(data_directory=data_directory, data_name='nyud_micro')
        go_data.convert_mat_to_tfrecord(mat_file_path)
        # Check that the file is created.
        assert os.path.isfile(tfrecords_file_path)
        # Reload data.
        images, depths = go_data.inputs(data_type='', batch_size=10)
        # Check that magic values are correct when the data is reloaded.
        # The assertions accept any one of the listed values (`.any()`).
        magic_image_numbers = [
            -0.17450979, -0.15882352, -0.15490195, -0.15098038, -0.14705881,
            -0.14313725, -0.11960781, -0.056862712, 0.0058823824
        ]
        magic_depth_numbers = [
            1.1285654, 1.8865139, 2.104018, 2.1341071, 2.6960645,
            3.318316, 3.4000545, 3.4783292, 3.7568643, 3.9500945
        ]
        session = tf.Session()
        coordinator = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=session, coord=coordinator)
        try:
            with session.as_default():
                assert np.isclose(magic_image_numbers, images.eval()[5, 10, 10, 1], atol=0.00001).any()
                assert np.isclose(magic_depth_numbers, depths.eval()[5, 10, 10], atol=0.00001).any()
        except tf.errors.OutOfRangeError:
            fail('Should not hit this.')
        finally:
            coordinator.request_stop()
            coordinator.join(threads)
            session.close()
    finally:
        # Clean up, even when an assertion above failed.
        remove_file_if_exists(tfrecords_file_path)
def test_mat_data_to_numpy_for_depths_automatically_uses_the_correct_transpose(self):
    """A three-dimensional array handed over in reversed axis order comes back as (300, 10, 20)."""
    natural_order_array = np.empty((10, 20, 300))
    mat_data_stub = Mock()
    # Matlab's hdf5 gives a reverse order.
    mat_data_stub.get.return_value = natural_order_array.transpose()

    # NOTE(review): the test name says "depths" but the variable requested is 'images' - confirm intent.
    converted = GoData().convert_mat_data_to_numpy_array(mat_data_stub, 'images')

    assert converted.shape == (300, 10, 20)
def test_mat_data_to_numpy_for_accelerometer_gives_correct_shape(self):
    """A two-dimensional 'accelData' array handed over in reversed axis order comes back as (300, 4)."""
    natural_order_array = np.empty((300, 4))
    mat_data_stub = Mock()
    # Matlab's hdf5 gives a reverse order.
    mat_data_stub.get.return_value = natural_order_array.transpose()

    converted = GoData().convert_mat_data_to_numpy_array(mat_data_stub, 'accelData')

    assert converted.shape == (300, 4)
def test_data_shuffling(self, mock_permutation):
    """
    Shuffling must reorder images and labels with the same permutation.

    The permutation mock is fixed to [2, 0, 1], so [1, 2, 3] is expected to become
    [3, 1, 2] and ['a', 'b', 'c'] to become ['c', 'a', 'b'].
    """
    go_data = GoData()
    go_data.images = np.array([1, 2, 3])
    go_data.labels = np.array(['a', 'b', 'c'])
    mock_permutation.return_value = [2, 0, 1]

    go_data.shuffle()

    # The original overwrote the attributes here instead of checking them,
    # so the test could never fail.
    assert np.array_equal(go_data.images, np.array([3, 1, 2]))
    assert np.array_equal(go_data.labels, np.array(['c', 'a', 'b']))
def __init__(self, message_queue=None):
    """
    Initialise the parent class, then set the training settings and bookkeeping state.

    :param message_queue: Stored on the instance as ``queue``; defaults to None.
    """
    super().__init__()

    # Internal setup that needs no external objects.
    self.queue = message_queue
    self.session = None
    self.saver = None
    self.step = 0
    self.stop_signal = False
    self.moving_average_decay = 0.1
    self.moving_average_loss = None

    # Common variables.
    self.dropout_keep_probability = 0.5
    self.log_directory = "logs"
    self.summary_step_period = 1
    self.initial_learning_rate = 0.00001
    self.number_of_epochs = 50000
    self.batch_size = 8

    # The data object is built before the placeholder, as in the original ordering.
    self.data = GoData(data_name='nyud')
    self.dropout_keep_probability_tensor = tf.placeholder(tf.float32)
def test_rebin_outputs_the_right_types_based_on_dimensions(self):
    """Rebinning a 4-D array must give uint8 and rebinning a 3-D array must give float64."""
    image_collection = np.array([[[[1]]]])  # Collection of images.
    depth_collection = np.array([[[1]]])  # Collection of depths.
    rebinner = GoData()
    rebinner.width, rebinner.height, rebinner.channels = 1, 1, 1

    rebinned_images = rebinner.shrink_array_with_rebinning(image_collection)
    rebinned_depths = rebinner.shrink_array_with_rebinning(depth_collection)

    assert rebinned_images.dtype == np.uint8
    assert rebinned_depths.dtype == np.float64
def test_convert_mat_file_to_numpy_file_passes_can_be_called_on_a_specific_number_of_images(
        self, mock_numpy_save, h5py_file_mock):
    """The sample limit passed to the file conversion reaches the array conversion as a keyword."""
    go_data = GoData()
    go_data.crop_data = Mock()
    array_conversion_stub = Mock()
    go_data.convert_mat_data_to_numpy_array = array_conversion_stub

    go_data.convert_mat_file_to_numpy_file('', number_of_samples=2)

    # call_args[1] is the keyword-argument dictionary of the most recent call.
    keyword_arguments = array_conversion_stub.call_args[1]
    assert keyword_arguments['number_of_samples'] == 2
def test_rebin_outputs_the_right_types_based_on_dimensions(self):
    """A 4-D input must be rebinned to uint8 and a 3-D input to float64."""
    rebinner = GoData()
    rebinner.image_width = 1
    rebinner.image_height = 1
    rebinner.image_depth = 1
    image_collection = np.array([[[[1]]]])  # Collection of images.
    depth_collection = np.array([[[1]]])  # Collection of depths.

    rebinned_images = rebinner.shrink_array_with_rebinning(image_collection)
    rebinned_depths = rebinner.shrink_array_with_rebinning(depth_collection)

    assert rebinned_images.dtype == np.uint8
    assert rebinned_depths.dtype == np.float64
def test_data_path_property(self):
    """`data_path` must be the data directory joined with the data name."""
    subject = GoData()
    subject.data_name = 'file_name'
    subject.data_directory = 'directory'

    expected_path = os.path.join('directory', 'file_name')

    assert subject.data_path == expected_path
class GoNet(multiprocessing.Process):
    """
    The class to build and interact with the GoNet TensorFlow graph.
    """

    def __init__(self, message_queue=None):
        """
        Set the training settings and the internal bookkeeping state.

        :param message_queue: Optional queue-like object polled for 'save' and 'quit' messages while training.
        """
        super().__init__()

        # Common variables.
        self.batch_size = 8
        self.number_of_epochs = 50000
        self.initial_learning_rate = 0.00001
        self.data = GoData(data_name='nyud')
        self.summary_step_period = 1
        self.log_directory = "logs"
        self.dropout_keep_probability = 0.5

        # Internal setup.
        self.moving_average_loss = None
        self.moving_average_decay = 0.1
        self.stop_signal = False
        self.step = 0
        self.saver = None
        self.session = None
        self.dropout_keep_probability_tensor = tf.placeholder(tf.float32)
        self.queue = message_queue

    def create_inference_op(self, images):
        """
        Performs a forward pass estimating label maps from RGB images.

        :param images: The RGB images tensor.
        :type images: tf.Tensor
        :return: The label maps tensor.
        :rtype: tf.Tensor
        """
        return self.create_linear_classifier_inference_op(images)

    def create_deep_inference_op(self, images):
        """
        Performs a forward pass estimating label maps from RGB images using a deep convolution net.

        :param images: The RGB images tensor.
        :type images: tf.Tensor
        :return: The label maps tensor.
        :rtype: tf.Tensor
        """
        with tf.name_scope('conv1'):
            w_conv = weight_variable([5, 5, 3, 32])
            b_conv = bias_variable([32])
            h_conv = leaky_relu(conv2d(images, w_conv) + b_conv)

        with tf.name_scope('conv2'):
            w_conv = weight_variable([5, 5, 32, 128])
            b_conv = bias_variable([128])
            h_conv = leaky_relu(conv2d(h_conv, w_conv) + b_conv)

        # Nine identical 128 -> 128 layers, named conv3 through conv11.
        for index in range(9):
            with tf.name_scope('conv' + str(index + 3)):
                w_conv = weight_variable([5, 5, 128, 128])
                b_conv = bias_variable([128])
                h_conv = leaky_relu(conv2d(h_conv, w_conv) + b_conv)

        with tf.name_scope('conv12'):
            w_conv = weight_variable([5, 5, 128, 32])
            b_conv = bias_variable([32])
            h_conv = leaky_relu(conv2d(h_conv, w_conv) + b_conv)

        with tf.name_scope('fc1'):
            fc0_size = self.data.height * self.data.width * 32
            fc1_size = fc0_size // 4096
            h_fc = tf.reshape(h_conv, [-1, fc0_size])
            w_fc = weight_variable([fc0_size, fc1_size])
            b_fc = bias_variable([fc1_size])
            h_fc = leaky_relu(tf.matmul(h_fc, w_fc) + b_fc)
            h_fc_drop = tf.nn.dropout(h_fc, self.dropout_keep_probability_tensor)

        with tf.name_scope('fc2'):
            fc2_size = fc1_size // 2
            w_fc = weight_variable([fc1_size, fc2_size])
            b_fc = bias_variable([fc2_size])
            h_fc = leaky_relu(tf.matmul(h_fc_drop, w_fc) + b_fc)
            h_fc_drop = tf.nn.dropout(h_fc, self.dropout_keep_probability_tensor)

        with tf.name_scope('fc3'):
            fc3_size = self.data.height * self.data.width
            w_fc = weight_variable([fc2_size, fc3_size])
            b_fc = bias_variable([fc3_size])
            h_fc = leaky_relu(tf.matmul(h_fc_drop, w_fc) + b_fc)
            predicted_labels = tf.reshape(h_fc, [-1, self.data.height, self.data.width, 1])

        return predicted_labels

    def standard_net_inference(self, images):
        """
        Performs a forward pass estimating label maps from RGB images using a AlexNet-like graph setup.

        :param images: The RGB images tensor.
        :type images: tf.Tensor
        :return: The label maps tensor.
        :rtype: tf.Tensor
        """
        with tf.name_scope('conv1'):
            w_conv = weight_variable([7, 7, 3, 16])
            b_conv = bias_variable([16])
            h_conv = leaky_relu(conv2d(images, w_conv) + b_conv)

        with tf.name_scope('conv2'):
            w_conv = weight_variable([7, 7, 16, 24])
            b_conv = bias_variable([24])
            h_conv = leaky_relu(conv2d(h_conv, w_conv, [1, 2, 2, 1]) + b_conv)

        with tf.name_scope('conv3'):
            w_conv = weight_variable([7, 7, 24, 32])
            b_conv = bias_variable([32])
            h_conv = leaky_relu(conv2d(h_conv, w_conv, [1, 2, 2, 1]) + b_conv)

        with tf.name_scope('fc1'):
            # Two [1, 2, 2, 1] convolutions precede this layer, hence iterations=2.
            fc0_size = (size_from_stride_two(self.data.height, iterations=2) *
                        size_from_stride_two(self.data.width, iterations=2) * 32)
            fc1_size = fc0_size // 2
            h_fc = tf.reshape(h_conv, [-1, fc0_size])
            w_fc = weight_variable([fc0_size, fc1_size])
            b_fc = bias_variable([fc1_size])
            h_fc = leaky_relu(tf.matmul(h_fc, w_fc) + b_fc)

        with tf.name_scope('fc2'):
            fc2_size = fc1_size // 2
            w_fc = weight_variable([fc1_size, fc2_size])
            b_fc = bias_variable([fc2_size])
            h_fc = leaky_relu(tf.matmul(h_fc, w_fc) + b_fc)

        with tf.name_scope('fc3'):
            fc3_size = self.data.height * self.data.width
            w_fc = weight_variable([fc2_size, fc3_size])
            b_fc = bias_variable([fc3_size])
            h_fc = leaky_relu(tf.matmul(h_fc, w_fc) + b_fc)
            predicted_labels = tf.reshape(h_fc, [-1, self.data.height, self.data.width, 1])

        return predicted_labels

    def create_linear_classifier_inference_op(self, images):
        """
        Performs a forward pass estimating label maps from RGB images using only a linear classifier.

        :param images: The RGB images tensor.
        :type images: tf.Tensor
        :return: The label maps tensor.
        :rtype: tf.Tensor
        """
        pixel_count = self.data.height * self.data.width
        flat_images = tf.reshape(images, [-1, pixel_count * self.data.channels])
        weights = weight_variable([pixel_count * self.data.channels, pixel_count], stddev=0.001)
        biases = bias_variable([pixel_count], constant=0.001)

        flat_predicted_labels = tf.matmul(flat_images, weights) + biases
        predicted_labels = tf.reshape(flat_predicted_labels, [-1, self.data.height, self.data.width, 1])
        return predicted_labels

    def create_loss_tensor(self, predicted_labels, labels):
        """
        Create the loss op and add it to the graph.

        :param predicted_labels: The labels predicted by the graph.
        :type predicted_labels: tf.Tensor
        :param labels: The ground truth labels.
        :type labels: tf.Tensor
        :return: The loss tensor.
        :rtype: tf.Tensor
        """
        return self.relative_differences(predicted_labels, labels)

    @staticmethod
    def relative_differences(predicted_labels, labels):
        """
        Determines the absolute L1 relative differences between two label maps.

        NOTE(review): the result is divided by ``labels``, so zero-valued labels divide by zero.

        :param predicted_labels: The first label map tensor (usually the predicted labels).
        :type predicted_labels: tf.Tensor
        :param labels: The second label map tensor (usually the actual labels).
        :type labels: tf.Tensor
        :return: The difference tensor.
        :rtype: tf.Tensor
        """
        difference = tf.abs(predicted_labels - labels)
        return difference / labels

    def create_training_op(self, value_to_minimize):
        """
        Create and add the training op to the graph.

        :param value_to_minimize: The value to train on.
        :type value_to_minimize: tf.Tensor
        :return: The training op.
        :rtype: tf.Operation
        """
        return tf.train.AdamOptimizer(self.initial_learning_rate).minimize(value_to_minimize)

    @staticmethod
    def convert_to_heat_map_rgb(tensor):
        """
        Convert a tensor to a heat map.

        NOTE(review): when every value is equal, ``maximum - minimum`` is zero and the ratio divides by zero.

        :param tensor: The tensor values to be converted.
        :type tensor: tf.Tensor
        :return: The heat map image tensor.
        :rtype: tf.Tensor
        """
        maximum = tf.reduce_max(tensor)
        minimum = tf.reduce_min(tensor)
        # Ratio runs from 0 at the minimum to 2 at the maximum.
        ratio = 2 * (tensor - minimum) / (maximum - minimum)
        b = tf.maximum(0.0, (1 - ratio))
        r = tf.maximum(0.0, (ratio - 1))
        g = 1 - b - r
        return tf.concat(3, [r, g, b]) - 0.5

    def image_comparison_summary(self, images, labels, predicted_labels, label_differences):
        """
        Combines the image, label, and difference tensors together into a presentable image. Then adds the
        image summary op to the graph.

        :param images: The original image.
        :type images: tf.Tensor
        :param labels: The tensor containing the actual label values.
        :type labels: tf.Tensor
        :param predicted_labels: The tensor containing the predicted labels.
        :type predicted_labels: tf.Tensor
        :param label_differences: The tensor containing the difference between the actual and predicted labels.
        :type label_differences: tf.Tensor
        """
        label_heat_map = self.convert_to_heat_map_rgb(labels)
        predicted_label_heat_map = self.convert_to_heat_map_rgb(predicted_labels)
        label_difference_heat_map = self.convert_to_heat_map_rgb(label_differences)

        comparison_image = tf.concat(1, [images, label_heat_map, predicted_label_heat_map,
                                         label_difference_heat_map])
        tf.image_summary('comparison', comparison_image)

    def interface_handler(self):
        """
        Handle input from the user using the interface.

        At most one message is taken from the queue per call. 'save' writes a checkpoint and the graph
        definition into the ``models`` directory; 'quit' sets the stop signal read by the training loop.
        """
        # Imported locally so this block does not depend on a file-level import.
        from queue import Empty

        if not self.queue:
            return
        try:
            # Fetch directly instead of checking `empty()` first: the check and the fetch are not
            # atomic, so the fetch could still raise even after a successful check.
            message = self.queue.get(block=False)
        except Empty:
            return

        if message == 'save':
            # The checkpoint is written before `write_graph`, so the directory has to exist already.
            os.makedirs('models', exist_ok=True)
            save_path = self.saver.save(self.session, os.path.join('models', 'depthnet.ckpt'),
                                        global_step=self.step)
            tf.train.write_graph(self.session.graph_def, 'models', 'depthnet.pb')
            print("Model saved in file: %s" % save_path)
        elif message == 'quit':
            self.stop_signal = True

    def train(self):
        """
        Adds the training operations and runs the training loop.

        The loop ends when the coordinator asks to stop, when the stop signal is set, or when the input
        raises an out-of-range error. The queue threads, summary writer and session are released in all cases.
        """
        print('Preparing data...')
        # Setup the inputs.
        images_tensor, labels_tensor = self.data.inputs(data_type='', batch_size=self.batch_size,
                                                        num_epochs=self.number_of_epochs)

        print('Building graph...')
        # Add the forward pass operations to the graph.
        predicted_labels_tensor = self.create_inference_op(images_tensor)

        # Add the loss operations to the graph.
        with tf.name_scope('loss'):
            loss_tensor = self.create_loss_tensor(predicted_labels_tensor, labels_tensor)
            loss_per_pixel_tensor = tf.reduce_mean(loss_tensor)
            tf.scalar_summary("Loss per pixel", loss_per_pixel_tensor)

        with tf.name_scope('comparison_summary'):
            self.image_comparison_summary(images_tensor, labels_tensor, predicted_labels_tensor, loss_tensor)

        # Add the training operations to the graph.
        training_op = self.create_training_op(value_to_minimize=loss_per_pixel_tensor)

        # The op for initializing the variables.
        initialize_op = tf.initialize_all_variables()

        # Prepare session.
        self.session = tf.Session()

        # Prepare the summary operations.
        summaries_op = tf.merge_all_summaries()
        summary_path = os.path.join(self.log_directory,
                                    datetime.datetime.now().strftime("y%Y_m%m_d%d_h%H_m%M_s%S"))
        writer = tf.train.SummaryWriter(summary_path, self.session.graph)

        # Prepare saver.
        self.saver = tf.train.Saver()

        print('Starting training...')
        # Initialize the variables.
        self.session.run(initialize_op)

        # Start input enqueue threads.
        coordinator = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=self.session, coord=coordinator)

        # Perform the training loop.
        try:
            while not coordinator.should_stop() and not self.stop_signal:
                # Regular training step.
                start_time = time.time()
                _, loss, summaries = self.session.run(
                    [training_op, loss_per_pixel_tensor, summaries_op],
                    feed_dict={self.dropout_keep_probability_tensor: self.dropout_keep_probability}
                )
                duration = time.time() - start_time

                # Information print and summary write step.
                if self.step % self.summary_step_period == 0:
                    writer.add_summary(summaries, self.step)
                    print('Step %d: Loss per pixel = %.5f (%.3f sec / step)' % (self.step, loss, duration))
                self.step += 1

                # Handle interface messages from the user.
                self.interface_handler()
        except tf.errors.OutOfRangeError:
            if self.step == 0:
                print('Training data not found.')
            else:
                print('Done training for %d epochs, %d steps.' % (self.number_of_epochs, self.step))
        finally:
            # When done, ask the threads to stop.
            coordinator.request_stop()
            try:
                # Wait for threads to finish.
                coordinator.join(threads)
            finally:
                # Close the writer so pending summaries reach disk, and release the session even if
                # joining the threads raised.
                writer.close()
                self.session.close()

    def run(self):
        """
        Allow for training the network from a multiprocessing standpoint.
        """
        self.train()
class GoNet(multiprocessing.Process):
    """
    The class to build and interact with the GoNet TensorFlow graph.
    """

    def __init__(self, message_queue=None):
        """
        Set the training settings and the internal bookkeeping state.

        :param message_queue: Optional queue-like object polled for 'save' and 'quit' messages while training.
        """
        super().__init__()

        # Common variables.
        self.batch_size = 8
        self.number_of_epochs = 50000
        self.initial_learning_rate = 0.00001
        self.data = GoData(data_name='nyud')
        self.summary_step_period = 1
        self.log_directory = "logs"
        self.dropout_keep_probability = 0.5

        # Internal setup.
        self.moving_average_loss = None
        self.moving_average_decay = 0.1
        self.stop_signal = False
        self.step = 0
        self.saver = None
        self.session = None
        self.dropout_keep_probability_tensor = tf.placeholder(tf.float32)
        self.queue = message_queue

    def create_inference_op(self, images):
        """
        Performs a forward pass estimating label maps from RGB images.

        :param images: The RGB images tensor.
        :type images: tf.Tensor
        :return: The label maps tensor.
        :rtype: tf.Tensor
        """
        return self.create_linear_classifier_inference_op(images)

    def create_deep_inference_op(self, images):
        """
        Performs a forward pass estimating label maps from RGB images using a deep convolution net.

        :param images: The RGB images tensor.
        :type images: tf.Tensor
        :return: The label maps tensor.
        :rtype: tf.Tensor
        """
        with tf.name_scope('conv1'):
            w_conv = weight_variable([5, 5, 3, 32])
            b_conv = bias_variable([32])
            h_conv = leaky_relu(conv2d(images, w_conv) + b_conv)

        with tf.name_scope('conv2'):
            w_conv = weight_variable([5, 5, 32, 128])
            b_conv = bias_variable([128])
            h_conv = leaky_relu(conv2d(h_conv, w_conv) + b_conv)

        # Nine identical 128 -> 128 layers, named conv3 through conv11.
        for index in range(9):
            with tf.name_scope('conv' + str(index + 3)):
                w_conv = weight_variable([5, 5, 128, 128])
                b_conv = bias_variable([128])
                h_conv = leaky_relu(conv2d(h_conv, w_conv) + b_conv)

        with tf.name_scope('conv12'):
            w_conv = weight_variable([5, 5, 128, 32])
            b_conv = bias_variable([32])
            h_conv = leaky_relu(conv2d(h_conv, w_conv) + b_conv)

        with tf.name_scope('fc1'):
            fc0_size = self.data.height * self.data.width * 32
            fc1_size = fc0_size // 4096
            h_fc = tf.reshape(h_conv, [-1, fc0_size])
            w_fc = weight_variable([fc0_size, fc1_size])
            b_fc = bias_variable([fc1_size])
            h_fc = leaky_relu(tf.matmul(h_fc, w_fc) + b_fc)
            h_fc_drop = tf.nn.dropout(h_fc, self.dropout_keep_probability_tensor)

        with tf.name_scope('fc2'):
            fc2_size = fc1_size // 2
            w_fc = weight_variable([fc1_size, fc2_size])
            b_fc = bias_variable([fc2_size])
            h_fc = leaky_relu(tf.matmul(h_fc_drop, w_fc) + b_fc)
            h_fc_drop = tf.nn.dropout(h_fc, self.dropout_keep_probability_tensor)

        with tf.name_scope('fc3'):
            fc3_size = self.data.height * self.data.width
            w_fc = weight_variable([fc2_size, fc3_size])
            b_fc = bias_variable([fc3_size])
            h_fc = leaky_relu(tf.matmul(h_fc_drop, w_fc) + b_fc)
            predicted_labels = tf.reshape(
                h_fc, [-1, self.data.height, self.data.width, 1])

        return predicted_labels

    def standard_net_inference(self, images):
        """
        Performs a forward pass estimating label maps from RGB images using a AlexNet-like graph setup.

        :param images: The RGB images tensor.
        :type images: tf.Tensor
        :return: The label maps tensor.
        :rtype: tf.Tensor
        """
        with tf.name_scope('conv1'):
            w_conv = weight_variable([7, 7, 3, 16])
            b_conv = bias_variable([16])
            h_conv = leaky_relu(conv2d(images, w_conv) + b_conv)

        with tf.name_scope('conv2'):
            w_conv = weight_variable([7, 7, 16, 24])
            b_conv = bias_variable([24])
            h_conv = leaky_relu(conv2d(h_conv, w_conv, [1, 2, 2, 1]) + b_conv)

        with tf.name_scope('conv3'):
            w_conv = weight_variable([7, 7, 24, 32])
            b_conv = bias_variable([32])
            h_conv = leaky_relu(conv2d(h_conv, w_conv, [1, 2, 2, 1]) + b_conv)

        with tf.name_scope('fc1'):
            # Two [1, 2, 2, 1] convolutions precede this layer, hence iterations=2.
            fc0_size = size_from_stride_two(
                self.data.height, iterations=2) * size_from_stride_two(
                    self.data.width, iterations=2) * 32
            fc1_size = fc0_size // 2
            h_fc = tf.reshape(h_conv, [-1, fc0_size])
            w_fc = weight_variable([fc0_size, fc1_size])
            b_fc = bias_variable([fc1_size])
            h_fc = leaky_relu(tf.matmul(h_fc, w_fc) + b_fc)

        with tf.name_scope('fc2'):
            fc2_size = fc1_size // 2
            w_fc = weight_variable([fc1_size, fc2_size])
            b_fc = bias_variable([fc2_size])
            h_fc = leaky_relu(tf.matmul(h_fc, w_fc) + b_fc)

        with tf.name_scope('fc3'):
            fc3_size = self.data.height * self.data.width
            w_fc = weight_variable([fc2_size, fc3_size])
            b_fc = bias_variable([fc3_size])
            h_fc = leaky_relu(tf.matmul(h_fc, w_fc) + b_fc)
            predicted_labels = tf.reshape(
                h_fc, [-1, self.data.height, self.data.width, 1])

        return predicted_labels

    def create_linear_classifier_inference_op(self, images):
        """
        Performs a forward pass estimating label maps from RGB images using only a linear classifier.

        :param images: The RGB images tensor.
        :type images: tf.Tensor
        :return: The label maps tensor.
        :rtype: tf.Tensor
        """
        pixel_count = self.data.height * self.data.width
        flat_images = tf.reshape(images,
                                 [-1, pixel_count * self.data.channels])
        weights = weight_variable(
            [pixel_count * self.data.channels, pixel_count], stddev=0.001)
        biases = bias_variable([pixel_count], constant=0.001)

        flat_predicted_labels = tf.matmul(flat_images, weights) + biases
        predicted_labels = tf.reshape(
            flat_predicted_labels,
            [-1, self.data.height, self.data.width, 1])
        return predicted_labels

    def create_loss_tensor(self, predicted_labels, labels):
        """
        Create the loss op and add it to the graph.

        :param predicted_labels: The labels predicted by the graph.
        :type predicted_labels: tf.Tensor
        :param labels: The ground truth labels.
        :type labels: tf.Tensor
        :return: The loss tensor.
        :rtype: tf.Tensor
        """
        return self.relative_differences(predicted_labels, labels)

    @staticmethod
    def relative_differences(predicted_labels, labels):
        """
        Determines the absolute L1 relative differences between two label maps.

        NOTE(review): the result is divided by ``labels``, so zero-valued labels divide by zero.

        :param predicted_labels: The first label map tensor (usually the predicted labels).
        :type predicted_labels: tf.Tensor
        :param labels: The second label map tensor (usually the actual labels).
        :type labels: tf.Tensor
        :return: The difference tensor.
        :rtype: tf.Tensor
        """
        difference = tf.abs(predicted_labels - labels)
        return difference / labels

    def create_training_op(self, value_to_minimize):
        """
        Create and add the training op to the graph.

        :param value_to_minimize: The value to train on.
        :type value_to_minimize: tf.Tensor
        :return: The training op.
        :rtype: tf.Operation
        """
        return tf.train.AdamOptimizer(
            self.initial_learning_rate).minimize(value_to_minimize)

    @staticmethod
    def convert_to_heat_map_rgb(tensor):
        """
        Convert a tensor to a heat map.

        NOTE(review): when every value is equal, ``maximum - minimum`` is zero and the ratio divides by zero.

        :param tensor: The tensor values to be converted.
        :type tensor: tf.Tensor
        :return: The heat map image tensor.
        :rtype: tf.Tensor
        """
        maximum = tf.reduce_max(tensor)
        minimum = tf.reduce_min(tensor)
        # Ratio runs from 0 at the minimum to 2 at the maximum.
        ratio = 2 * (tensor - minimum) / (maximum - minimum)
        b = tf.maximum(0.0, (1 - ratio))
        r = tf.maximum(0.0, (ratio - 1))
        g = 1 - b - r
        return tf.concat(3, [r, g, b]) - 0.5

    def image_comparison_summary(self, images, labels, predicted_labels,
                                 label_differences):
        """
        Combines the image, label, and difference tensors together into a presentable image. Then adds the
        image summary op to the graph.

        :param images: The original image.
        :type images: tf.Tensor
        :param labels: The tensor containing the actual label values.
        :type labels: tf.Tensor
        :param predicted_labels: The tensor containing the predicted labels.
        :type predicted_labels: tf.Tensor
        :param label_differences: The tensor containing the difference between the actual and predicted labels.
        :type label_differences: tf.Tensor
        """
        label_heat_map = self.convert_to_heat_map_rgb(labels)
        predicted_label_heat_map = self.convert_to_heat_map_rgb(
            predicted_labels)
        label_difference_heat_map = self.convert_to_heat_map_rgb(
            label_differences)

        comparison_image = tf.concat(1, [
            images, label_heat_map, predicted_label_heat_map,
            label_difference_heat_map
        ])
        tf.image_summary('comparison', comparison_image)

    def interface_handler(self):
        """
        Handle input from the user using the interface.

        At most one message is taken from the queue per call. 'save' writes a checkpoint and the graph
        definition into the ``models`` directory; 'quit' sets the stop signal read by the training loop.
        """
        # Imported locally so this block does not depend on a file-level import.
        from queue import Empty

        if not self.queue:
            return
        try:
            # Fetch directly instead of checking `empty()` first: the check and the fetch are not
            # atomic, so the fetch could still raise even after a successful check.
            message = self.queue.get(block=False)
        except Empty:
            return

        if message == 'save':
            # The checkpoint is written before `write_graph`, so the directory has to exist already.
            os.makedirs('models', exist_ok=True)
            save_path = self.saver.save(
                self.session,
                os.path.join('models', 'depthnet.ckpt'),
                global_step=self.step)
            tf.train.write_graph(self.session.graph_def, 'models',
                                 'depthnet.pb')
            print("Model saved in file: %s" % save_path)
        elif message == 'quit':
            self.stop_signal = True

    def train(self):
        """
        Adds the training operations and runs the training loop.

        The loop ends when the coordinator asks to stop, when the stop signal is set, or when the input
        raises an out-of-range error. The queue threads, summary writer and session are released in all cases.
        """
        print('Preparing data...')
        # Setup the inputs.
        images_tensor, labels_tensor = self.data.inputs(
            data_type='',
            batch_size=self.batch_size,
            num_epochs=self.number_of_epochs)

        print('Building graph...')
        # Add the forward pass operations to the graph.
        predicted_labels_tensor = self.create_inference_op(images_tensor)

        # Add the loss operations to the graph.
        with tf.name_scope('loss'):
            loss_tensor = self.create_loss_tensor(predicted_labels_tensor,
                                                  labels_tensor)
            loss_per_pixel_tensor = tf.reduce_mean(loss_tensor)
            tf.scalar_summary("Loss per pixel", loss_per_pixel_tensor)

        with tf.name_scope('comparison_summary'):
            self.image_comparison_summary(images_tensor, labels_tensor,
                                          predicted_labels_tensor,
                                          loss_tensor)

        # Add the training operations to the graph.
        training_op = self.create_training_op(
            value_to_minimize=loss_per_pixel_tensor)

        # The op for initializing the variables.
        initialize_op = tf.initialize_all_variables()

        # Prepare session.
        self.session = tf.Session()

        # Prepare the summary operations.
        summaries_op = tf.merge_all_summaries()
        summary_path = os.path.join(
            self.log_directory,
            datetime.datetime.now().strftime("y%Y_m%m_d%d_h%H_m%M_s%S"))
        writer = tf.train.SummaryWriter(summary_path, self.session.graph)

        # Prepare saver.
        self.saver = tf.train.Saver()

        print('Starting training...')
        # Initialize the variables.
        self.session.run(initialize_op)

        # Start input enqueue threads.
        coordinator = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=self.session,
                                               coord=coordinator)

        # Perform the training loop.
        try:
            while not coordinator.should_stop() and not self.stop_signal:
                # Regular training step.
                start_time = time.time()
                _, loss, summaries = self.session.run(
                    [training_op, loss_per_pixel_tensor, summaries_op],
                    feed_dict={
                        self.dropout_keep_probability_tensor:
                        self.dropout_keep_probability
                    })
                duration = time.time() - start_time

                # Information print and summary write step.
                if self.step % self.summary_step_period == 0:
                    writer.add_summary(summaries, self.step)
                    print('Step %d: Loss per pixel = %.5f (%.3f sec / step)' %
                          (self.step, loss, duration))
                self.step += 1

                # Handle interface messages from the user.
                self.interface_handler()
        except tf.errors.OutOfRangeError:
            if self.step == 0:
                print('Training data not found.')
            else:
                print('Done training for %d epochs, %d steps.' %
                      (self.number_of_epochs, self.step))
        finally:
            # When done, ask the threads to stop.
            coordinator.request_stop()
            try:
                # Wait for threads to finish.
                coordinator.join(threads)
            finally:
                # Close the writer so pending summaries reach disk, and release the session even if
                # joining the threads raised.
                writer.close()
                self.session.close()

    def run(self):
        """
        Allow for training the network from a multiprocessing standpoint.
        """
        self.train()