def test_get_base_net(self):
    with self.test_session() as sess:
        image_input_batch = tf.placeholder(tf.float32, shape=(1, 600, 600, 3))
        roi_input_batch = tf.placeholder(tf.float32, shape=(None, 4))
        he_init = tf.contrib.layers.variance_scaling_initializer()

        target = rcnn_net.get_base_net(
            15, 400, image_input_batch, roi_input_batch, he_init)

        # Initialization has to happen after defining the graph
        sess.run(tf.global_variables_initializer())

        # Testing integration with data reader
        training_reader = reader.DatasetReader(
            ["test/data/test-batch-reader-dataset/rcnn_dataset_12276"], 1, 64, 16)
        training_batch = training_reader.get_batch()

        result_last_op, result_pool_layer = sess.run(
            target,
            feed_dict={
                image_input_batch: training_batch["images"],
                roi_input_batch: training_batch["rois"]
            })

        assert result_last_op.shape == (64, 400)
        # 64 rois, each with a 7x7x64 pooling output
        assert result_pool_layer.shape == (64, 7, 7, 64)
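# A minimal sketch, assuming rois are [x, y, w, h] in image pixels, of how an ROI pooling
# step could yield the (64, 7, 7, 64) tensor asserted above: crop-and-resize each roi from
# the conv feature map, then max-pool it down to 7x7. This is an illustration only, not
# necessarily how rcnn_net.get_base_net implements its pooling layer.
import tensorflow as tf

def roi_pooling_sketch(feature_map, rois, image_size=600.0, output_size=7):
    # rois assumed to be [x, y, w, h] in pixels; crop_and_resize expects normalized [y1, x1, y2, x2]
    x, y, w, h = tf.unstack(rois, axis=1)
    boxes = tf.stack([y / image_size, x / image_size,
                      (y + h) / image_size, (x + w) / image_size], axis=1)
    # Every roi comes from the single image in the batch (index 0)
    box_indices = tf.zeros_like(x, dtype=tf.int32)
    # Crop at twice the target resolution, then max-pool 2x2 down to output_size x output_size
    crops = tf.image.crop_and_resize(feature_map, boxes, box_indices,
                                     crop_size=[output_size * 2, output_size * 2])
    return tf.nn.max_pool(crops, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")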
def test(self, prediction, test_batch_files):
    """
    This function detects and classifies objects in the given images

    :param prediction: tensorflow operator to detect objects (will be run using the session)
    :param test_batch_files: list of files to use to test the net
    """
    # If this model was already partially trained before, load it from disk.
    # If the model was trained in this same execution, the load already happened, so we
    # can skip it here
    if not self._config.get_model_train() and self._config.get_model_load():
        # Restore variables from disk
        tf.train.Saver().restore(self._sess, self._config.get_model_path())
        print("Model restored.")

    print("Starting prediction")
    prediction_start_time = time.time()

    # Generates batches from the list of test files
    test_reader = ds_reader.DatasetReader(
        test_batch_files,
        self._config.get_number_images_batch(),
        self._config.get_number_rois_per_image_batch(),
        self._config.get_number_max_foreground_rois_per_image_batch())
    test_batch = test_reader.get_batch()

    while test_batch != {}:
        predicted_classes = self._sess.run(
            prediction,
            feed_dict={
                self._image_input_batch: test_batch["images"],
                self._roi_input_batch: test_batch["rois"]
            })

        # Logging information about the prediction to be able to analyze it later
        output_analyzer.write_predictions_to_file(
            self._config.get_test_output_file(),
            test_batch["gt_objects"],
            np.transpose(predicted_classes, axes=[1, 0, 2]))

        test_batch = test_reader.get_batch()

    print("Done predicting. It took {0} minutes".format(
        (time.time() - prediction_start_time) / 60))
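# A hedged usage sketch for the test() method above. The RcnnTester class name and its
# build_prediction_op() helper are hypothetical stand-ins for whatever object owns the
# session, the placeholders and the config; only the test() contract itself (a prediction
# op plus a list of dataset batch files) comes from the code above.
import glob

def run_test_phase_sketch(config):
    test_files = sorted(glob.glob("dataset/test/rcnn_dataset_*"))  # hypothetical file layout
    tester = RcnnTester(config)                 # hypothetical owner of _sess and the placeholders
    prediction = tester.build_prediction_op()   # hypothetical helper returning the detection op
    tester.test(prediction, test_files)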
def test_net(self):
    with self.test_session() as sess:
        image_input_batch = tf.placeholder(tf.float32, shape=(1, 600, 600, 3))
        roi_input_batch = tf.placeholder(tf.float32, shape=(None, 4))
        class_label_batch = tf.placeholder(tf.float32, shape=(None, 21))
        detection_label_batch = tf.placeholder(tf.float32, shape=(None, 4))
        learning_rate = tf.placeholder(tf.float32, name="LearningRate")

        target = rcnn_net.get_net(
            21, 4, 15, 400, image_input_batch, roi_input_batch,
            class_label_batch, detection_label_batch, learning_rate)

        sess.run(tf.global_variables_initializer())

        # Testing integration with data reader
        training_reader = reader.DatasetReader(
            ["test/data/test-batch-reader-dataset/rcnn_dataset_12276"], 1, 64, 16)
        training_batch = training_reader.get_batch()

        class_label_batch_test = np.zeros((64, 21))
        detection_label_batch_test = np.zeros((64, 4))
        learning_rate_manager = rm.LearningRateManager(0.001, 0.6, 80)

        result_loss, result_training, result_test = sess.run(
            target,
            feed_dict={
                image_input_batch: training_batch["images"],
                roi_input_batch: training_batch["rois"],
                class_label_batch: class_label_batch_test,
                detection_label_batch: detection_label_batch_test,
                learning_rate: learning_rate_manager.learning_rate
            })

        assert result_loss.shape == (64, 1)
        assert result_test.shape[1] == 5
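# A hedged sketch of a Fast R-CNN style per-roi multitask loss (softmax cross-entropy for the
# class plus smooth L1 for the box regression) that would produce a (num_rois, 1) tensor like
# the result_loss asserted above. It illustrates the technique only and is not necessarily how
# rcnn_net.get_net builds its loss; the foreground masking via class index 0 follows the
# dataset convention that index 0 is the background class.
import tensorflow as tf

def multitask_loss_sketch(class_logits, class_labels, box_preds, box_targets):
    # Classification term, one value per roi
    cls_loss = tf.nn.softmax_cross_entropy_with_logits(labels=class_labels, logits=class_logits)
    # Smooth L1 (Huber) regression term, summed over the 4 box coordinates
    diff = tf.abs(box_preds - box_targets)
    smooth_l1 = tf.where(diff < 1.0, 0.5 * tf.square(diff), diff - 0.5)
    reg_loss = tf.reduce_sum(smooth_l1, axis=1)
    # Background rois (one-hot index 0) do not contribute to the regression term
    is_foreground = 1.0 - class_labels[:, 0]
    return tf.expand_dims(cls_loss + is_foreground * reg_loss, axis=1)  # shape (num_rois, 1)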
def test_read_mini_batches(self):
    input_folder = "test/data/test-batch-reader-dataset/batch/"
    input_files = [input_folder + "rcnn_dataset_0", input_folder + "rcnn_dataset_1"]

    target = dataset_reader.DatasetReader(input_files, 1, 64, 16)

    # BATCH 1 -> One image
    batch1 = target.get_batch()

    assert batch1["images"].shape == (1, 600, 600, 3)
    assert batch1["rois"].shape == (64, 4)
    assert batch1["class_labels"].shape == (64, 21)
    assert batch1["reg_target_labels"].shape == (64, 4)
    # Checking that there are 59 background rois for the image
    assert np.sum(batch1["class_labels"], axis=0)[0] == 59

    # Checking ground truth information
    assert batch1["gt_objects"].shape == (4,)
    np.testing.assert_equal(
        batch1["gt_objects"][0],
        {"class": "aeroplane", "bbox": np.array([124, 166, 325, 224])})
    np.testing.assert_equal(
        batch1["gt_objects"][1],
        {"class": "aeroplane", "bbox": np.array([159, 187, 76, 74])})
    np.testing.assert_equal(
        batch1["gt_objects"][2],
        {"class": "person", "bbox": np.array([234, 384, 21, 104])})
    np.testing.assert_equal(
        batch1["gt_objects"][3],
        {"class": "person", "bbox": np.array([31, 403, 21, 104])})

    # BATCH 2 -> One image
    batch2 = target.get_batch()

    assert batch2["images"].shape == (1, 600, 600, 3)
    assert batch2["rois"].shape == (64, 4)
    assert batch2["class_labels"].shape == (64, 21)
    assert batch2["reg_target_labels"].shape == (64, 4)
    # Checking that there are 59 background rois for the image
    assert np.sum(batch2["class_labels"], axis=0)[0] == 59

    # Checking ground truth information
    assert batch2["gt_objects"].shape == (3,)
    np.testing.assert_equal(
        batch2["gt_objects"][0],
        {"class": "aeroplane", "bbox": np.array([10, 175, 588, 255])})
    np.testing.assert_equal(
        batch2["gt_objects"][1],
        {"class": "aeroplane", "bbox": np.array([505, 327, 73, 42])})
    np.testing.assert_equal(
        batch2["gt_objects"][2],
        {"class": "aeroplane", "bbox": np.array([390, 308, 103, 57])})

    # BATCH 3 -> One image, change in file
    batch3 = target.get_batch()

    assert batch3["images"].shape == (1, 600, 600, 3)
    assert batch3["rois"].shape == (64, 4)
    assert batch3["class_labels"].shape == (64, 21)
    # Checking that there are 59 background rois for the image
    assert np.sum(batch3["class_labels"], axis=0)[0] == 59
    assert batch3["reg_target_labels"].shape == (64, 4)

    # Checking ground truth information
    assert batch3["gt_objects"].shape == (1,)
    np.testing.assert_equal(
        {"class": "person", "bbox": np.array([214, 121, 216, 300])},
        batch3["gt_objects"][0])

    # BATCH 4 -> Empty, there are no more images
    batch4 = target.get_batch()
    assert batch4 == {}
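# A hedged sketch of the roi sampling policy the assertions above imply: each image contributes
# rois_per_image rois, at most max_foreground of them foreground and the remainder background
# (batch1 has 5 foreground + 59 background = 64). DatasetReader's actual selection logic may
# differ; the function and argument names here are illustrative only.
import numpy as np

def sample_rois_sketch(foreground_rois, background_rois, rois_per_image=64, max_foreground=16):
    num_foreground = min(len(foreground_rois), max_foreground)
    num_background = rois_per_image - num_foreground
    fg_idx = np.random.choice(len(foreground_rois), num_foreground, replace=False)
    # Allow repetition only if there are not enough background candidates to fill the batch
    bg_idx = np.random.choice(len(background_rois), num_background,
                              replace=len(background_rois) < num_background)
    return np.concatenate([foreground_rois[fg_idx], background_rois[bg_idx]], axis=0)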
def train_net(self, training, multitask_loss, training_batch_files):
    """
    This function trains the rcnn network

    :param training: tensorflow operator to train the network (will be run using the session)
    :param multitask_loss: tensorflow operator to get the result of the multitask loss.
        This info will be logged to be able to analyze it later
    :param training_batch_files: list of files to use to train the net
    """
    # Used to save and restore the model variables
    saver = tf.train.Saver()

    # If this model was already partially trained before, load it from disk
    if self._config.get_model_load():
        # Restore variables from disk
        saver.restore(self._sess, self._config.get_model_path())
        print("Model restored.")

    print("Starting training")
    training_start_time = time.time()
    iteration = 0

    learning_rate_manager = rm.LearningRateManager(
        self._config.get_learning_rate_initial_value(),
        self._config.get_learning_rate_manager_threshold(),
        self._config.get_learning_rate_manager_steps())

    for epoch in range(0, self._config.get_number_epochs()):
        print("Epoch: {0}".format(str(epoch)))
        # Training with all the PASCAL VOC records for each epoch.
        # We train with 1 image per batch and 64 rois per image. From those 64, we'll use
        # a max of 16 foreground rois. The rest will be background.
        training_reader = ds_reader.DatasetReader(
            training_batch_files,
            self._config.get_number_images_batch(),
            self._config.get_number_rois_per_image_batch(),
            self._config.get_number_max_foreground_rois_per_image_batch())
        training_batch = training_reader.get_batch()

        # An empty batch means we are done processing all images and rois for this epoch
        while training_batch != {}:
            _, loss = self._sess.run(
                [training, multitask_loss],
                feed_dict={
                    self._image_input_batch: training_batch["images"],
                    self._roi_input_batch: training_batch["rois"],
                    self._class_label_batch: training_batch["class_labels"],
                    self._detection_label_batch: training_batch["reg_target_labels"],
                    self._learning_rate: learning_rate_manager.learning_rate
                })

            # Logging information about the multitask loss to be able to analyze it later
            output_analyzer.write_error_to_file(
                self._config.get_training_error_file(), iteration, loss)
            # Adding the error to the learning rate manager so it can decide when to reduce it
            learning_rate_manager.add_error(loss)

            iteration = iteration + 1

            training_batch = training_reader.get_batch()

        # Save model variables to disk
        if self._config.get_model_save():
            save_path = saver.save(self._sess, self._config.get_model_path())
            print("Model saved in path: {0} for epoch {1}".format(save_path, epoch))
            print("Initial learning rate to use when training in the future: {0}"
                  .format(str(learning_rate_manager.learning_rate)))

    print("Done training. It took {0} minutes".format(
        (time.time() - training_start_time) / 60))
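# A hedged sketch of a learning-rate manager compatible with the calls made above: a constructor
# taking (initial value, threshold, steps), an add_error() method fed with the multitask loss,
# and a learning_rate attribute read on every training step. The specific reduction rule shown
# (divide by 10 when the mean error of the latest `steps` iterations is no longer below
# `threshold` times the previous window's mean) is an assumption, not necessarily what the
# project's rm.LearningRateManager does.
import numpy as np

class LearningRateManagerSketch(object):
    def __init__(self, initial_learning_rate, threshold, steps):
        self.learning_rate = initial_learning_rate
        self._threshold = threshold
        self._steps = steps
        self._errors = []

    def add_error(self, error):
        # The loss may arrive as a (num_rois, 1) array, so reduce it to a scalar first
        self._errors.append(float(np.mean(error)))
        if len(self._errors) >= 2 * self._steps:
            previous = np.mean(self._errors[-2 * self._steps:-self._steps])
            recent = np.mean(self._errors[-self._steps:])
            if recent > previous * self._threshold:
                self.learning_rate = self.learning_rate / 10.0
                self._errors = []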