def _run_training(self, bottleneck_in_size=None):
    """Train one network using the currently active configuration.

    The method loads the dataset, builds the trainable network, logs the
    configuration, trains the network and finally finalizes the session.

    :param bottleneck_in_size: Passed through unchanged to ``nt.NetTrainable``.
    """
    # this timer is kept on the instance, it is not stopped within this method
    self._tw_training_complete = TimeWatcher("SingleNetTrainingComplete")

    # fetch the data and log its statistics
    dataset = self._load_dataset()
    dataset.log_stats()

    # collect all constructor arguments first, then build the network
    log.log('Creating the network')
    net_settings = dict(
        dataset=dataset,
        snapshot_full_path=cf.get("snapshot_full_path"),
        f_beta=self._f_beta,
        bottleneck_in_size=bottleneck_in_size,
        nr=self.net_nr,
        nr_max=self.net_nr_max,
        use_inception_architecture=self.use_inception_architecture,
    )
    self._net = nt.NetTrainable(**net_settings)

    # document the relevant settings in the log before the training starts
    self._log_current_config()

    # the actual training
    self._net.train()

    # reaching this point means the training finished without an exception
    self._finalize_latest_session()
def run_inference_on_image(self, image: ImageInfo) -> List[LabeledBoundingBox]:
    """Run inference on a single image.

    The image is split into sliding windows, all windows are passed to
    ``run_inference_on_windows`` and a short summary is written to the log.

    :param image: The image to process. Not yet split into windows.
    :return: A list containing all bounding boxes belonging to the foreground classes.
    """
    # ensure that TensorFlow has been initialized
    self._init_tf()

    log.log(" ")
    log.log("#################################################################")
    log.log("Run inference on {}".format(image.basename))
    tw_img_total = TimeWatcher("Run inference on {}".format(image.basename))

    # split the current image into sliding windows
    # windows_raw contains only the image patches and is ready to be fed into the network
    # windows_info contains further meta data about the windows
    log.log(" -> extracting windows")
    windows_raw, windows_info = self._extract_windows(image)

    # collect all found foreground bounding boxes of this image in the following list
    img_results = self.run_inference_on_windows(windows_info, windows_raw)
    tw_img_total.stop()

    # log the share of positive windows; an image that produced no windows at all
    # must not crash the summary with a division by zero
    window_count = len(windows_info)
    if window_count > 0:
        positive_percentage = len(img_results) / window_count * 100
    else:
        positive_percentage = 0.0
    log.log("-> final result: {}/{} ({:.2f}%) positive windows".format(
        len(img_results),
        window_count,
        positive_percentage
    ))
    log.log("#################################################################")
    log.log(" ")

    return img_results
def _check_files(self):
    """Remove broken input images once, if the configuration asks for it.

    Nothing happens when the files have already been checked or when the
    setting "remove_broken_images_before_training" is not active.
    """
    # NOTE(review): the flag is assumed to be set only after an actual clean-up run - confirm
    if self._files_checked:
        return
    if not cf.get("remove_broken_images_before_training"):
        return

    # measure how long the clean-up takes
    cleanup_timer = TimeWatcher("RemoveBrokenImages")
    FileListLoader().remove_broken_images()
    cleanup_timer.stop()

    # remember that the clean-up is done, so it won't be repeated
    self._files_checked = True
class BaseApp(metaclass=abc.ABCMeta):
    """This class is the base class of all "apps" created in this project.

    Subclasses implement ``_main``. ``run`` wraps that method with a timer,
    handling of user interrupts and saving of the log.
    """

    def __init__(self, run_now=True):
        """Create a new BaseApp.

        :param run_now: Whether this app should run right now.
        """
        self._time_watcher = None

        # overclock GPU
        # NOTE(review): the configured value is executed through the shell, so it must
        # only ever come from a trusted configuration.
        overclock_script = cf.get("overclock_gpu_shell")
        if overclock_script is not None:
            log.log(
                "automatically overclocking the GPU by using the following shell script: {}"
                .format(overclock_script))
            call(overclock_script, shell=True)

        if run_now:
            self.run()

    def run(self):
        """Run this app.

        This method is wrapping the main method to introduce some additional events:
        a KeyboardInterrupt triggers ``_on_cancel``, and ``_on_finished`` is called
        afterwards in both cases. Any other exception is propagated unchanged.
        """
        # name the timer after the executed script without its file extension.
        # splitext removes the trailing extension only, whereas replacing ".py" would
        # also remove that sequence from the middle of the script name.
        script_name = os.path.splitext(os.path.basename(sys.argv[0]))[0]
        self._time_watcher = TimeWatcher(script_name)

        try:
            self._main()
        except KeyboardInterrupt:
            log.log("WARNING: User interrupted progress.")
            self._on_cancel()

        self._on_finished()
        self._time_watcher.stop()

    @abc.abstractmethod
    def _main(self):
        """This method will be called on object initialization to run the actual programme."""
        return

    def _on_cancel(self):
        """This method will be called when the user interrupted the main method."""
        return

    def _on_finished(self):
        """This method will be called when the main method is done (either finished regularly or cancelled)."""
        # save log files
        log.log_set_name(self.__class__.__name__)

        # we don't flush the log here, because other apps include each other
        log.log_save(cf.get("log_dir"), flush=False)
        return
def _run_nms(candidates_bboxes: List[LabeledBoundingBox]) -> List[LabeledBoundingBox]:
    """Apply the configured Non-Maximum Suppression to the given bounding boxes.

    :param candidates_bboxes: The candidate boxes. They are grouped by the full key of their image.
    :return: The unchanged candidates if NMS is disabled. Otherwise a new list with the boxes that were kept;
             this list stays empty for any mode other than the OpenCV one.
    """
    log.log("Non-Maximum Suppression: {} ({} candidates)".format(cf.get("nms"), len(candidates_bboxes)))

    # nothing to do at all
    if cf.get("nms") == cf.NMS_DISABLED:
        return candidates_bboxes

    nms_timer = TimeWatcher("NMS")
    kept_bboxes = []

    if cf.get("nms") == cf.NMS_OPENCV:
        # step 1: OpenCV expects plain (x, y, w, h) tuples, collected separately for each image
        log.log(" -> prepare NMS by converting all bbox objects into the format required by OpenCV")
        rects_per_img = dict()
        image_per_key = dict()
        for candidate in candidates_bboxes:
            key = candidate.image.full_key
            if key not in rects_per_img:
                rects_per_img[key] = []
                image_per_key[key] = candidate.image

            # TODO this does not take the original confidence into account!
            #  (that's why the different calc methods don't make any difference, too.)
            rects_per_img[key].append(
                (candidate.xmin, candidate.ymin, candidate.width, candidate.height))

        # step 2: group the rectangles image by image
        min_neighbors = cf.get("nms_opencv_min_neighbors")
        log.log(" -> actually run NMS with a threshold of {}".format(min_neighbors))
        for key, rects in rects_per_img.items():
            grouped_rects, weights = cv2.groupRectangles(rects, min_neighbors)

            # step 3: wrap each remaining rectangle into a bbox object again
            foreground_label = label.get_by_iid(label.IID_FOREGROUND)
            for idx, (x, y, w, h) in enumerate(grouped_rects):
                # the weight is used as the new confidence score; it isn't normalized
                score = float(weights[idx])
                kept_bboxes.append(
                    LabeledBoundingBox(x, y, x + w, y + h, foreground_label, score, image_per_key[key]))

    nms_timer.stop()
    log.log(" -> kept {}/{} windows".format(len(kept_bboxes), len(candidates_bboxes)))

    return kept_bboxes
def run(self):
    """Run this app.

    This method is wrapping the main method to introduce some additional events:
    a KeyboardInterrupt triggers ``_on_cancel``, and ``_on_finished`` is called
    afterwards in both cases. Any other exception is propagated unchanged.
    """
    # name the timer after the executed script without its file extension.
    # splitext removes the trailing extension only, whereas replacing ".py" would
    # also remove that sequence from the middle of the script name.
    script_name = os.path.splitext(os.path.basename(sys.argv[0]))[0]
    self._time_watcher = TimeWatcher(script_name)

    try:
        self._main()
    except KeyboardInterrupt:
        log.log("WARNING: User interrupted progress.")
        self._on_cancel()

    self._on_finished()
    self._time_watcher.stop()
def _load_file_lists(self):
    """Fill the image info containers for all configured datasets.

    The combined file list cache is tried first. If it doesn't provide all three
    containers, every dataset is loaded separately and the cache is written anew.
    """
    loading_timer = TimeWatcher("FileListLoading")
    log.log("Load file lists for dataset(s): {}".format(
        cf.get("dataset_keys")
    ))
    log.log(".. Required image dimension: {}x{}px".format(cf.get("img_width"), cf.get("img_height")))

    # try the cache first
    cached = self._check_filelist_cache_combined()
    self._image_infos_per_dataset, self._image_infos, self._image_infos_per_iid_label = cached

    containers = (self._image_infos_per_dataset, self._image_infos, self._image_infos_per_iid_label)
    cache_usable = not any(container is None for container in containers)

    if cache_usable:
        # if the file list was loaded from the cache, we need to initialize the labels manually
        for label_iid in self._image_infos_per_iid_label:
            label.get_by_iid(label_iid)
    else:
        # start with empty containers
        self._image_infos_per_dataset = dict()
        self._image_infos = []
        self._image_infos_per_iid_label = dict()

        # one dataset after the other
        for dataset_key in cf.get("dataset_keys"):
            self._load_file_list(dataset_key)

        # NOTE(review): the stats are assumed to be logged once after all datasets are loaded - confirm
        self.log_stats()

        # each class has to meet the minimum and maximum requirements
        self._ensure_min_max()

        self._save_filelist_cache_combined()

    log.log("Finished file list loading.")
    loading_timer.stop()
def run_inference_on_images(self, images: List[ImageInfo], merge=True) -> List[List[LabeledBoundingBox]]:
    """Run inference on the given image list.

    :param images: The images to process. Not yet split into windows.
    :param merge: If True, performance will be optimized by processing all images at once.
                  Otherwise, performance may be worse, but you can get additional evaluations
                  referring to a single image only.
    :return: The outer list contains one inner list for each provided input image.
             Each of such inner lists contains bounding boxes for all found foreground classes.
             An empty input list yields an empty result list.
    """
    # without any image there is nothing to process. returning early also protects all the
    # per-image averages below from a division by zero.
    if not images:
        return []

    # init TensorFlow before starting any timers
    self._init_tf()

    # The outer result list contains one inner list for each provided input image.
    all_results = []
    timer_multiple = TimeWatcher("inference_img_multiple: {} imgs".format(len(images)))

    if merge:
        # extract windows from all images first and merge them
        timer_extracting = TimeWatcher("extract windows from all images and merge them")

        # extract using multiple threads
        # TODO most of the code's runtime is currently required for the "thread.acquire" method
        log.log(" -> extract")
        with ThreadPoolExecutor() as executor:
            results_per_img = list(executor.map(lambda img: self._extract_windows(img, convert_raw_to_np=False),
                                                images))

        # merge
        log.log(" -> merge")
        windows_merged_info = []

        # merge infos first
        for _, window_infos_of_one_image in results_per_img:
            windows_merged_info += window_infos_of_one_image

        # merge raw data by directly creating a common numpy array
        # (so no combined list in between)
        # NOTE(review): the axes are ordered width, height here - confirm this matches the raw windows
        windows_merged_raw = np.empty(
            shape=[len(windows_merged_info), cf.get("img_width"), cf.get("img_height"), 3],
            dtype=cf.get("img_dtype"))
        raw_window_index = 0
        for raw_windows_of_one_image, _ in results_per_img:
            for raw_window in raw_windows_of_one_image:
                windows_merged_raw[raw_window_index] = raw_window
                raw_window_index += 1

        # release memory
        results_per_img = None

        timer_extracting.stop()

        # run inference using the merged windows
        log.log("run inference using the merged windows (total: {}, avg per img: {:.0f})".format(
            len(windows_merged_info),
            len(windows_merged_info) / len(images)
        ))
        merged_bboxes = self.run_inference_on_windows(windows_merged_info, windows_merged_raw)

        # separate results: group them by the input images
        all_results_dict = {img.path_original: [] for img in images}
        for bbox in merged_bboxes:
            all_results_dict[bbox.image.path_original].append(bbox)

        # transform dict to final result list
        all_results = [all_results_dict[img.path_original] for img in images]

    else:
        # process image after image
        # TODO implement multi-threading for the non-merging mode, too
        for img in images:
            img_results = []
            try:
                timer_single = TimeWatcher("inference_img_single")
                img_results = self.run_inference_on_image(img)
                timer_single.stop()
            except FileNotFoundError:
                log.log(" .. Skipped {}, because the file could not be found".format(
                    img.path_resized
                ))
            except Exception:
                # skip this image only. KeyboardInterrupt and SystemExit are deliberately not
                # caught here, so the user is still able to cancel the whole run.
                log.log(" .. Skipped {}, because of an unexpected error:\n{}".format(
                    img.path_resized,
                    traceback.format_exc()
                ))

            # a skipped image contributes an empty list, so the result order matches the input order
            all_results.append(img_results)

    timer_multiple.stop()

    if merge:
        # runtime stats for inference only are available in merge mode only
        runtime_total = timer_extracting.elapsed_seconds
        runtime_avg = runtime_total / float(len(images))
        log.log("Runtime window extraction: {} images in {} (avg: {}).".format(
            len(images),
            TimeWatcher.seconds_to_str(runtime_total),
            TimeWatcher.seconds_to_str(runtime_avg)
        ))

        runtime_total = timer_multiple.elapsed_seconds - timer_extracting.elapsed_seconds
        runtime_avg = runtime_total / float(len(images))
        log.log("Runtime inference only: {} images in {} (avg: {}).".format(
            len(images),
            TimeWatcher.seconds_to_str(runtime_total),
            TimeWatcher.seconds_to_str(runtime_avg)
        ))

    # log runtime stats: inference including extracting
    runtime_total = timer_multiple.elapsed_seconds
    runtime_avg = runtime_total / float(len(images))
    log.log("Runtime inference including window extraction: {} images in {} (avg: {}).".format(
        len(images),
        TimeWatcher.seconds_to_str(runtime_total),
        TimeWatcher.seconds_to_str(runtime_avg)
    ))

    return all_results
class TrainApp(BaseApp):
    """This app can be used to train a single net.

    Training of a cascade is handled in the subclass: TrainCascadeApp.
    """

    def __init__(self, run_now=True):
        """Create new TrainApp.

        :param run_now: Whether to start the training right now.
        """
        # introduce an additional attribute pointing to the currently used network
        self._net = None

        # the beta parameter of the f-measure that will be used as the loss function
        self._f_beta = cf.get("f_beta_default")

        # some more default values
        self._tw_training_complete = None
        self._files_checked = False
        self._final_results = None

        # call the super constructor after all attributes have been initialized
        BaseApp.__init__(self, run_now)

    def _main(self):
        """Check the input files and run the training.

        A ConstantPredictionException is handled like a cancellation.
        """
        self._check_files()
        try:
            self._run_training()
        except ConstantPredictionException:
            log.log(
                "Cancelling because of an ConstantPredictionException exception"
            )
            self._on_cancel()

    def _check_files(self):
        """Test all loaded input images for TensorFlow compatibility."""
        # NOTE(review): the flag is assumed to be set only after an actual clean-up run - confirm
        if not self._files_checked and cf.get(
                "remove_broken_images_before_training"):
            tw_broken_images = TimeWatcher("RemoveBrokenImages")
            FileListLoader().remove_broken_images()
            tw_broken_images.stop()
            self._files_checked = True

    def _run_training(self, bottleneck_in_size=None):
        """Run the training with current settings.

        :param bottleneck_in_size: Passed through unchanged to ``nt.NetTrainable``.
        """
        # this timer is stopped in _finalize_latest_session()
        self._tw_training_complete = TimeWatcher("SingleNetTrainingComplete")

        # load dataset
        ds = self._load_dataset()
        ds.log_stats()

        # create the neural network
        log.log('Creating the network')
        self._net = nt.NetTrainable(
            dataset=ds,
            snapshot_full_path=cf.get("snapshot_full_path"),
            f_beta=self._f_beta,
            bottleneck_in_size=bottleneck_in_size,
            nr=self.net_nr,
            nr_max=self.net_nr_max,
            use_inception_architecture=self.use_inception_architecture)

        # log some relevant configuration settings before starting the training
        self._log_current_config()

        # actually train the net
        self._net.train()

        # training has been completed correctly, finalize this session
        self._finalize_latest_session()

    def _log_current_config(self):
        """Log some relevant configuration settings (before starting the training)."""
        # Training
        log.log('Start Training..')
        if cf.get("timeout_minutes") > 0:
            log.log('.. timeout after {} minutes'.format(
                cf.get("timeout_minutes")))
        log.log('.. total number of epochs: {}'.format(cf.get("epochs_total")))
        log.log('.. batch size in each iteration: {}'.format(
            cf.get("batch_size")))
        log.log('.. learning rate init: {}'.format(
            cf.get("learning_rate_init")))
        log.log('.. learning rate decay: {}'.format(
            cf.get("learning_rate_decay")))
        log.log('.. learning rate minimum: {}'.format(
            cf.get("learning_rate_min")))

        # regularization: the strength is only logged if the regularization is active
        l2_strength = cf.get("L2_regularization_strength")
        log.log('.. L2 regularization active: {}'.format(l2_strength > 0))
        if l2_strength > 0:
            log.log('.. L2 regularization strength: {}'.format(l2_strength))
        l1_strength = cf.get("L1_regularization_strength")
        log.log('.. L1 regularization active: {}'.format(l1_strength > 0))
        if l1_strength > 0:
            log.log('.. L1 regularization strength: {}'.format(l1_strength))

        # drop out counts as active for rates strictly between 0 and 1 only
        dropout_rate = cf.get("dropout_rate")
        dropout_active = 0 < dropout_rate < 1
        log.log('.. drop out active: {}'.format(dropout_active))
        if dropout_active:
            log.log('.. drop out rate: {}'.format(dropout_rate))

        log.log(".. filter_dataset_after_caching: {}".format(
            cf.get("filter_dataset_after_caching")))
        log.log(".. data augmentation online: {}".format(
            cf.get("data_augmentation_online")))

        # log the used loss function
        if self._f_beta is None:
            if cf.get("weighted_cross_entropy"):
                if cf.get("weighted_cross_entropy_normalize"):
                    loss_text = "normalized"
                else:
                    loss_text = "UNnormalized"
                loss_text += " weighted"
            else:
                loss_text = "(unweighted)"
            loss_text += " cross entropy"
        else:
            loss_text = "f_{:.2f}".format(self._f_beta)
        log.log(".. loss function: {}".format(loss_text))

        # print optimizer
        optimizer_names = {
            0: "GradientDescentOptimizer",
            1: "AdamOptimizer",
            2: "MomentumOptimizer",
        }
        optimizer_id = cf.get("optimizer")
        log.log('.. optimizer: {}'.format(
            optimizer_names.get(optimizer_id, "unknown")))
        if optimizer_id == 2:
            log.log('.. momentum update: {}'.format(cf.get("momentum")))

    def _finalize_latest_session(self):
        """Finalize the training session that was started at last."""
        # done
        self._tw_training_complete.stop()

        # the final evaluation may change the current net one last time, so we need to do this before exporting
        val_eval, test_eval, train_eval = self._net.final_evaluation()

        # save final results
        self._final_results = {
            SPLIT_KEY_TRAIN: train_eval,
            SPLIT_KEY_VAL: val_eval,
            SPLIT_KEY_TEST: test_eval
        }

        # exporting
        self._export_graph()

    @staticmethod
    def _ask_save_on_abort() -> str:
        """Ask the user whether the latest data should be saved and return the normalized answer.

        The answer is parsed as plain text: surrounding whitespace and quotes are removed and
        the result is lower-cased. It is never evaluated as code.
        """
        answer = input("Do you want to save the latest data? [y/n]")
        return answer.strip().strip("\"'").strip().lower()

    def _on_cancel(self):
        """This method will be called when the user interrupted the main method.

        A running training is stopped. The latest results are saved unless the user answers "n"
        (or always, if "auto_save_on_abort" is configured).
        """
        if self._net is None:
            # no net has been created yet, so there is neither a session to stop nor a result to save
            return

        # cancel running training session
        self._net.stop_training()

        # ask the user whether the latest results should be saved.
        # the answer is read as plain text instead of being passed to eval(): eval() executed
        # whatever was typed and raised on any answer other than y, n or a quoted string.
        finalize_and_save = cf.get("auto_save_on_abort") or self._ask_save_on_abort()

        # everything except an explicit "n" saves the results, as losing them is the worse mistake
        if finalize_and_save != "n":
            log.log("Saving latest results.")

            # finalize as usual
            self._finalize_latest_session()
        else:
            log.log("Results deleted.")

    def _export_graph(self):
        """Persist the trained graph including the weights stored as constants.

        Two temporary graph files are written next to the final graph file and are removed
        again at the end.
        """
        log.log("Exporting..")

        # define several file paths for the exported graph file
        graph_file_path_final = self._output_graph_file_path()
        graph_file_path_frozen = graph_file_path_final.replace(
            ".pb", "_training_frozen.pb")
        graph_file_path_optimized = graph_file_path_final.replace(
            ".pb", "_inference_optimized.pb")

        # freeze and serialize the current training graph
        log.log(" .. frozen version of the original training graph")
        frozen_training_graph_def = self._net.output_graph_def
        with gfile.FastGFile(graph_file_path_frozen, 'wb') as f:
            f.write(frozen_training_graph_def.SerializeToString())

        # export a version optimized for inference
        # (actually, this is even mandatory to remove at least all data augmentation nodes)
        log.log(" .. inference-optimized graph")
        _ = InferenceOptimizer(
            input=graph_file_path_frozen,
            output=graph_file_path_optimized,
            frozen_graph=True,
            input_names=cf.get("graph_input_training_layer_name"),
            output_names=cf.get("graph_final_inference_layer_name"))

        # the inference optimization removes the shape information of the input node. so we need to add a new
        # placeholder providing explicit shape information.
        # (this information is necessary to allow dynamic cascade evaluation)
        # -> re-import the serialized graph, but replace the image placeholder
        tf.reset_default_graph()
        x_new = tf.placeholder(name=cf.get("graph_input_inference_layer_name"),
                               shape=self._net.shape_data_batch,
                               dtype=tf.float32)
        with tf.gfile.FastGFile(graph_file_path_optimized, 'rb') as f:
            unmodified_reimported_graph_def = tf.GraphDef()
            unmodified_reimported_graph_def.ParseFromString(f.read())
            _ = tf.import_graph_def(
                unmodified_reimported_graph_def,
                name="",  # this is important to prevent a default prefix
                input_map={cf.get("graph_input_training_layer_name"): x_new})
        unmodified_reimported_graph_def = None  # prevent using the wrong graph def
        # modified_reimported_graph_def = self._net.output_graph_def  # don't use this one, as it won't provide the new graph yet
        modified_reimported_graph_def = tf.get_default_graph(
        ).as_graph_def()

        # and export it once again
        log.log(
            " .. finally a modified graph version using another placeholder to:\n{}"
            .format(graph_file_path_final))
        with gfile.FastGFile(graph_file_path_final, 'wb') as f:
            f.write(modified_reimported_graph_def.SerializeToString())

        # delete temp graph files
        os.remove(graph_file_path_frozen)
        os.remove(graph_file_path_optimized)

    def _output_graph_file_path(self) -> str:
        """Get the file path that will be used to save the final net graph."""
        return cf.get("output_graph_file")

    def delete_graph_file(self):
        """Delete the graph file that was created by calling self._export_graph()."""
        graph_file_path = self._output_graph_file_path()
        if os.path.exists(graph_file_path):
            log.log("Deleting graph file {}".format(graph_file_path))
            os.remove(graph_file_path)

    def _load_dataset(self) -> Dataset:
        """Load and provide the dataset used for training, validation and testing."""
        # when using the inception net, we need to ignore the dimension settings made by the user
        if cf.get("append_inception"):
            cf.set("img_width", inception_builder.MODEL_INPUT_WIDTH)
            cf.set("img_height", inception_builder.MODEL_INPUT_HEIGHT)

        # use all available data
        ds_loader = dataset_loader.DatasetLoader()
        ds = ds_loader.dataset()

        return ds

    def _on_finished(self):
        """This method will be called when the main method is done (either finished regularly or cancelled)."""
        BaseApp._on_finished(self)

        # net can't be used after this point.
        # the net doesn't exist if the run was interrupted before it was created.
        if self._net is not None:
            self._net.close_session()

    @property
    def net_nr(self):
        """The number (=index+1) of the current net."""
        return 1

    @property
    def net_nr_max(self):
        """The maximum net number (=index+1) of the current cascade."""
        return 1

    @property
    def final_results(self) -> Dict[str, Dict[str, float]]:
        """Get the final evaluation results.

        Results will be available after calling _finalize_latest_session, otherwise None will be returned.
        """
        return self._final_results

    @property
    def use_inception_architecture(self):
        """If True, the inception architecture will be used to build the net.

        Otherwise, a custom architecture will be chosen.

        :return: The value of the "append_inception" setting.
        """
        return cf.get("append_inception")