Example #1
0
    def _run_training(self, bottleneck_in_size=None):
        """Train a single net using the current settings.

        Loads the dataset, builds the trainable net, logs the configuration, trains and
        finally finalizes the session.

        :param bottleneck_in_size: Passed through unchanged to nt.NetTrainable.
        """
        # the timer is stored on the instance; it is not stopped inside this method
        self._tw_training_complete = TimeWatcher("SingleNetTrainingComplete")

        # fetch the dataset and report its statistics
        dataset = self._load_dataset()
        dataset.log_stats()

        # collect the constructor arguments first, then build the net
        log.log('Creating the network')
        net_settings = dict(
            dataset=dataset,
            snapshot_full_path=cf.get("snapshot_full_path"),
            f_beta=self._f_beta,
            bottleneck_in_size=bottleneck_in_size,
            nr=self.net_nr,
            nr_max=self.net_nr_max,
            use_inception_architecture=self.use_inception_architecture,
        )
        self._net = nt.NetTrainable(**net_settings)

        # report the relevant configuration before the (long running) training starts
        self._log_current_config()

        # the actual training
        self._net.train()

        # reaching this line means the training was not aborted: wrap up the session
        self._finalize_latest_session()
Example #2
0
    def run_inference_on_image(self, image: ImageInfo) -> List[LabeledBoundingBox]:
        """Run inference on a single image.

        The image is split into sliding windows, which are then passed to
        run_inference_on_windows(). A summary of the positive windows is logged.

        :param image: The image to process. Not yet split into windows.
        :return: A list containing all bounding boxes belonging to the foreground classes.
        """
        # ensure that TensorFlow has been initialized
        self._init_tf()

        log.log(" ")
        log.log("#################################################################")
        log.log("Run inference on {}".format(image.basename))
        tw_img_total = TimeWatcher("Run inference on {}".format(image.basename))

        # split the current image into sliding windows
        # windows_raw contains only the image patches and is ready to be fed into the network
        # windows_info contains further meta data about the windows
        log.log(" -> extracting windows")
        windows_raw, windows_info = self._extract_windows(image)

        # collect all found foreground bounding boxes of this image in the following list
        img_results = self.run_inference_on_windows(windows_info, windows_raw)

        tw_img_total.stop()

        # log
        positive_count = len(img_results)
        window_count = len(windows_info)
        # an image may yield no windows at all; report 0% instead of dividing by zero
        if window_count > 0:
            positive_percent = positive_count / window_count * 100
        else:
            positive_percent = 0.0
        log.log("-> final result: {}/{} ({:.2f}%) positive windows".format(
            positive_count,
            window_count,
            positive_percent
        ))
        log.log("#################################################################")
        log.log(" ")

        return img_results
Example #3
0
 def _check_files(self):
     """Remove broken input images once per instance, if enabled in the configuration."""
     # the check already ran on this instance
     if self._files_checked:
         return

     # the check is optional and has to be enabled in the configuration
     if not cf.get("remove_broken_images_before_training"):
         return

     timer = TimeWatcher("RemoveBrokenImages")
     FileListLoader().remove_broken_images()
     timer.stop()

     # remember the check, so subsequent calls return immediately
     self._files_checked = True
Example #4
0
class BaseApp(metaclass=abc.ABCMeta):
    """Common base class of all "apps" created in this project.

    Subclasses implement _main(); run() wraps it with a timer, handling of user interrupts
    and saving of the log.
    """

    def __init__(self, run_now=True):
        """Create a new BaseApp.

        :param run_now: Whether this app should run right now (at the end of the constructor).
        """
        self._time_watcher = None

        # overclock the GPU if a shell script has been configured for that purpose
        overclock_script = cf.get("overclock_gpu_shell")
        if overclock_script is not None:
            log.log(
                "automatically overclocking the GPU by using the following shell script: {}"
                .format(overclock_script))
            call(overclock_script, shell=True)

        if not run_now:
            return
        self.run()

    def run(self):
        """Run this app.

        Wraps the main method to introduce some additional events: a timer named after the
        executed script, the cancel hook on KeyboardInterrupt and the finished hook afterwards.
        :return:
        """
        script_name = os.path.basename(sys.argv[0]).replace(".py", "")
        self._time_watcher = TimeWatcher(script_name)

        try:
            self._main()
        except KeyboardInterrupt:
            log.log("WARNING: User interrupted progress.")
            self._on_cancel()

        # reached after a regular finish as well as after a user interrupt
        self._on_finished()
        self._time_watcher.stop()

    @abc.abstractmethod
    def _main(self):
        """Run the actual programme. Invoked by run(); has to be implemented by subclasses."""

    def _on_cancel(self):
        """Hook that is invoked when the user interrupted the main method. Does nothing by default."""

    def _on_finished(self):
        """Hook that is invoked when the main method is done (either finished regularly or cancelled)."""
        # name the log after the concrete app class and save it
        log.log_set_name(self.__class__.__name__)

        # we don't flush the log here, because other apps include each other
        log_dir = cf.get("log_dir")
        log.log_save(log_dir, flush=False)
Example #5
0
    def _run_nms(candidates_bboxes: List[LabeledBoundingBox]) -> List[LabeledBoundingBox]:
        """Apply the configured Non-Maximum Suppression to the given bounding boxes.

        :param candidates_bboxes: The candidate boxes. They are grouped by the full key of their image.
        :return: The unchanged candidates if NMS is disabled. Otherwise the boxes kept by NMS; only the OpenCV
                 method is handled here, so any other setting results in an empty list.
        """
        nms_mode = cf.get("nms")
        log.log("Non-Maximum Suppression: {} ({} candidates)".format(nms_mode, len(candidates_bboxes)))

        # nothing to suppress
        if nms_mode == cf.NMS_DISABLED:
            return candidates_bboxes

        tw_nms = TimeWatcher("NMS")
        kept_bboxes = []

        if nms_mode == cf.NMS_OPENCV:

            # step 1: convert all bbox objects into the format required by OpenCV, grouped by image
            log.log("  -> prepare NMS by converting all bbox objects into the format required by OpenCV")
            rects_per_img = {}  # image key -> list of (x, y, w, h)
            image_per_key = {}  # image key -> image object of the first candidate seen for that key
            for candidate in candidates_bboxes:
                key = candidate.image.full_key
                if key not in rects_per_img:
                    rects_per_img[key] = []
                    image_per_key[key] = candidate.image
                # TODO this does not take the original confidence into account! (that's why the different calc methods don't make any difference, too.)
                rects_per_img[key].append((candidate.xmin, candidate.ymin, candidate.width, candidate.height))

            # step 2: run NMS separately for each image
            min_neighbors = cf.get("nms_opencv_min_neighbors")
            log.log("  -> actually run NMS with a threshold of {}".format(min_neighbors))
            for img_key, rects in rects_per_img.items():
                grouped_rects, weights = cv2.groupRectangles(rects, min_neighbors)

                # step 3: convert the OpenCV rectangles of this image back to bbox objects
                label_object = label.get_by_iid(label.IID_FOREGROUND)
                for index, (x, y, w, h) in enumerate(grouped_rects):
                    # note that the new confidence score isn't normalized
                    confidence = float(weights[index])
                    kept_bboxes.append(
                        LabeledBoundingBox(x, y, x + w, y + h, label_object, confidence, image_per_key[img_key]))

        tw_nms.stop()
        log.log("  -> kept {}/{} windows".format(len(kept_bboxes), len(candidates_bboxes)))

        return kept_bboxes
Example #6
0
    def run(self):
        """Run this app.

        Wraps the main method to introduce some additional events: a timer named after the
        executed script, the cancel hook on KeyboardInterrupt and the finished hook afterwards.
        :return:
        """
        # the timer is named after the executed script (without the ".py" suffix)
        script_name = os.path.basename(sys.argv[0]).replace(".py", "")
        self._time_watcher = TimeWatcher(script_name)

        try:
            self._main()
        except KeyboardInterrupt:
            log.log("WARNING: User interrupted progress.")
            self._on_cancel()

        # reached after a regular finish as well as after a user interrupt
        self._on_finished()
        self._time_watcher.stop()
    def _load_file_lists(self):
        """Load the file lists of all configured datasets, using the combined cache if it is available."""
        tw = TimeWatcher("FileListLoading")

        log.log("Load file lists for dataset(s): {}".format(cf.get("dataset_keys")))
        log.log(".. Required image dimension: {}x{}px".format(cf.get("img_width"), cf.get("img_height")))

        # try the combined cache first; it provides all three structures at once
        cached = self._check_filelist_cache_combined()
        self._image_infos_per_dataset, self._image_infos, self._image_infos_per_iid_label = cached

        cache_complete = (self._image_infos_per_dataset is not None
                          and self._image_infos is not None
                          and self._image_infos_per_iid_label is not None)

        if cache_complete:
            # if the file list was loaded from the cache, we need to initialize the labels manually
            for label_iid in self._image_infos_per_iid_label:
                label.get_by_iid(label_iid)
        else:
            # start from empty structures and fill them dataset by dataset
            self._image_infos_per_dataset = {}
            self._image_infos = []
            self._image_infos_per_iid_label = {}

            for dataset_key in cf.get("dataset_keys"):
                self._load_file_list(dataset_key)

            self.log_stats()

            # ensure that each class meets the minimum and maximum requirements
            self._ensure_min_max()

            # persist the result for the next run
            self._save_filelist_cache_combined()

        log.log("Finished file list loading.")

        tw.stop()
Example #8
0
    def run_inference_on_images(self, images: List[ImageInfo], merge=True) -> List[List[LabeledBoundingBox]]:
        """Run inference on the given image list.

        :param images: The images to process. Not yet split into windows.
        :param merge: If True, performance will be optimized by processing all images at once. Otherwise,
                    performance may be worse, but you can get additional evaluations referring to a single image only.
        :return: The outer list contains one inner list for each provided input image. Each of such inner lists contains
                    bounding boxes for all found foreground classes. An empty input list results in an empty list.
        """
        # init TensorFlow before starting any timers
        self._init_tf()

        # nothing to process; also prevents a division by zero in the per-image runtime statistics below
        image_count = len(images)
        if image_count == 0:
            return []

        # The outer result list contains one inner list for each provided input image.
        all_results = []

        timer_multiple = TimeWatcher("inference_img_multiple: {} imgs".format(image_count))

        if merge:
            # extract windows from all images first and merge them
            timer_extracting = TimeWatcher("extract windows from all images and merge them")

            # extract using multiple threads
            # TODO most of the code's runtime is currently required for the "thread.acquire" method
            log.log(" -> extract")
            with ThreadPoolExecutor() as executor:
                results_per_img = list(executor.map(lambda img: self._extract_windows(img, convert_raw_to_np=False),
                                                    images))

            # merge
            log.log(" -> merge")
            windows_merged_info = []  # merge infos first
            for _, window_infos_of_one_image in results_per_img:
                windows_merged_info += window_infos_of_one_image

            # merge raw data by directly creating a common numpy array
            # (so no combined list in between)
            # NOTE(review): the array is allocated as (n, img_width, img_height, 3) - confirm that this axis order
            # matches the windows returned by _extract_windows
            windows_merged_raw = np.empty(
                shape=[len(windows_merged_info), cf.get("img_width"), cf.get("img_height"), 3],
                dtype=cf.get("img_dtype"))
            raw_window_index = 0
            for raw_windows_of_one_image, _ in results_per_img:
                for raw_window in raw_windows_of_one_image:
                    windows_merged_raw[raw_window_index] = raw_window
                    raw_window_index += 1

            # release memory
            results_per_img = None

            timer_extracting.stop()

            # run inference using the merged windows
            log.log("run inference using the merged windows (total: {}, avg per img: {:.0f})".format(
                len(windows_merged_info),
                len(windows_merged_info) / image_count
            ))
            merged_bboxes = self.run_inference_on_windows(windows_merged_info, windows_merged_raw)

            # separate results: group them by the input images
            all_results_dict = {img.path_original: [] for img in images}
            for bbox in merged_bboxes:
                all_results_dict[bbox.image.path_original].append(bbox)

            # transform dict to final result list (same order as the input images)
            all_results = [all_results_dict[img.path_original] for img in images]
        else:
            # process image after image
            # TODO implement multi-threading for the non-merging mode, too
            for img in images:
                img_results = []
                try:
                    timer_single = TimeWatcher("inference_img_single")
                    img_results = self.run_inference_on_image(img)
                    timer_single.stop()
                except FileNotFoundError:
                    log.log(" .. Skipped {}, because the file could not be found".format(
                        img.path_resized
                    ))
                except Exception:
                    # deliberately best-effort: a single failing image must not abort the whole run.
                    # KeyboardInterrupt and SystemExit are not caught here, so the user can still cancel.
                    log.log(" .. Skipped {}, because of an unexpected error:\n{}".format(
                        img.path_resized,
                        traceback.format_exc()
                    ))

                # a skipped image contributes an empty result list, so the indices still match the input
                all_results.append(img_results)

        timer_multiple.stop()

        if merge:
            # runtime stats for inference only are available in merge mode only
            runtime_total = timer_extracting.elapsed_seconds
            runtime_avg = runtime_total / float(image_count)
            log.log("Runtime window extraction: {} images in {} (avg: {}).".format(
                image_count,
                TimeWatcher.seconds_to_str(runtime_total),
                TimeWatcher.seconds_to_str(runtime_avg)
            ))
            runtime_total = timer_multiple.elapsed_seconds - timer_extracting.elapsed_seconds
            runtime_avg = runtime_total / float(image_count)
            log.log("Runtime inference only: {} images in {} (avg: {}).".format(
                image_count,
                TimeWatcher.seconds_to_str(runtime_total),
                TimeWatcher.seconds_to_str(runtime_avg)
            ))

        # log runtime stats: inference including extracting
        runtime_total = timer_multiple.elapsed_seconds
        runtime_avg = runtime_total / float(image_count)
        log.log("Runtime inference including window extraction: {} images in {} (avg: {}).".format(
            image_count,
            TimeWatcher.seconds_to_str(runtime_total),
            TimeWatcher.seconds_to_str(runtime_avg)
        ))

        return all_results
Example #9
0
class TrainApp(BaseApp):
    """This app can be used to train a single net.

    Training of a cascade is handled in the subclass: TrainCascadeApp.
    """
    def __init__(self, run_now=True):
        """Create new TrainApp.

        :param run_now: Whether to start the training right now.
        """
        # all attributes have to be set before calling the super constructor,
        # because the super constructor may already run the app (run_now=True)

        # introduce an additional attribute pointing to the currently used network
        self._net = None

        # the beta parameter of the f-measure that will be used as the loss function
        self._f_beta = cf.get("f_beta_default")

        # some more default values
        self._tw_training_complete = None
        self._files_checked = False
        self._final_results = None

        # call the super constructor
        BaseApp.__init__(self, run_now)

    def _main(self):
        """Check the input files and run the training.

        A ConstantPredictionException raised during the training is handled like a cancellation.
        """
        self._check_files()
        try:
            self._run_training()
        except ConstantPredictionException:
            log.log(
                "Cancelling because of an ConstantPredictionException exception"
            )
            self._on_cancel()

    def _check_files(self):
        """Remove broken input images once per instance, if enabled in the configuration."""
        if not self._files_checked and cf.get(
                "remove_broken_images_before_training"):
            tw_broken_images = TimeWatcher("RemoveBrokenImages")
            FileListLoader().remove_broken_images()
            tw_broken_images.stop()
            self._files_checked = True

    def _run_training(self, bottleneck_in_size=None):
        """Run the training with current settings.

        :param bottleneck_in_size: Passed through unchanged to nt.NetTrainable.
        """

        # this timer is stopped in _finalize_latest_session()
        self._tw_training_complete = TimeWatcher("SingleNetTrainingComplete")

        # load dataset
        ds = self._load_dataset()
        ds.log_stats()

        # create the neural network
        log.log('Creating the network')
        self._net = nt.NetTrainable(
            dataset=ds,
            snapshot_full_path=cf.get("snapshot_full_path"),
            f_beta=self._f_beta,
            bottleneck_in_size=bottleneck_in_size,
            nr=self.net_nr,
            nr_max=self.net_nr_max,
            use_inception_architecture=self.use_inception_architecture)

        # log some relevant configuration settings before starting the training
        self._log_current_config()

        # actually train the net
        self._net.train()

        # training has been completed correctly, finalize this session
        self._finalize_latest_session()

    def _log_current_config(self):
        """Log some relevant configuration settings (before starting the training)."""
        # Training
        log.log('Start Training..')
        if cf.get("timeout_minutes") > 0:
            log.log('.. timeout after {} minutes'.format(
                cf.get("timeout_minutes")))
        log.log('.. total number of epochs: {}'.format(cf.get("epochs_total")))
        log.log('.. batch size in each iteration: {}'.format(
            cf.get("batch_size")))
        log.log('.. learning rate init: {}'.format(
            cf.get("learning_rate_init")))
        log.log('.. learning rate decay: {}'.format(
            cf.get("learning_rate_decay")))
        log.log('.. learning rate minimum: {}'.format(
            cf.get("learning_rate_min")))

        # regularization: the strength is only logged if the regularization is active
        l2_strength = cf.get("L2_regularization_strength")
        log.log('.. L2 regularization active: {}'.format(l2_strength > 0))
        if l2_strength > 0:
            log.log('.. L2 regularization strength: {}'.format(l2_strength))
        l1_strength = cf.get("L1_regularization_strength")
        log.log('.. L1 regularization active: {}'.format(l1_strength > 0))
        if l1_strength > 0:
            log.log('.. L1 regularization strength: {}'.format(l1_strength))

        # drop out counts as active for rates strictly between 0 and 1 only
        dropout_rate = cf.get("dropout_rate")
        dropout_active = 0 < dropout_rate < 1
        log.log('.. drop out active: {}'.format(dropout_active))
        if dropout_active:
            log.log('.. drop out rate: {}'.format(dropout_rate))
        log.log(".. filter_dataset_after_caching: {}".format(
            cf.get("filter_dataset_after_caching")))
        log.log(".. data augmentation online: {}".format(
            cf.get("data_augmentation_online")))

        # log the used loss function
        if self._f_beta is None:
            if cf.get("weighted_cross_entropy"):
                if cf.get("weighted_cross_entropy_normalize"):
                    loss_text = "normalized"
                else:
                    loss_text = "UNnormalized"
                loss_text += " weighted"
            else:
                loss_text = "(unweighted)"
            loss_text += " cross entropy"
        else:
            loss_text = "f_{:.2f}".format(self._f_beta)
        log.log(".. loss function: {}".format(loss_text))

        # print optimizer
        optimizer = cf.get("optimizer")
        optimizer_name = "unknown"
        if optimizer == 0:
            optimizer_name = "GradientDescentOptimizer"
        elif optimizer == 1:
            optimizer_name = "AdamOptimizer"
        elif optimizer == 2:
            optimizer_name = "MomentumOptimizer"
        log.log('.. optimizer: {}'.format(optimizer_name))

        # the momentum setting is logged for the MomentumOptimizer only
        if optimizer == 2:
            log.log('.. momentum update: {}'.format(cf.get("momentum")))

    def _finalize_latest_session(self):
        """Finalize the training session that was started at last.

        Stops the training timer, runs the final evaluation, stores its results and exports the graph.
        """
        # done
        self._tw_training_complete.stop()

        # the final evaluation may change the current net one last time, so we need to do this before exporting
        val_eval, test_eval, train_eval = self._net.final_evaluation()

        # save final results
        self._final_results = {
            SPLIT_KEY_TRAIN: train_eval,
            SPLIT_KEY_VAL: val_eval,
            SPLIT_KEY_TEST: test_eval
        }

        # exporting
        self._export_graph()

    def _on_cancel(self):
        """This method will be called when the main method was cancelled.

        That is the case if the user interrupted the main method or if the training raised a
        ConstantPredictionException. If a net exists, its training is stopped and the latest results are
        either saved or discarded, depending on the configuration or the user's answer.
        """
        if self._net is None:
            # cancelled before a net was created: there is nothing to stop or to save
            return

        # cancel running training session
        self._net.stop_training()

        # ask the user whether the latest results should be saved (unless auto-saving is configured).
        # The answer is read as plain text instead of being passed to eval(): evaluating it raised a
        # NameError/SyntaxError for anything but y, n or a valid expression and executed arbitrary input.
        if cf.get("auto_save_on_abort"):
            finalize_and_save = True
        else:
            answer = input("Do you want to save the latest data? [y/n]")
            # quotes are stripped, so the formerly required input "n" (including quotes) still works;
            # as before, everything but an explicit "n" is treated as "yes"
            finalize_and_save = answer.strip().strip("\"'").lower() != "n"

        if finalize_and_save:
            log.log("Saving latest results.")
            # finalize as usual
            self._finalize_latest_session()
        else:
            log.log("Results deleted.")

    def _export_graph(self):
        """Persist the trained graph including the weights stored as constants.

        Two temporary graph files (frozen training graph, inference-optimized graph) are written next to the
        final graph file and removed again after the final graph has been exported.
        """
        log.log("Exporting..")

        # define several file paths for the exported graph file
        graph_file_path_final = self._output_graph_file_path()
        graph_file_path_frozen = graph_file_path_final.replace(
            ".pb", "_training_frozen.pb")
        graph_file_path_optimized = graph_file_path_final.replace(
            ".pb", "_inference_optimized.pb")

        # freeze and serialize the current training graph
        log.log("  .. frozen version of the original training graph")
        frozen_training_graph_def = self._net.output_graph_def
        with gfile.FastGFile(graph_file_path_frozen, 'wb') as f:
            f.write(frozen_training_graph_def.SerializeToString())

        # export a version optimized for inference
        # (actually, this is even mandatory to remove at least all data augmentation nodes)
        log.log("  .. inference-optimized graph")
        _ = InferenceOptimizer(
            input=graph_file_path_frozen,
            output=graph_file_path_optimized,
            frozen_graph=True,
            input_names=cf.get("graph_input_training_layer_name"),
            output_names=cf.get("graph_final_inference_layer_name"))

        # the inference optimization removes the shape information of the input node. so we need to add a new
        # placeholder providing explicit shape information.
        # (this information is necessary to allow dynamic cascade evaluation)
        # -> re-import the serialized graph, but replace the image placeholder
        tf.reset_default_graph()
        x_new = tf.placeholder(name=cf.get("graph_input_inference_layer_name"),
                               shape=self._net.shape_data_batch,
                               dtype=tf.float32)
        with tf.gfile.FastGFile(graph_file_path_optimized, 'rb') as f:
            unmodified_reimported_graph_def = tf.GraphDef()
            unmodified_reimported_graph_def.ParseFromString(f.read())
            _ = tf.import_graph_def(
                unmodified_reimported_graph_def,
                name="",  # this is important to prevent a default prefix
                input_map={cf.get("graph_input_training_layer_name"): x_new})
            unmodified_reimported_graph_def = None  # prevent using the wrong graph def
            # self._net.output_graph_def must not be used here, as it won't provide the new graph yet
            modified_reimported_graph_def = tf.get_default_graph(
            ).as_graph_def()

        # and export it once again
        log.log(
            "  .. finally a modified graph version using another placeholder to:\n{}"
            .format(graph_file_path_final))
        with gfile.FastGFile(graph_file_path_final, 'wb') as f:
            f.write(modified_reimported_graph_def.SerializeToString())

        # delete temp graph files
        os.remove(graph_file_path_frozen)
        os.remove(graph_file_path_optimized)

    def _output_graph_file_path(self) -> str:
        """Get the file path that will be used to save the final net graph."""
        return cf.get("output_graph_file")

    def delete_graph_file(self):
        """Delete the graph file that was created by calling self._export_graph(), if it exists."""
        graph_file_path = self._output_graph_file_path()
        if os.path.exists(graph_file_path):
            log.log("Deleting graph file {}".format(graph_file_path))
            os.remove(graph_file_path)

    def _load_dataset(self) -> Dataset:
        """Load and provide the dataset used for training, validation and testing."""
        # when using the inception net, we need to ignore the dimension settings made by the user
        if cf.get("append_inception"):
            cf.set("img_width", inception_builder.MODEL_INPUT_WIDTH)
            cf.set("img_height", inception_builder.MODEL_INPUT_HEIGHT)

        # use all available data
        ds_loader = dataset_loader.DatasetLoader()
        ds = ds_loader.dataset()
        return ds

    def _on_finished(self):
        """This method will be called when the main method is done (either finished regularly or cancelled)."""
        BaseApp._on_finished(self)
        # the run may have ended before a net was created (e.g. interrupted while loading the dataset)
        if self._net is not None:
            # net can't be used after this point
            self._net.close_session()

    @property
    def net_nr(self):
        """The number (=index+1) of the current net."""
        return 1

    @property
    def net_nr_max(self):
        """The maximum net number (=index+1) of the current cascade."""
        return 1

    @property
    def final_results(self) -> Dict[str, Dict[str, float]]:
        """Get the final evaluation results.

        Results will be available after calling _finalize_latest_session, otherwise None will be returned.
        """
        return self._final_results

    @property
    def use_inception_architecture(self):
        """If True, the inception architecture will be used to build the net.
        Otherwise, a custom architecture will be chosen.

        :return: The value of the "append_inception" configuration setting.
        """
        return cf.get("append_inception")