def __init__(self, shuffle_new_data=True):
        """Initialize the DatasetLoader.

        :param shuffle_new_data: When True, freshly-loaded (i.e. not cached) data is
            shuffled exactly once. Data restored from the cache is never shuffled, so
            previously obtained results remain reproducible.
        """
        self._shuffle_new_data = shuffle_new_data

        FileListLoader.__init__(self)
Beispiel #2
0
 def _check_files(self):
     """Validate all loaded input images for TensorFlow compatibility (runs at most once)."""
     # nothing to do if validation already happened or is disabled by config
     if self._files_checked:
         return
     if not cf.get("remove_broken_images_before_training"):
         return

     timer = TimeWatcher("RemoveBrokenImages")
     FileListLoader().remove_broken_images()
     timer.stop()

     # remember the check so repeated calls become no-ops
     self._files_checked = True
    def reset(self, reset_file_list=True):
        """Discard all loaded data so it will be rebuilt on the next access.

        :param reset_file_list: Whether the underlying file list should be reloaded, too.
            Usually it can simply be reused.
        :return:
        """
        if reset_file_list:
            FileListLoader.reset(self)

        # When a dataset has not been cached yet, it is loaded file by file. The
        # per-file samples are accumulated in the following two pools before being
        # converted and merged into the numpy arrays self._data and self._labels.
        self._temp_data_pool = None
        self._temp_label_pool = None

        # the actual data is loaded lazily
        self._data = self._labels = self._dataset = None
Beispiel #4
0
    def __init__(self, cascade_session_key: str, single_session_key: str,
                 max_positive_test_imgs: int, max_negative_test_imgs: int):
        """Set up a new EvaluateRuntimeApp.

        :param cascade_session_key: Session key of the serialized cascade model to evaluate.
                    Falls back to cf.get("default_evaluation_model_cascade") when None.
        :param single_session_key: Session key of the serialized single-net model to evaluate.
                    Falls back to cf.get("default_evaluation_model_single") when None.
        :param max_positive_test_imgs: Upper bound on the number of foreground images to evaluate.
        :param max_negative_test_imgs: Upper bound on the number of background images to evaluate.
        """
        self._cascade_session_key = cascade_session_key
        self._single_session_key = single_session_key
        self._max_positive_test_imgs = max_positive_test_imgs
        self._max_negative_test_imgs = max_negative_test_imgs

        # disable caching so the original images are used rather than image patches
        cf.set("cache_dataset", False)

        # draw the sample exactly once so that every app works on the very same files
        self._img_infos = FileListLoader().sample_image_infos(
            max_positive_test_imgs, max_negative_test_imgs)

        BaseApp.__init__(self)
Beispiel #5
0
 def is_ignored(self) -> bool:
     """Return True if this image file appears on the ignore list."""
     from data.db.file_list_loader import FileListLoader
     loader = FileListLoader()
     return loader.file_is_ignored(self)
"""
Try to open each image in the loaded file list and add the failed ones to an ignore list.
"""
from data.db.file_list_loader import FileListLoader
from utils import log
import config as cf

# overwrite settings such that not only a random subset of files will be checked
cf.set("class_min_images", None)
cf.set("class_max_images", None)

# define which datasets you want to validate
# cf.set("dataset_keys", cf.get("dataset_keys_available"))

# load file lists
loader = FileListLoader()
loader.image_infos

# check for broken images and blacklist them
loader.remove_broken_images()

# save log files
log.log_set_name("log_broken_images")
log.log_save(cf.get("log_dir"))
Beispiel #7
0
# Script fragment: prepare on-disk pre-sampling/augmentation of the native dataset.
# NOTE(review): this script continues beyond the visible chunk; only the setup part is shown here.

# sampling is supported only once for the complete input
if os.path.isdir(cf.get("dataset_presampled_path_root")):
    raise AttributeError("Can't create an augmented input, because there is already one on disk.")

# create missing base folder
os.makedirs(cf.get("dataset_presampled_path_root"))

# used base must be the native data
# (so this must be called before loading any image data)
cf.set("dataset_path_root", cf.get("dataset_native_path_root"))

# cache must be disabled, otherwise we may still try to load an already pre-sampled dataset
cf.set("cache_dataset", False)

# load native input (the property access triggers the actual file-list loading)
FileListLoader().image_infos

# total number of saved samples
i_samples_total = 0

# this existing classifier will be used to identify potential faces (that were not annotated).
# the following settings for cv2_scale_factor and cf.get("nms_opencv_min_neighbors") will produce quite a lot false positives in favor
# of reducing false negatives. this would not be a useful configuration for a production environment, but we want to
# ensure that no faces make their way into the background sample pool.
# NOTE(review): cv2_scale_factor and face_cascade exist only when cf.get("foreground_equals_face")
# is truthy — later code presumably guards on the same flag; confirm in the part not shown here.
if cf.get("foreground_equals_face"):
    log.log("background patches which look like human faces will be removed automatically")
    cv2_scale_factor = 1.1
    face_cascade = cv2.CascadeClassifier(
                os.path.join(cf.get("path_opencv_data"), 'haarcascade_frontalface_default.xml'))

# log some settings
"""
Run inference using the OpenCV implementation of the Viola Jones face detector and visualize the results.
"""
from app.inference_ocv_app import InferenceOCVApp
from app.inference_visualizer_app import InferenceVisualizerApp
import config as cf
from data.db.file_list_loader import FileListLoader

# visualizing makes much more sense on the original native data
cf.set("dataset_path_root", cf.get("dataset_native_path_root"))

# prevent using image patches instead of the original images
cf.set("cache_dataset", False)

# we don't need too many images here
cf.set("class_min_images", 1000)

# create an inference app without running it
app_inference = InferenceOCVApp()

# run the actual inference along with the visualization
app_visual = InferenceVisualizerApp(inference_app=app_inference,
                                    images=FileListLoader().sample_image_infos(
                                        max_positive_test_imgs=80,
                                        max_negative_test_imgs=20))
"""
Run inference using the cascade.
Results won't be visualized, but some stats will be logged.
"""
from app.inference_cascade_app import InferenceCascadeApp
import config as cf
from data.db.file_list_loader import FileListLoader
from utils.cpu_gpu_switcher import CpuGpuSwitcher

# visualizing makes much more sense on the original native data
cf.set("dataset_path_root", cf.get("dataset_native_path_root"))

# prevent using image patches instead of the original images
cf.set("cache_dataset", False)

# we don't need too many images here
cf.set("class_min_images", 1000)

# USE CPU ONLY (so we can execute this while training is active, too)
CpuGpuSwitcher().disable_gpu()

# create inference app without actually running anything yet
app = InferenceCascadeApp()

# run inference on some random samples
app.run_inference_on_images(FileListLoader().sample_image_infos(
    max_positive_test_imgs=80, max_negative_test_imgs=20))
"""
Just load the file list once to see the associated stats.
"""
from data.db.file_list_loader import FileListLoader
from utils import log
import config as cf

# load file lists
loader = FileListLoader()
loader.image_infos

# save log files
log.log_set_name("log_file_list")
log.log_save(cf.get("log_dir"))