def __init__(self, shuffle_new_data=True):
    """Create a new DatasetLoader.

    :param shuffle_new_data: If True, newly loaded (i.e. not yet cached) data
        is shuffled once. Cached data is never shuffled, so that previous
        results can be reconstructed.
    """
    self._shuffle_new_data = shuffle_new_data
    FileListLoader.__init__(self)
def _check_files(self):
    """Test all loaded input images for TensorFlow compatibility.

    Runs at most once (guarded by ``self._files_checked``) and only when the
    ``remove_broken_images_before_training`` config flag is enabled.
    """
    if self._files_checked:
        return
    if not cf.get("remove_broken_images_before_training"):
        return

    # time the clean-up pass so it shows up in the logs
    timer = TimeWatcher("RemoveBrokenImages")
    FileListLoader().remove_broken_images()
    timer.stop()

    self._files_checked = True
def reset(self, reset_file_list=True):
    """Reset all loaded data so it will be lazily re-created on next access.

    :param reset_file_list: Whether the underlying file list should be
        reloaded, too. Usually it can be reused.
    :return:
    """
    if reset_file_list:
        FileListLoader.reset(self)

    # When a dataset has not been cached yet, it is loaded file by file.
    # Those per-file samples are collected in the two pools below before
    # being converted and merged into the numpy arrays self._data and
    # self._labels.
    self._temp_data_pool = None
    self._temp_label_pool = None

    # the actual data is loaded lazily
    self._data = None
    self._labels = None
    self._dataset = None
def __init__(self, cascade_session_key: str, single_session_key: str,
             max_positive_test_imgs: int, max_negative_test_imgs: int):
    """Create a new EvaluateRuntimeApp.

    :param cascade_session_key: The session key of the serialized cascade
        model which should be evaluated. If None,
        cf.get("default_evaluation_model_cascade") will be used.
    :param single_session_key: The session key of the serialized single-net
        model which should be evaluated. If None,
        cf.get("default_evaluation_model_single") will be used.
    :param max_positive_test_imgs: The maximum number of foreground images
        which should be evaluated.
    :param max_negative_test_imgs: The maximum number of background images
        which should be evaluated.
    """
    self._cascade_session_key = cascade_session_key
    self._single_session_key = single_session_key
    self._max_positive_test_imgs = max_positive_test_imgs
    self._max_negative_test_imgs = max_negative_test_imgs

    # prevent using image patches instead of the original images
    cf.set("cache_dataset", False)

    # sample images only once to ensure that all apps use the exact same files
    self._img_infos = FileListLoader().sample_image_infos(
        max_positive_test_imgs, max_negative_test_imgs)

    BaseApp.__init__(self)
def is_ignored(self) -> bool:
    """Whether this image file is listed in the ignore list."""
    # imported locally (presumably to avoid a circular import — TODO confirm)
    from data.db.file_list_loader import FileListLoader
    loader = FileListLoader()
    return loader.file_is_ignored(self)
"""
Try to open each image in the loaded file list and add the failed ones to an
ignore list.
"""
from data.db.file_list_loader import FileListLoader
from utils import log
import config as cf

# overwrite settings so that all files get checked, not only a random subset
cf.set("class_min_images", None)
cf.set("class_max_images", None)

# define which datasets you want to validate
# cf.set("dataset_keys", cf.get("dataset_keys_available"))

# load the file lists
loader = FileListLoader()
loader.image_infos

# check for broken images and blacklist them
loader.remove_broken_images()

# save log files
log.log_set_name("log_broken_images")
log.log_save(cf.get("log_dir"))
# sampling is supported only once for the complete input
if os.path.isdir(cf.get("dataset_presampled_path_root")):
    raise AttributeError("Can't create an augmented input, because there is already one on disk.")

# create the missing base folder
os.makedirs(cf.get("dataset_presampled_path_root"))

# the used base must be the native data
# (so this must be set before loading any image data)
cf.set("dataset_path_root", cf.get("dataset_native_path_root"))

# caching must be disabled, otherwise we may still try to load an already
# pre-sampled dataset
cf.set("cache_dataset", False)

# load the native input
FileListLoader().image_infos

# total number of saved samples
i_samples_total = 0

# This existing classifier is used to identify potential faces (that were not
# annotated). The following settings for cv2_scale_factor and
# cf.get("nms_opencv_min_neighbors") will produce quite a lot of false
# positives in favor of reducing false negatives. That would not be a useful
# configuration for a production environment, but here we want to ensure that
# no faces make their way into the background sample pool.
if cf.get("foreground_equals_face"):
    log.log("background patches which look like human faces will be removed automatically")
    cv2_scale_factor = 1.1
    face_cascade = cv2.CascadeClassifier(
        os.path.join(cf.get("path_opencv_data"), 'haarcascade_frontalface_default.xml'))

# log some settings
"""
Run inference using the OpenCV implementation of the Viola-Jones face detector
and visualize the results.
"""
from app.inference_ocv_app import InferenceOCVApp
from app.inference_visualizer_app import InferenceVisualizerApp
import config as cf
from data.db.file_list_loader import FileListLoader

# visualizing makes much more sense on the original native data
cf.set("dataset_path_root", cf.get("dataset_native_path_root"))

# prevent using image patches instead of the original images
cf.set("cache_dataset", False)

# we don't need too many images here
cf.set("class_min_images", 1000)

# create an inference app without running it
app_inference = InferenceOCVApp()

# run the actual inference along with the visualization
app_visual = InferenceVisualizerApp(
    inference_app=app_inference,
    images=FileListLoader().sample_image_infos(
        max_positive_test_imgs=80, max_negative_test_imgs=20))
"""
Run inference using the cascade. Results won't be visualized, but some stats
will be logged.
"""
from app.inference_cascade_app import InferenceCascadeApp
import config as cf
from data.db.file_list_loader import FileListLoader
from utils.cpu_gpu_switcher import CpuGpuSwitcher

# visualizing makes much more sense on the original native data
cf.set("dataset_path_root", cf.get("dataset_native_path_root"))

# prevent using image patches instead of the original images
cf.set("cache_dataset", False)

# we don't need too many images here
cf.set("class_min_images", 1000)

# USE CPU ONLY (so we can execute this while training is active, too)
CpuGpuSwitcher().disable_gpu()

# create the inference app without actually running anything yet
app = InferenceCascadeApp()

# run inference on some random samples
samples = FileListLoader().sample_image_infos(
    max_positive_test_imgs=80, max_negative_test_imgs=20)
app.run_inference_on_images(samples)
"""
Just load the file list once to see the associated stats.
"""
from data.db.file_list_loader import FileListLoader
from utils import log
import config as cf

# load the file lists (stats are produced as a side effect of the access)
file_loader = FileListLoader()
file_loader.image_infos

# save the log files
log.log_set_name("log_file_list")
log.log_save(cf.get("log_dir"))