def load(self, output_dir="output"):
    """Load the dataset into memory, using an on-disk cache when available.

    The cache file is ``self.cache_file_name`` inside `output_dir`. When it
    exists, the pickled ``(images, labels)`` pair is read back from it.
    Otherwise every path returned by ``self._get_image_paths()`` is passed
    through ``self._process_image`` and labelled with the name of the
    directory that directly contains the file; the result is then written
    to the cache unless the image array is larger than 1 GiB.

    ``self._process_image`` is expected to return the preprocessed image
    normalized into the 0-1 range (not verifiable from this method).

    Args:
        output_dir: Directory holding (or receiving) the cache file. It is
            created if it does not exist and a cache has to be written.

    Returns:
        A tuple ``(images, labels)`` where ``images[i]`` is the i'th
        processed image and ``labels[i]`` is its label. The same arrays
        are also stored on ``self.images`` and ``self.labels``.
    """
    cache_file = os.path.join(output_dir, self.cache_file_name)
    Printer.information("Searching for cache file: " + cache_file)

    if os.path.exists(cache_file):
        Printer.information("Cache file found. Loading from cache.")
        # NOTE(security): unpickling can execute arbitrary code. Only point
        # `output_dir` at cache files this program wrote itself.
        with open(cache_file, 'rb') as fr:
            images, labels = pickle.load(fr)
    else:
        Printer.warning("Cache file not found")
        Printer.information("Started loading dataset")
        images = []
        labels = []
        image_paths = self._get_image_paths()
        total = len(image_paths)
        # Count from 1 so the final progress message reads "total/total".
        for count, image_path in enumerate(image_paths, start=1):
            images.append(self._process_image(image_path))
            # The label is the directory that directly contains the image.
            labels.append(image_path.split(os.path.sep)[-2])
            Printer.processing(f"Loaded {count}/{total} images.")
        Printer.end_processing()

        images = np.array(images, dtype='float')
        labels = np.array(labels)

        # Skip caching for arrays above 1 GiB of pixel data.
        if images.nbytes <= 1024 * 1024 * 1024:
            # Make sure the target directory exists before writing.
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)
            with open(cache_file, 'wb') as fw:
                pickle.dump((images, labels), fw)

    Printer.information("Dataset loaded into memory")
    self.images = images
    self.labels = labels
    return images, labels
def _get_image_paths(self):
    """Collect the image files found under ``self.base_path``.

    Walks the directory tree and keeps every file whose final extension is
    listed in ``self.image_extensions`` and whose directory path does not
    contain ``.ipynb``. Every rejected file is reported with a warning.

    Returns:
        A list of file paths in random order (shuffled in place with
        ``random.shuffle`` before returning).
    """
    collected = []
    for folder, _, names in os.walk(self.base_path):
        for name in names:
            # Split on the last extension separator; an empty separator
            # means the name contains none at all.
            _, separator, suffix = name.rpartition(os.extsep)
            if not separator:
                Printer.warning(
                    "Files without extension found: {}".format(name))
            elif suffix not in self.image_extensions:
                Printer.warning(
                    "Non-image files found: {}".format(name))
            elif '.ipynb' in folder:
                Printer.warning("IPyNb caches found: {}".format(name))
            else:
                collected.append(os.path.join(folder, name))

    random.shuffle(collected)
    Printer.information(f"Found {len(collected)} images")
    return collected
def encode_labels(self):
    """Binarize ``self.labels`` and record the discovered class names.

    Fits a ``LabelBinarizer`` on ``self.labels``, stores the transformed
    labels on ``self.encoded_labels`` and the fitted classes on
    ``self.class_names``, then logs the class names.

    NOTE(review): per the scikit-learn docs a two-class input is encoded
    as a single column rather than two - confirm downstream code expects it.
    """
    binarizer = LabelBinarizer()
    encoded = binarizer.fit_transform(self.labels)
    self.encoded_labels = encoded
    self.class_names = binarizer.classes_

    joined_names = ", ".join(self.class_names)
    Printer.information("Labels Found: " + joined_names)
def save(self, model_output_path: str):
    """Save ``self.model`` to `model_output_path` and log where it went.

    Args:
        model_output_path: Destination handed unchanged to
            ``self.model.save``.
    """
    self.model.save(model_output_path)
    message = f"Model saved as {model_output_path}"
    Printer.information(message)