Example #1
0
    def load(self, output_dir="output"):
        """Load the dataset into memory and store it on the instance.

        The preprocessed images end up in `self.images` and their labels in
        `self.labels`, so `self.labels[i]` is the label of `self.images[i]`.
        Nothing is returned.

        Images are produced by `self._process_image` (presumably normalized
        into the 0-1 range there -- not visible from this method). The label
        of an image is the name of the directory that directly contains it,
        so the dataset must consist of one directory per label with the
        images inside.

        A pickle cache named `self.cache_file_name` inside `output_dir` is
        used when it exists. Otherwise every image is processed and, unless
        the resulting image array is larger than 1 GiB, the cache is written
        for the next run.

        Args:
            output_dir: Directory holding the cache file. It is created if
                missing when the cache is written.
        """
        max_cache_bytes = 1024 * 1024 * 1024  # 1 GiB

        cache_file = os.path.join(output_dir, self.cache_file_name)
        Printer.information("Searching for cache file: " + cache_file)

        if os.path.exists(cache_file):
            Printer.information("Cache file found. Loading from cache.")
            # NOTE(security): unpickling can execute arbitrary code. Only load
            # cache files that this program wrote itself.
            with open(cache_file, 'rb') as fr:
                images, labels = pickle.load(fr)
        else:
            Printer.warning("Cache file not found")
            Printer.information("Started loading dataset")

            images = []
            labels = []

            image_paths = self._get_image_paths()
            total = len(image_paths)

            # Count from 1 so the last progress message reads "total/total".
            for count, image_path in enumerate(image_paths, start=1):
                images.append(self._process_image(image_path))
                # The parent directory name is the label.
                labels.append(image_path.split(os.path.sep)[-2])

                Printer.processing(f"Loaded {count}/{total} images.")

            Printer.end_processing()

            images = np.array(images, dtype='float')
            labels = np.array(labels)

            # `nbytes` is the size of the array data itself; skip caching
            # datasets above the limit to avoid huge pickle files.
            if images.nbytes <= max_cache_bytes:
                # An empty `output_dir` means the current directory.
                if output_dir:
                    os.makedirs(output_dir, exist_ok=True)
                with open(cache_file, 'wb') as fw:
                    pickle.dump((images, labels), fw)

        Printer.information("Dataset loaded into memory")
        self.images = images
        self.labels = labels
Example #2
0
    def _get_image_paths(self):
        """Collect the image files below `self.base_path` in random order.

        The tree is walked recursively. A file is skipped, with a warning,
        when its name has no extension separator, when the text after the
        last separator is not in `self.image_extensions`, or when its
        directory path contains '.ipynb'. The remaining paths are shuffled
        in place with `random.shuffle` before being returned.
        """
        collected = []

        for root, _, names in os.walk(self.base_path):
            for name in names:
                # The order of these checks decides which warning is shown.
                if os.extsep not in name:
                    Printer.warning(f"Files without extension found: {name}")
                elif name.rpartition(os.extsep)[2] not in self.image_extensions:
                    Printer.warning(f"Non-image files found: {name}")
                elif '.ipynb' in root:
                    Printer.warning(f"IPyNb caches found: {name}")
                else:
                    collected.append(os.path.join(root, name))

        random.shuffle(collected)

        Printer.information(f"Found {len(collected)} images")
        return collected
Example #3
0
    def encode_labels(self):
        """Binarize `self.labels` and remember the class names.

        A fresh `LabelBinarizer` (presumably scikit-learn's -- the import is
        not visible here) is fitted on `self.labels`. Its `fit_transform`
        result is stored in `self.encoded_labels` and its `classes_` in
        `self.class_names`; the class names are then logged.
        """
        binarizer = LabelBinarizer()
        encoded = binarizer.fit_transform(self.labels)

        self.encoded_labels = encoded
        self.class_names = binarizer.classes_

        found = ", ".join(self.class_names)
        Printer.information(f"Labels Found: {found}")
Example #4
0
 def save(self, model_output_path: str):
     """Persist the wrapped model and report where it went.

     Delegates to `self.model.save` and then logs the path through
     `Printer`.

     Args:
         model_output_path: Destination passed unchanged to
             `self.model.save`.
     """
     destination = model_output_path
     self.model.save(destination)
     Printer.information("Model saved as {}".format(destination))