Exemple #1
0
    def test_save_load_dense(self):
        """Test save/load of a dense CArray.

        The dense fixture is saved to `self.test_file` (with
        `overwrite=True`), loaded back with `arrayformat='dense'` and
        `dtype=int`, and compared element-wise with the original.
        Loading with an unsupported `arrayformat` is expected to raise
        ValueError.

        """
        self.logger.info(
            "UNITTEST - CArray - Testing save/load for dense matrix")

        self.array_dense.save(self.test_file, overwrite=True)

        loaded_array_dense = CArray.load(self.test_file,
                                         arrayformat='dense',
                                         dtype=int)

        # The failure message must name the dense case, which is the one
        # exercised by this test
        self.assertFalse((loaded_array_dense != self.array_dense).any(),
                         "Saved and loaded arrays (dense) are not equal!")

        # Only 'dense' and 'sparse' arrayformat are supported
        with self.assertRaises(ValueError):
            CArray.load(self.test_file, arrayformat='test')
Exemple #2
0
 def _test_rule(self, rule, n_prototypes=20, random_state=None):
     """Shared check for the prototype selector identified by `rule`.

     Creates the selector, runs it on `self.dataset` and compares the
     selected indices with the ones stored in the "idx_<rule>.gz"
     reference file. The selection is drawn when `self.plots` is True.

     Parameters
     ----------
     rule : str
         Identifier passed to `CPrototypesSelector.create`.
     n_prototypes : int, optional
         Number of prototypes requested from the selector. Default 20.
     random_state : optional
         Forwarded to `select` only when it is not None.

     """
     self.logger.info("Testing: " + rule + " selector.")
     selector = CPrototypesSelector.create(rule)
     selector.verbose = 2

     # `random_state` is passed along only if explicitly provided
     select_kwargs = {'n_prototypes': n_prototypes}
     if random_state is not None:
         select_kwargs['random_state'] = random_state
     reduced_ds = selector.select(self.dataset, **select_kwargs)

     # Reference indices are stored in a file named after the rule
     reference_name = "idx_{:}.gz".format(rule)
     reference_path = fm.join(fm.abspath(__file__), reference_name)
     self.assert_array_equal(
         selector.sel_idx,
         CArray.load(reference_path, dtype=int).ravel())

     if self.plots is True:
         self.draw_selection(reduced_ds, rule)
Exemple #3
0
    def test_ps_kmedians(self):
        """Test the 'k-medians' prototype selector.

        The selected indices are compared with the stored reference file
        only when sklearn is at least version 0.22.

        """
        rule = 'k-medians'
        self.logger.info("Testing: " + rule + " selector.")
        selector = CPrototypesSelector.create(rule)
        selector.verbose = 2
        reduced_ds = selector.select(
            self.dataset, n_prototypes=20, random_state=0)

        # The comparison would fail with sklearn < 0.22, because of an
        # issue in random_state setting inside the k-means algorithm,
        # so it is performed from version 0.22 onwards only
        import sklearn
        from pkg_resources import parse_version
        if parse_version(sklearn.__version__) >= parse_version("0.22"):
            reference_name = "idx_{:}.gz".format(rule)
            reference_path = fm.join(fm.abspath(__file__), reference_name)
            self.assert_array_equal(
                selector.sel_idx,
                CArray.load(reference_path, dtype=int).ravel())

        if self.plots is True:
            self.draw_selection(reduced_ds, rule)
Exemple #4
0
    def test_save_load_sparse_conversion(self):
        """Test save/load of a sparse CArray after a format conversion.

        The internal buffer of the sparse fixture is replaced with the
        result of `todok()` before saving, then the file is loaded back
        as a sparse integer array and compared with the fixture.

        """
        sparse_arr = self.array_sparse

        # Array should be stored and loaded correctly whatever sparse format
        sparse_arr._data._data = sparse_arr._data.todok()

        sparse_arr.save(self.test_file)

        # Saving to a file handle is not supported for sparse arrays
        with self.assertRaises(NotImplementedError), \
                open(self.test_file_2, 'w') as handle:
            sparse_arr.save(handle)

        reloaded = CArray.load(
            self.test_file, arrayformat='sparse', dtype=int)

        self.assertFalse(
            (reloaded != sparse_arr).any(),
            "Saved and loaded arrays (sparse) are not equal!")
Exemple #5
0
    def test_save_load_sparse(self):
        """Test save/load of a sparse CArray.

        The sparse fixture is saved to `self.test_file`, loaded back with
        `arrayformat='sparse'` and `dtype=int`, and compared element-wise
        with the original. Saving to an open file handle is expected to
        raise NotImplementedError.

        """
        self.logger.info(
            "UNITTEST - CArray - Testing save/load for sparse matrix")

        sparse_arr = self.array_sparse
        sparse_arr.save(self.test_file)

        # Saving to a file handle is not supported for sparse arrays
        with self.assertRaises(NotImplementedError), \
                open(self.test_file_2, 'w') as handle:
            sparse_arr.save(handle)

        reloaded = CArray.load(
            self.test_file, arrayformat='sparse', dtype=int)

        self.assertFalse(
            (reloaded != sparse_arr).any(),
            "Saved and loaded arrays (sparse) are not equal!")
    def test_ps_kmedians(self):
        """Test the 'k-medians' prototype selector.

        The selected indices are compared with a stored reference file,
        which is chosen depending on the installed sklearn version.

        """
        rule = 'k-medians'
        self.logger.info("Testing: " + rule + " selector.")
        selector = CPrototypesSelector.create(rule)
        selector.verbose = 2
        reduced_ds = selector.select(
            self.dataset, n_prototypes=20, random_state=0)

        if self.plots is True:
            self.draw_selection(reduced_ds, rule)

        # k_means in sklearn >= 0.24 returns a different result, so a
        # dedicated reference file is used from that version onwards
        import sklearn
        from pkg_resources import parse_version
        before_sk024 = \
            parse_version(sklearn.__version__) < parse_version("0.24")
        name_pattern = "idx_{:}.gz" if before_sk024 else "idx_{:}_sk0-24.gz"
        reference_path = fm.join(
            fm.abspath(__file__), name_pattern.format(rule))

        self.assert_array_equal(
            selector.sel_idx,
            CArray.load(reference_path, dtype=int).ravel())
    def load(self, ds_path, img_format, label_dtype=None, load_data=True):
        """Load all images of specified format inside given path.

        Labels are read from the 'clients.txt' file inside `ds_path`.

        Extra dataset attributes:
         - 'id': last `ds_path` folder.
         - 'img_w', 'img_h': size of the images in pixels.
         - 'img_c': images number of channels.
         - Any other custom attribute is retrieved from 'attributes.txt' file.
           Only attributes of `str` type are currently supported.

        Parameters
        ----------
        ds_path : str
            Full path to dataset folder.
        img_format : str
            Format of the files to load.
        label_dtype : str or dtype, optional
            Datatype of the labels. If None, labels will be strings.
        load_data : bool, optional
            If True (default) features will be stored.
            Otherwise store the paths to the files with dtype=object.

        Returns
        -------
        CDataset
            Dataset built from the loaded patterns and labels, with the
            extra attributes stored in its header.

        Raises
        ------
        OSError
            If the 'clients.txt' file is not found inside `ds_path`.
        ValueError
            If the number of patterns differs from the number of labels.

        """
        # Labels file MUST be available
        labels_path = fm.join(ds_path, 'clients.txt')
        if not fm.file_exist(labels_path):
            raise OSError("cannot load clients file.")

        # Ensuring 'img_format' always has an extension-like pattern
        img_ext = '.' + img_format.strip('.').lower()

        # Load files! The dimensions of each image are collected starting
        # from three empty integer arrays (width, height, channels)
        patterns, img_w, img_h, img_c = self._load_files(
            ds_path,
            CArray([], dtype=int),
            CArray([], dtype=int),
            CArray([], dtype=int),
            img_ext,
            load_data=load_data)

        labels = CArray.load(labels_path, dtype=label_dtype).ravel()

        # One label is expected for each loaded pattern
        n_patterns = patterns.shape[0]
        if n_patterns != labels.size:
            raise ValueError("patterns ({:}) and labels ({:}) do not have "
                             "the same number of elements.".format(
                                 n_patterns, labels.size))

        # Load the file with extra dataset attributes (optional)
        attributes_path = fm.join(ds_path, 'attributes.txt')
        if fm.file_exist(attributes_path):
            attributes = load_dict(attributes_path)
        else:
            attributes = dict()

        self.logger.info("Loaded {:} images from {:}...".format(
            n_patterns, ds_path))

        header = CDatasetHeader(id=fm.split(ds_path)[1],
                                img_w=img_w,
                                img_h=img_h,
                                img_c=img_c,
                                **attributes)

        return CDataset(patterns, labels, header=header)