def test_save_load_dense(self):
    """Test save/load of a dense CArray.

    The dense fixture is saved to `self.test_file` (overwriting any
    existing file) and loaded back as a dense array of integers. The
    test passes if no element of the loaded array differs from the
    fixture. Loading with an unknown `arrayformat` must raise ValueError.

    """
    # Log and failure messages refer to the dense case tested here
    self.logger.info(
        "UNITTEST - CArray - Testing save/load for dense matrix")

    self.array_dense.save(self.test_file, overwrite=True)

    loaded_array_dense = CArray.load(
        self.test_file, arrayformat='dense', dtype=int)

    self.assertFalse((loaded_array_dense != self.array_dense).any(),
                     "Saved and loaded arrays (dense) are not equal!")

    # Only 'dense' and 'sparse' arrayformat are supported
    with self.assertRaises(ValueError):
        CArray.load(self.test_file, arrayformat='test')
def _test_rule(self, rule, n_prototypes=20, random_state=None):
    """Run a prototype selector and check the indices it selects.

    The selector is created from `rule`, run on `self.dataset` and its
    `sel_idx` attribute is compared with the indices stored in the
    reference file `idx_<rule>.gz`.

    Parameters
    ----------
    rule : str
        Identifier passed to `CPrototypesSelector.create`. Also used
        to build the name of the reference file.
    n_prototypes : int, optional
        Number of prototypes requested to the selector. Default 20.
    random_state : optional
        Forwarded to `select` only if not None (default None).

    """
    self.logger.info("Testing: " + rule + " selector.")

    selector = CPrototypesSelector.create(rule)
    selector.verbose = 2

    # `random_state` is handed to the selector only when explicitly set
    select_kwargs = {'n_prototypes': n_prototypes}
    if random_state is not None:
        select_kwargs['random_state'] = random_state
    ds_reduced = selector.select(self.dataset, **select_kwargs)

    # Compare the selected indices with the stored reference
    reference_file = fm.join(
        fm.abspath(__file__), "idx_{:}.gz".format(rule))
    selected_idx = selector.sel_idx
    expected_idx = CArray.load(reference_file, dtype=int).ravel()
    self.assert_array_equal(selected_idx, expected_idx)

    if self.plots is True:
        self.draw_selection(ds_reduced, rule)
def test_ps_kmedians(self):
    """Test the 'k-medians' prototype selector with a fixed random state.

    The selected indices are compared with the reference file
    `idx_k-medians.gz` only when the installed sklearn is at least 0.22.

    """
    rule = 'k-medians'
    self.logger.info("Testing: " + rule + " selector.")

    selector = CPrototypesSelector.create(rule)
    selector.verbose = 2
    ds_reduced = selector.select(
        self.dataset, n_prototypes=20, random_state=0)

    # The comparison is skipped with sklearn < 0.22, where it would fail
    # because of an issue in random_state setting inside k-means
    import sklearn
    from pkg_resources import parse_version
    installed = parse_version(sklearn.__version__)
    if installed >= parse_version("0.22"):
        reference_file = fm.join(
            fm.abspath(__file__), "idx_{:}.gz".format(rule))
        selected_idx = selector.sel_idx
        expected_idx = CArray.load(reference_file, dtype=int).ravel()
        self.assert_array_equal(selected_idx, expected_idx)

    if self.plots is True:
        self.draw_selection(ds_reduced, rule)
def test_save_load_sparse_conversion(self):
    """Test save/load of a sparse CArray after a format conversion.

    The internal buffer of the sparse fixture is replaced by the result
    of its `todok()` conversion before saving. The array loaded back
    from `self.test_file` must not differ from the fixture.

    """
    # Storing and loading must work whatever the internal sparse format
    sparse_buffer = self.array_sparse._data
    sparse_buffer._data = sparse_buffer.todok()

    self.array_sparse.save(self.test_file)

    # Sparse arrays cannot be saved to a file handle
    with self.assertRaises(NotImplementedError), \
            open(self.test_file_2, 'w') as f:
        self.array_sparse.save(f)

    loaded_array_sparse = CArray.load(
        self.test_file, arrayformat='sparse', dtype=int)

    any_mismatch = (loaded_array_sparse != self.array_sparse).any()
    self.assertFalse(
        any_mismatch, "Saved and loaded arrays (sparse) are not equal!")
def test_save_load_sparse(self):
    """Test save/load of a sparse CArray.

    The sparse fixture is saved to `self.test_file` and loaded back as
    a sparse array of integers, which must not differ from the fixture.
    Saving to an open file handle must raise NotImplementedError.

    """
    self.logger.info(
        "UNITTEST - CArray - Testing save/load for sparse matrix")

    self.array_sparse.save(self.test_file)

    # Sparse arrays cannot be saved to a file handle
    with self.assertRaises(NotImplementedError), \
            open(self.test_file_2, 'w') as f:
        self.array_sparse.save(f)

    loaded_array_sparse = CArray.load(
        self.test_file, arrayformat='sparse', dtype=int)

    any_mismatch = (loaded_array_sparse != self.array_sparse).any()
    self.assertFalse(
        any_mismatch, "Saved and loaded arrays (sparse) are not equal!")
def test_ps_kmedians(self):
    """Test the 'k-medians' prototype selector with a fixed random state.

    The selected indices are compared with a reference file chosen
    according to the installed sklearn version.

    """
    rule = 'k-medians'
    self.logger.info("Testing: " + rule + " selector.")

    selector = CPrototypesSelector.create(rule)
    selector.verbose = 2
    ds_reduced = selector.select(
        self.dataset, n_prototypes=20, random_state=0)

    if self.plots is True:
        self.draw_selection(ds_reduced, rule)

    # A separate reference file is used for sklearn >= 0.24, as k_means
    # returns a different result there
    import sklearn
    from pkg_resources import parse_version
    installed = parse_version(sklearn.__version__)
    reference_name = "idx_{:}.gz" \
        if installed < parse_version("0.24") else "idx_{:}_sk0-24.gz"
    reference_file = fm.join(
        fm.abspath(__file__), reference_name.format(rule))

    selected_idx = selector.sel_idx
    expected_idx = CArray.load(reference_file, dtype=int).ravel()
    self.assert_array_equal(selected_idx, expected_idx)
def load(self, ds_path, img_format, label_dtype=None, load_data=True):
    """Load all images of specified format inside given path.

    Labels are read from the 'clients.txt' file inside `ds_path`,
    which is mandatory.

    Extra dataset attributes:
     - 'id': last `ds_path` folder.
     - 'img_w', 'img_h': size of the images in pixels.
     - 'img_c': images number of channels.
     - Any other custom attribute is retrieved from 'attributes.txt'
       file, if available inside `ds_path`.
       Only attributes of `str` type are currently supported.

    Parameters
    ----------
    ds_path : str
        Full path to dataset folder.
    img_format : str
        Format of the files to load. Leading/trailing dots and
        letter case are ignored.
    label_dtype : str or dtype, optional
        Datatype of the labels. If None, labels will be strings.
    load_data : bool, optional
        If True (default) features will be stored.
        Otherwise store the paths to the files with dtype=object.

    Returns
    -------
    CDataset
        Dataset built from the loaded patterns and labels, with the
        extra attributes stored in its header.

    Raises
    ------
    OSError
        If the 'clients.txt' file is not available.
    ValueError
        If the number of patterns and of labels do not match.

    """
    # The labels file is mandatory
    labels_path = fm.join(ds_path, 'clients.txt')
    if not fm.file_exist(labels_path):
        raise OSError("cannot load clients file.")

    # Normalize the format to an extension-like pattern ('.ext')
    img_ext = '.' + img_format.strip('.').lower()

    # The image dimensions start as empty integer arrays
    # and are returned filled by the files loader
    patterns, img_w, img_h, img_c = self._load_files(
        ds_path,
        CArray([], dtype=int),
        CArray([], dtype=int),
        CArray([], dtype=int),
        img_ext,
        load_data=load_data)

    labels = CArray.load(labels_path, dtype=label_dtype).ravel()

    n_patterns = patterns.shape[0]
    n_labels = labels.size
    if n_patterns != n_labels:
        raise ValueError("patterns ({:}) and labels ({:}) do not have "
                         "the same number of elements.".format(
                             n_patterns, n_labels))

    # The file with the extra dataset attributes is optional
    attributes_path = fm.join(ds_path, 'attributes.txt')
    if fm.file_exist(attributes_path):
        attributes = load_dict(attributes_path)
    else:
        attributes = dict()

    self.logger.info("Loaded {:} images from {:}...".format(
        patterns.shape[0], ds_path))

    header = CDatasetHeader(
        id=fm.split(ds_path)[1],
        img_w=img_w, img_h=img_h, img_c=img_c,
        **attributes)

    return CDataset(patterns, labels, header=header)