def __init__(self):
    # Build paths of MNIST dataset
    self.train_data_path = fm.join(MNIST_PATH, 'train-images-idx3-ubyte')
    self.train_labels_path = fm.join(MNIST_PATH, 'train-labels-idx1-ubyte')
    self.test_data_path = fm.join(MNIST_PATH, 't10k-images-idx3-ubyte')
    self.test_labels_path = fm.join(MNIST_PATH, 't10k-labels-idx1-ubyte')

    with CDataLoaderMNIST.__lock:
        # For each file check if already downloaded and extracted
        if not fm.file_exist(self.train_data_path) or \
                md5(self.train_data_path) != TRAIN_DATA_MD5:
            self._get_data(TRAIN_DATA_FILE, MNIST_PATH,
                           self.train_data_path, TRAIN_DATA_MD5)
        if not fm.file_exist(self.train_labels_path) or \
                md5(self.train_labels_path) != TRAIN_LABELS_MD5:
            self._get_data(TRAIN_LABELS_FILE, MNIST_PATH,
                           self.train_labels_path, TRAIN_LABELS_MD5)
        if not fm.file_exist(self.test_data_path) or \
                md5(self.test_data_path) != TEST_DATA_MD5:
            self._get_data(TEST_DATA_FILE, MNIST_PATH,
                           self.test_data_path, TEST_DATA_MD5)
        if not fm.file_exist(self.test_labels_path) or \
                md5(self.test_labels_path) != TEST_LABELS_MD5:
            self._get_data(TEST_LABELS_FILE, MNIST_PATH,
                           self.test_labels_path, TEST_LABELS_MD5)
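# Illustrative sketch (not part of the loader above): the class-level lock
# guards the check-then-download sequence so that concurrent loader instances
# never download the same file twice. A minimal standalone version of the
# same pattern, using only the standard library; `download` is a hypothetical
# callable taking (url, path):

import os
import threading

_lock = threading.Lock()

def ensure_file(path, url, download):
    """Download `url` to `path` only if the file is missing (thread-safe)."""
    with _lock:
        if not os.path.isfile(path):
            download(url, path)  # only one thread ever reaches this call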
def tearDown(self):
    # Remove existing 'models_dict.json' before testing
    if fm.file_exist(MODELS_DICT_PATH):
        fm.remove_file(MODELS_DICT_PATH)
    # Remove folder with test model (force, as it is not empty)
    if fm.folder_exist(fm.join(SECML_MODELS_DIR, '_test')):
        fm.remove_folder(fm.join(SECML_MODELS_DIR, '_test'), force=True)
def __init__(self):
    self._train_path = fm.join(ICUBWORLD28_PATH, 'train')
    self._test_path = fm.join(ICUBWORLD28_PATH, 'test')

    with CDataLoaderICubWorld28.__lock:
        # Download (if needed) data and extract it
        if not fm.folder_exist(self._train_path) \
                or not fm.folder_exist(self._test_path):
            self._get_data(ICUBWORLD28_URL, ICUBWORLD28_PATH)
def _load_files(self, dir_path, img_w, img_h, img_c,
                img_ext, label_re=None, load_data=True):
    """Loads any file with given extension inside input folder."""
    # Folders/files will be loaded in alphabetical order
    files_list = sorted(fm.listdir(dir_path))

    # Placeholder for patterns/labels CArray
    patterns = None
    labels = None
    for file_name in files_list:
        # Full path to image file
        file_path = fm.join(dir_path, file_name)
        # Load only files of the specified format
        if fm.splitext(file_name)[1].lower() == img_ext:
            # Opening image in lazy mode (to verify dimensions etc.)
            img = Image.open(file_path)
            # Storing image dimensions...
            img_w = img_w.append(img.width)
            img_h = img_h.append(img.height)
            img_c = img_c.append(len(img.getbands()))
            # If load_data is True, store features, else store path
            if load_data is True:
                # Storing image as a 2D CArray
                array_img = CArray(img.getdata()).ravel().atleast_2d()
            else:
                array_img = CArray([[file_path]])
            # Creating the 2D array patterns x features
            patterns = patterns.append(
                array_img, axis=0) if patterns is not None else array_img
            # Consider only the directory name to set the label
            dir_name = fm.split(dir_path)[1]
            # Label is the image's containing folder name or the re result
            c_id = dir_name if label_re is None \
                else re.search(label_re, dir_name).group(0)
            labels = labels.append(c_id) if labels is not None \
                else CArray(c_id)
            self.logger.debug("{:} has been loaded..."
                              "".format(fm.join(dir_path, file_name)))

    return patterns, labels, img_w, img_h, img_c
def _explore_dir(self, dir_path, img_w, img_h, img_c,
                 img_ext, label_re=None, load_data=True):
    """Explore input directory and load files if leaf."""
    # Folders/files will be loaded in alphabetical order
    items_list = sorted(fm.listdir(dir_path))

    # A leaf folder is a folder with only files in it
    leaf = not any(
        fm.folder_exist(fm.join(dir_path, item)) for item in items_list)

    if leaf is True:  # Leaf directory, time to load files!
        return self._load_files(
            dir_path, img_w, img_h, img_c,
            img_ext, label_re=label_re, load_data=load_data)

    # Placeholder for patterns/labels CArray
    patterns = None
    labels = None
    for subdir in items_list:
        subdir_path = fm.join(dir_path, subdir)
        # Only consider folders (there could be also files)
        if not fm.folder_exist(subdir_path):
            continue
        # Explore next subfolder
        patterns_new, labels_new, img_w, img_h, img_c = self._explore_dir(
            subdir_path, img_w, img_h, img_c, img_ext,
            label_re=label_re, load_data=load_data)
        patterns = patterns.append(patterns_new, axis=0) \
            if patterns is not None else patterns_new
        labels = labels.append(labels_new) \
            if labels is not None else labels_new

    return patterns, labels, img_w, img_h, img_c
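# Illustrative sketch (standard library only) of the leaf-folder test used by
# `_explore_dir` above: a directory is a leaf when none of its entries is
# itself a directory. The function name is hypothetical.

import os

def is_leaf_dir(dir_path):
    """Return True if `dir_path` contains files only (no subdirectories)."""
    return not any(
        os.path.isdir(os.path.join(dir_path, item))
        for item in sorted(os.listdir(dir_path)))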
def setUpClass(cls):
    CUnitTest.setUpClass()

    # We now prepare all the URLs and paths required to mock requests
    # via gitlab API to https://gitlab.com/secml/secml-zoo repository

    # Fake models definitions
    cls.test_models_def = \
        fm.join(fm.abspath(__file__), 'models_dict_test.json')

    # Test model's definition
    cls.test_model_id = '_test_model'
    cls.test_model = \
        fm.join(fm.abspath(__file__), '_test_model_clf.py')
    cls.test_model_state = \
        fm.join(fm.abspath(__file__), '_test_model-clf.gz')

    # URLs for mocking requests to the model zoo repository
    repo = parse.quote('secml/secml-zoo', safe='')
    file_model = parse.quote('models/_test/_test_model_clf.py', safe='')
    file_state = parse.quote('models/_test/_test_model-clf.gz', safe='')
    file_defs = parse.quote('models_dict.json', safe='')

    vers = 'v' + re.search(r'^\d+\.\d+', secml.__version__).group(0)

    api_url = 'https://gitlab.com/api/v4/projects/' \
              '{:}/repository/files/{:}/raw?ref={:}'

    # One url for master branch, one for current library version
    # One for model file, one for state file
    cls.api_url_model_master = api_url.format(repo, file_model, 'master')
    cls.api_url_model_vers = api_url.format(repo, file_model, vers)
    cls.api_url_state_master = api_url.format(repo, file_state, 'master')
    cls.api_url_state_vers = api_url.format(repo, file_state, vers)
    cls.api_url_defs_master = api_url.format(repo, file_defs, 'master')
    cls.api_url_defs_vers = api_url.format(repo, file_defs, vers)

    cls.api_model_headers = {
        'Content-Disposition': r'inline; filename="_test_model_clf.py"'}
    cls.api_state_headers = {
        'Content-Disposition': r'inline; filename="_test_model-clf.gz"'}
    cls.api_defs_headers = {
        'Content-Disposition': r'inline; filename="models_dict.json"'}

    # Set the debug level of models loader to debug
    _logger.set_level('DEBUG')
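# Illustrative note: the GitLab v4 API identifies a project and a file by
# their URL-encoded paths, which is why `safe=''` is passed above so that '/'
# is percent-encoded as well. A minimal standalone check:

from urllib import parse

print(parse.quote('secml/secml-zoo', safe=''))
# -> 'secml%2Fsecml-zoo'
print(parse.quote('models/_test/_test_model_clf.py', safe=''))
# -> 'models%2F_test%2F_test_model_clf.py'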
def _test_load_model(self, defs_url, model_url, state_url):
    """Test for `load_model` valid behavior.

    We test the following:
     - all valid requests
     - a need for updating the models dict and re-downloading the model
     - a need for updating the models dict and re-downloading the model,
       with a connection error when downloading the models dict

    Parameters
    ----------
    defs_url : str or None
    model_url : str or None
    state_url : str or None

    """
    with requests_mock.Mocker() as m:
        # Simulate a fine process, with all resources available
        self._mock_requests(m, defs_url=defs_url,
                            model_url=model_url, state_url=state_url)

        self._check_test_model()  # Call model loading

        # We now simulate a need for `models_dict.json` update
        # by removing `.last_update` file
        fm.remove_file(fm.join(SECML_MODELS_DIR, '.last_update'))
        # Also remove test model to force re-download
        fm.remove_folder(fm.join(SECML_MODELS_DIR, '_test'), force=True)

        self._check_test_model()  # Call model loading

    # We now simulate a need for `models_dict.json` update,
    # but a connection error occurs (simulated by not mocking dl url)
    # Last available version of models dict should be used
    fm.remove_file(fm.join(SECML_MODELS_DIR, '.last_update'))
    fm.remove_folder(fm.join(SECML_MODELS_DIR, '_test'), force=True)

    with requests_mock.Mocker() as m:
        # Do not mock the url for models definitions
        self._mock_requests(m, defs_url=None,
                            model_url=model_url, state_url=state_url)

        self._check_test_model()  # Call model loading
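# Illustrative sketch of how `requests_mock` intercepts HTTP calls inside the
# Mocker context. The url and payload below are hypothetical; the real ones
# used by `_mock_requests` are defined elsewhere in this test suite.

import requests
import requests_mock

with requests_mock.Mocker() as m:
    m.get('https://example.com/models_dict.json',
          json={'_test_model': {'model': '...', 'state': '...'}},
          headers={'Content-Disposition':
                   'inline; filename="models_dict.json"'})
    r = requests.get('https://example.com/models_dict.json')
    print(r.status_code, r.json())  # 200, the mocked payload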
def test_plot(self):
    """Compare the classifiers graphically."""
    ds = CDLRandomBlobs(n_samples=100, centers=3,
                        n_features=2, random_state=1).load()
    fig = self._test_plot(self.nc, ds, [-10])
    fig.savefig(fm.join(fm.abspath(__file__), 'figs',
                        'test_c_classifier_nearest_centroid.pdf'))
def test_load_paths(self):
    """Testing img dataset path loading."""
    dl = CDataLoaderImgClients()

    self.logger.info("Testing loading paths of clients dataset...")

    ds_path = fm.join(fm.abspath(__file__), "ds_clients")

    ds = dl.load(ds_path=ds_path, img_format='jpeg', load_data=False)

    self.logger.info(
        "Loaded {:} images of {:} features, {:} classes".format(
            ds.num_samples, ds.num_features, ds.num_classes))

    # TODO: USE 'U' AFTER TRANSITION TO PYTHON 3
    self.assertIn(ds.X.dtype.char, ('S', 'U'))

    # Checking correct label-img association
    self.assertEqual(ds.Y[0].item(),
                     fm.split(ds.X[0, :].item())[1].replace('.jpeg', ''))
    self.assertEqual(ds.Y[1].item(),
                     fm.split(ds.X[1, :].item())[1].replace('.jpeg', ''))

    # Checking behavior of `get_labels_ovr`
    ovr = ds.get_labels_ovr(pos_label='tiger')  # Y : ['coyote', 'tiger']
    self.assert_array_equal(ovr, CArray([0, 1]))
def test_plot(self):
    """Compare the classifiers graphically."""
    ds = CDLRandomBlobs(n_samples=100, centers=3,
                        n_features=2, random_state=1).load()
    fig = self._test_plot(self.rnd_forest, ds, levels=[0.5])
    fig.savefig(fm.join(fm.abspath(__file__), 'figs',
                        'test_c_classifier_random_forest.pdf'))
def _load_class_names(self, meta_file, class_names_key):
    """Load the names for the classes in the CIFAR dataset.

    Parameters
    ----------
    meta_file : str
        Name of the metafile where the labels are stored.
    class_names_key : bytes
        Dictionary key where the labels are stored.

    Returns
    -------
    dict
        A dictionary with the label of each class.

    """
    meta_file_url = fm.join(self.data_path, meta_file)

    # Load the class-names from the pickled file.
    with open(meta_file_url, 'rb') as mf:
        raw = pickle.load(mf, encoding='bytes')[class_names_key]

    # Convert from binary strings.
    names = {i: x.decode('utf-8') for i, x in enumerate(raw)}

    return names
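# Illustrative sketch: the CIFAR meta files are Python 2 pickles, so when
# loaded with `encoding='bytes'` both keys and values come back as bytes and
# must be decoded, as done above. A self-contained round trip (the key name
# mirrors the CIFAR-10 `batches.meta` format; the data is made up):

import pickle

meta = pickle.dumps({b'label_names': [b'airplane', b'automobile', b'bird']})
raw = pickle.loads(meta)[b'label_names']
names = {i: x.decode('utf-8') for i, x in enumerate(raw)}
print(names)  # {0: 'airplane', 1: 'automobile', 2: 'bird'}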
def _test_plot(self, evas):
    """Create and save a 2D plot of the attack results.

    Parameters
    ----------
    evas : CAttackEvasionCleverhans

    """
    if self.make_figures is False:
        self.logger.debug("Skipping figures...")
        return

    fig = CFigure()

    fig.sp.plot_path(evas.x_seq)
    fig.sp.plot_fun(evas.objective_function, plot_levels=False,
                    multipoint=True, n_grid_points=50)
    fig.sp.plot_decision_regions(self.clf, plot_background=False,
                                 n_grid_points=100)

    fig.title("ATTACK: {}, y_target: {}".format(
        evas._clvrh_attack_class.__name__, self.y_target))
    name_file = '{}_evasion2D_target_{}.pdf'.format(
        evas._clvrh_attack_class.__name__, self.y_target)
    fig.savefig(fm.join(self.images_folder, name_file), file_format='pdf')
def test_plot(self):
    """Compare the classifiers graphically."""
    ds = CDLRandom(n_features=2, n_redundant=0, n_informative=2,
                   n_clusters_per_class=1, random_state=0).load()
    ds.X = CNormalizerMinMax().fit_transform(ds.X)
    fig = self._test_plot(self.ridges[0], ds)
    fig.savefig(fm.join(fm.abspath(__file__), 'figs',
                        'test_c_classifier_ridge.pdf'))
def test_plot(self):
    """Compare the classifiers graphically."""
    ds = CDLRandomBlobs(n_samples=100, centers=3,
                        n_features=2, random_state=1).load()
    fig = self._test_plot(self.knn, ds, levels=[0.5])
    fig.savefig(
        fm.join(fm.abspath(__file__), 'figs', 'test_c_classifier_knn.pdf'))
def _save_fig(self):
    """Visualizing the function being optimized with line search."""
    x_range = CArray.arange(-5, 20, 0.5)
    score_range = x_range.T.apply_along_axis(self.fun.fun, axis=1)
    ref_line = CArray.zeros(x_range.size)

    fig = CFigure(height=6, width=12)
    fig.sp.plot(x_range, score_range, color='b')
    fig.sp.plot(x_range, ref_line, color='k')

    filename = fm.join(fm.abspath(__file__), 'test_line_search_bisect.pdf')
    fig.savefig(filename)
def _load_files(self, ds_path, img_w, img_h, img_c,
                img_ext, load_data=True):
    """Loads any file with given extension inside input folder."""
    # Files will be loaded in alphabetical order
    files_list = sorted(fm.listdir(ds_path))

    # Placeholder for patterns CArray
    patterns = None
    for file_name in files_list:
        # Full path to image file
        file_path = fm.join(ds_path, file_name)
        # Load only files of the specified format
        if fm.splitext(file_name)[1].lower() == img_ext:
            # Opening image in lazy mode (to verify dimensions etc.)
            img = Image.open(file_path)
            # Storing image dimensions...
            img_w = img_w.append(img.width)
            img_h = img_h.append(img.height)
            img_c = img_c.append(len(img.getbands()))
            # If load_data is True, store features, else store path
            if load_data is True:
                # Storing image as a 2D CArray
                array_img = CArray(img.getdata()).ravel().atleast_2d()
            else:
                array_img = CArray([[file_path]])
            # Creating the 2D array patterns x features
            patterns = patterns.append(
                array_img, axis=0) if patterns is not None else array_img
            self.logger.debug("{:} has been loaded..."
                              "".format(fm.join(ds_path, file_name)))

    return patterns, img_w, img_h, img_c
def test_save_and_load_svmlight_file(self):
    """Testing libsvm dataset loading and saving."""
    self.logger.info("Testing libsvm dataset loading and saving...")

    test_file = fm.join(fm.abspath(__file__), "myfile.libsvm")

    # Cleaning test file
    try:
        fm.remove_file(test_file)
    except (OSError, IOError) as e:
        if e.errno != 2:
            raise e

    self.logger.info("Patterns saved:\n{:}".format(self.patterns))
    self.logger.info("Labels saved:\n{:}".format(self.labels))

    CDataLoaderSvmLight.dump(
        CDataset(self.patterns, self.labels), test_file)

    new_dataset = CDataLoaderSvmLight().load(test_file)

    self.assertFalse((new_dataset.X != self.patterns).any())
    self.assertFalse((new_dataset.Y != self.labels).any())

    # Load data but now remove all zero features (columns)
    new_dataset = CDataLoaderSvmLight().load(
        test_file, remove_all_zero=True)

    self.logger.info("Patterns loaded:\n{:}".format(new_dataset.X))
    self.logger.info("Labels loaded:\n{:}".format(new_dataset.Y))
    self.logger.info("Mapping back:\n{:}".format(
        new_dataset.header.idx_mapping))

    self.assertTrue(new_dataset.X.issparse)
    self.assertTrue(new_dataset.Y.isdense)
    self.assertTrue(new_dataset.header.idx_mapping.isdense)

    # Non-zero elements should be unchanged
    self.assertEqual(self.patterns.nnz, new_dataset.X.nnz)
    new_nnz_data = new_dataset.X.nnz_data
    self.assertFalse(
        (self.patterns.nnz_data != new_nnz_data.sort()).any())

    # With idx_mapping we should be able to reconstruct original data
    original = CArray.zeros(self.patterns.shape, sparse=True)
    original[:, new_dataset.header.idx_mapping] = new_dataset.X
    self.assertFalse((self.patterns != original).any())

    # Cleaning test file
    try:
        fm.remove_file(test_file)
    except (OSError, IOError) as e:
        if e.errno != 2:
            raise e
def test_ps_kmedians(self):
    rule = 'k-medians'
    self.logger.info("Testing: " + rule + " selector.")
    ps = CPrototypesSelector.create(rule)
    ps.verbose = 2
    ds_reduced = ps.select(self.dataset, n_prototypes=20, random_state=0)

    if self.plots is True:
        self.draw_selection(ds_reduced, rule)

    # k-means in sklearn >= 0.24 returns a different result
    import sklearn
    from pkg_resources import parse_version
    if parse_version(sklearn.__version__) < parse_version("0.24"):
        idx_path = fm.join(
            fm.abspath(__file__), "idx_{:}.gz".format(rule))
    else:
        idx_path = fm.join(
            fm.abspath(__file__), "idx_{:}_sk0-24.gz".format(rule))

    self.assert_array_equal(
        ps.sel_idx, CArray.load(idx_path, dtype=int).ravel())
def clean_tmp():
    """Cleans temporary files created by the DB loader.

    This method deletes the joblib-related files created while loading
    the database.

    Does not delete the downloaded database archive.

    """
    jl_tmp_folder = fm.join(SECML_DS_DIR, 'lfw_home', 'joblib')
    if fm.folder_exist(jl_tmp_folder):
        fm.remove_folder(jl_tmp_folder, force=True)
def _load_mnist():
    """Load MNIST 4971 dataset."""
    digits = [4, 9, 7, 1]
    digits_str = "".join(['{:}-'.format(i) for i in digits[:-1]])
    digits_str += '{:}'.format(digits[-1])

    # FIXME: REMOVE THIS AFTER CDATALOADERS AUTOMATICALLY STORE DS
    tr_file = fm.join(
        fm.abspath(__file__), 'mnist_tr_{:}.gz'.format(digits_str))
    if not fm.file_exist(tr_file):
        loader = CDataLoaderMNIST()
        tr = loader.load('training', digits=digits)
        pickle_utils.save(tr_file, tr)
    else:
        tr = pickle_utils.load(tr_file, encoding='latin1')

    ts_file = fm.join(
        fm.abspath(__file__), 'mnist_ts_{:}.gz'.format(digits_str))
    if not fm.file_exist(ts_file):
        loader = CDataLoaderMNIST()
        ts = loader.load('testing', digits=digits)
        pickle_utils.save(ts_file, ts)
    else:
        ts = pickle_utils.load(ts_file, encoding='latin1')

    idx = CArray.arange(tr.num_samples)
    val_dts_idx = CArray.randsample(idx, 200, random_state=0)
    val_dts = tr[val_dts_idx, :]

    tr_dts_idx = CArray.randsample(idx, 200, random_state=0)
    tr = tr[tr_dts_idx, :]

    idx = CArray.arange(0, ts.num_samples)
    ts_dts_idx = CArray.randsample(idx, 200, random_state=0)
    ts = ts[ts_dts_idx, :]

    tr.X /= 255.0
    ts.X /= 255.0

    return tr, val_dts, ts, digits, tr.header.img_w, tr.header.img_h
def _plot_2d_evasion(self, evas, ds, x0, filename, th=0, grid_limits=None):
    """Plot evasion attack results for 2D data.

    Parameters
    ----------
    evas : CAttackEvasion
    ds : CDataset
    x0 : CArray
        Initial attack point.
    filename : str
        Name of the output pdf file.
    th : scalar, optional
        Scores threshold of the classifier. Default 0.
    grid_limits : list of tuple or None, optional
        If not specified, will be set as [(-1.5, 1.5), (-1.5, 1.5)].

    """
    if self.make_figures is False:
        self.logger.debug("Skipping figures...")
        return

    fig = CFigure(height=6, width=6)

    if grid_limits is None:
        grid_limits = [(-1.5, 1.5), (-1.5, 1.5)]

    fig.sp.plot_ds(ds)
    fig.sp.plot_fun(
        func=evas.objective_function,
        grid_limits=grid_limits, colorbar=False,
        n_grid_points=50, plot_levels=False)
    fig.sp.plot_decision_regions(
        clf=evas.classifier, plot_background=False,
        grid_limits=grid_limits, n_grid_points=50)
    fig.sp.plot_constraint(
        self._box(evas), n_grid_points=20, grid_limits=grid_limits)
    fig.sp.plot_fun(
        func=lambda z: self._constr(evas, x0).constraint(z),
        plot_background=False, n_grid_points=50,
        grid_limits=grid_limits, levels=[0], colorbar=False)
    fig.sp.plot_path(evas.x_seq)

    fig.savefig(fm.join(self.images_folder, filename), file_format='pdf')
def _get_data(self, file_url, dl_folder):
    """Download input datafile, unzip and store in output_path.

    Parameters
    ----------
    file_url : str
        URL of the file to download.
    dl_folder : str
        Path to the folder where to store the downloaded file.

    """
    f_dl = fm.join(dl_folder, 'iCubWorld28_128x128.zip?dl=1')
    if not fm.file_exist(f_dl) or md5(f_dl) != ICUBWORLD28_MD5:
        # Generate the full path to the downloaded file
        f_dl = dl_file(file_url, dl_folder, md5_digest=ICUBWORLD28_MD5)

    self.logger.info("Extracting files...")

    # Extract the content of downloaded file
    zipfile.ZipFile(f_dl, 'r').extractall(dl_folder)
    # Remove downloaded file
    fm.remove_file(f_dl)

    # iCubWorld28 zip file contains a macosx private folder, clean it up
    if fm.folder_exist(fm.join(ICUBWORLD28_PATH, '__MACOSX')):
        fm.remove_folder(fm.join(ICUBWORLD28_PATH, '__MACOSX'), force=True)

    # iCubWorld28 zip file contains macosx private files, clean them up
    for dirpath, dirnames, filenames in os.walk(ICUBWORLD28_PATH):
        for file in filenames:
            if fnmatch(file, '.DS_Store'):
                fm.remove_file(fm.join(dirpath, file))

    # Now move all data to an upper folder if needed
    if not fm.folder_exist(self._train_path) \
            or not fm.folder_exist(self._test_path):
        sub_d = fm.join(dl_folder, fm.listdir(dl_folder)[0])
        for e in fm.listdir(sub_d):
            e_full = fm.join(sub_d, e)  # Full path to current element
            try:  # Call copy_file or copy_folder when applicable
                if fm.file_exist(e_full) is True:
                    fm.copy_file(e_full, dl_folder)
                elif fm.folder_exist(e_full) is True:
                    fm.copy_folder(e_full, fm.join(dl_folder, e))
            except:
                pass

        # Check that the main dataset file is now in the correct folder
        if not fm.folder_exist(self._train_path) \
                or not fm.folder_exist(self._test_path):
            raise RuntimeError("dataset main file not available!")

        # The subdirectory can now be removed
        fm.remove_folder(sub_d, force=True)
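# Illustrative sketch of the download-and-verify idiom used above: download
# only when the local copy is missing or its MD5 digest does not match the
# expected one. `fm`, `md5` and `dl_file` are the secml helpers already used
# in this module; the function name below is hypothetical.

def ensure_dataset(file_url, dl_folder, local_path, expected_md5):
    """Return path to a verified local copy, downloading it if needed."""
    if not fm.file_exist(local_path) or md5(local_path) != expected_md5:
        local_path = dl_file(file_url, dl_folder, md5_digest=expected_md5)
    return local_path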
def test_save_load(self):
    a = CArray([1, 2, 3])  # Dummy test array

    # Generate a temp file to test
    # (variable renamed to avoid shadowing the `tempfile` module)
    import tempfile
    temp_file = fm.join(tempfile.gettempdir(), 'secml_testpickle')

    temp_file = pickle_utils.save(temp_file, a)

    a_loaded = pickle_utils.load(temp_file)

    self.assert_array_equal(a_loaded, a)
def test_load_img(self):
    """Testing img dataset loading."""
    dl = CDataLoaderImgFolders()

    self.logger.info("Testing loading rgb dataset...")

    ds_rgb_path = fm.join(fm.abspath(__file__), "ds_rgb")

    ds = dl.load(ds_path=ds_rgb_path, img_format='jpeg')

    self.logger.info(
        "Loaded {:} images of {:} features, {:} classes".format(
            ds.num_samples, ds.num_features, ds.num_classes))

    self.assertEqual((2, 151875), ds.X.shape)
    self.assertEqual(2, ds.num_classes)
    self.assertTrue((ds.header.img_w == 225).all())
    self.assertTrue((ds.header.img_h == 225).all())
    self.assertTrue((ds.header.img_c == 3).all())

    self.logger.info("Testing loading grayscale dataset...")

    ds_gray_path = fm.join(fm.abspath(__file__), "ds_gray")

    ds = dl.load(ds_path=ds_gray_path, img_format='jpeg')

    self.logger.info(
        "Loaded {:} images of {:} features, {:} classes".format(
            ds.num_samples, ds.num_features, ds.num_classes))

    self.assertEqual((2, 50625), ds.X.shape)
    self.assertEqual(2, ds.num_classes)
    self.assertTrue((ds.header.img_w == 225).all())
    self.assertTrue((ds.header.img_h == 225).all())
    self.assertTrue((ds.header.img_c == 1).all())
def __init__(self):
    # Extract the name of the data file from the url
    self.data_file = self.data_url.split('/')[-1]

    # Path to the downloaded dataset file
    data_file_path = fm.join(CIFAR_PATH, self.data_file)

    with CDataLoaderCIFAR.__lock:
        # Download (if needed) data and extract it
        if not fm.file_exist(data_file_path) or \
                md5(data_file_path) != self.data_md5:
            self._get_data(self.data_url, CIFAR_PATH)
        elif not fm.folder_exist(self.data_path):
            # Downloaded datafile seems valid, extract only
            self._get_data(self.data_url, CIFAR_PATH, extract_only=True)
def _test_rule(self, rule, n_prototypes=20, random_state=None):
    """Generic test case for prototype selectors."""
    self.logger.info("Testing: " + rule + " selector.")
    ps = CPrototypesSelector.create(rule)
    ps.verbose = 2

    if random_state is None:
        ds_reduced = ps.select(self.dataset, n_prototypes=n_prototypes)
    else:
        ds_reduced = ps.select(self.dataset, n_prototypes=n_prototypes,
                               random_state=random_state)

    idx_path = fm.join(fm.abspath(__file__), "idx_{:}.gz".format(rule))
    self.assert_array_equal(
        ps.sel_idx, CArray.load(idx_path, dtype=int).ravel())

    if self.plots is True:
        self.draw_selection(ds_reduced, rule)
def test_dl_file_md5(self):
    # Fixed long string to write to the file
    x = b'abcd' * 10000
    # Expected digest of the file
    md5_test = '3f0f597c3c69ce42e554fdad3adcbeea'

    # Generate a temp file to test and write content
    tempf = fm.join(self.tempdir, 'test_dl_file_md5')
    with open(tempf, 'wb') as fp:
        fp.write(x)

    # Compute the digest after the file is closed, so content is flushed
    md5_digest = md5(tempf)

    self.logger.info("MD5: {:}".format(md5_digest))
    self.assertEqual(md5_test, md5_digest)
def test_ps_kmedians(self):
    rule = 'k-medians'
    self.logger.info("Testing: " + rule + " selector.")
    ps = CPrototypesSelector.create(rule)
    ps.verbose = 2
    ds_reduced = ps.select(self.dataset, n_prototypes=20, random_state=0)

    # This test will fail with sklearn < 0.22, because of an issue in
    # random_state setting inside the k-means algorithm
    import sklearn
    from pkg_resources import parse_version
    if parse_version(sklearn.__version__) >= parse_version("0.22"):
        idx_path = fm.join(
            fm.abspath(__file__), "idx_{:}.gz".format(rule))
        self.assert_array_equal(
            ps.sel_idx, CArray.load(idx_path, dtype=int).ravel())

    if self.plots is True:
        self.draw_selection(ds_reduced, rule)
def test_save_load(self):
    """Test save/load of sparse arrays."""
    self.logger.info("UNITTEST - CSparse - save/load")

    test_file = fm.join(fm.abspath(__file__), 'test.txt')

    # Cleaning test file
    try:
        fm.remove_file(test_file)
    except (OSError, IOError) as e:
        if e.errno != 2:
            raise e

    self.logger.info(
        "UNITTEST - CSparse - Testing save/load for sparse matrix")

    self.sparse_matrix.save(test_file)

    self.logger.info(
        "Saving again with overwrite=False... IOError should be raised.")
    with self.assertRaises(IOError) as e:
        self.sparse_matrix.save(test_file)
    self.logger.info(e.exception)

    loaded_sparse_matrix = CSparse.load(test_file, dtype=int)

    self.assertFalse((loaded_sparse_matrix != self.sparse_matrix).any(),
                     "Saved and loaded arrays (matrices) are not equal!")

    self.logger.info(
        "UNITTEST - CSparse - Testing save/load for sparse vector")

    self.sparse_vector.save(test_file, overwrite=True)

    loaded_sparse_vector = CSparse.load(test_file, dtype=int)

    self.assertFalse((loaded_sparse_vector != self.sparse_vector).any(),
                     "Saved and loaded arrays (vectors) are not equal!")

    # Cleaning test file
    try:
        fm.remove_file(test_file)
    except (OSError, IOError) as e:
        if e.errno != 2:
            raise e
def test_set_get_state(self):
    """Test for set_state and get_state."""
    pre = CPreProcess.create_chain(['pca', 'mean-std'], [{}, {}])
    clf = CClassifierSkLearn(
        sklearn_model=SVC(kernel="rbf", gamma=2, C=1, random_state=0),
        preprocess=pre)

    clf.fit(self.dataset)
    pred_y = clf.predict(self.dataset.X)
    self.logger.info(
        "Predictions before restoring state:\n{:}".format(pred_y))

    state = clf.get_state()
    self.logger.info("State of multiclass:\n{:}".format(state))

    # Generate a temp file to test
    # (variable renamed to avoid shadowing the `tempfile` module)
    import tempfile
    from secml.utils import fm
    temp_file = fm.join(tempfile.gettempdir(), 'secml_testgetsetstate')

    # Test save state to disk
    temp_file = clf.save_state(temp_file)

    # Create an entirely new clf
    pre_post = CPreProcess.create_chain(['pca', 'mean-std'], [{}, {}])
    clf_post = CClassifierSkLearn(
        sklearn_model=SVC(kernel="rbf", gamma=2, C=1, random_state=0),
        preprocess=pre_post)

    # Restore state from disk
    clf_post.load_state(temp_file)

    pred_y_post = clf_post.predict(self.dataset.X)
    self.logger.info(
        "Predictions after restoring state:\n{:}".format(pred_y_post))

    self.assert_array_equal(pred_y, pred_y_post)