Exemple #1
0
    def __init__(self):
        """Set the paths of the four MNIST files and fetch the invalid ones.

        A file is considered valid when it exists and its md5 digest is
        equal to the expected one. For any other file `_get_data` is called.
        The checks are performed while holding the class-level lock.

        """
        # Build paths of MNIST dataset
        self.train_data_path = fm.join(MNIST_PATH, 'train-images-idx3-ubyte')
        self.train_labels_path = fm.join(MNIST_PATH, 'train-labels-idx1-ubyte')
        self.test_data_path = fm.join(MNIST_PATH, 't10k-images-idx3-ubyte')
        self.test_labels_path = fm.join(MNIST_PATH, 't10k-labels-idx1-ubyte')

        # (source file, local path, expected md5), in the order to be checked
        resources = (
            (TRAIN_DATA_FILE, self.train_data_path, TRAIN_DATA_MD5),
            (TRAIN_LABELS_FILE, self.train_labels_path, TRAIN_LABELS_MD5),
            (TEST_DATA_FILE, self.test_data_path, TEST_DATA_MD5),
            (TEST_LABELS_FILE, self.test_labels_path, TEST_LABELS_MD5),
        )

        with CDataLoaderMNIST.__lock:
            for source_file, local_path, digest in resources:
                # Nothing to do if the file is there and is not corrupted
                if fm.file_exist(local_path) and md5(local_path) == digest:
                    continue
                self._get_data(source_file, MNIST_PATH, local_path, digest)
Exemple #2
0
    def tearDown(self):
        """Remove the models definitions file and the test model folder."""
        # Get rid of any existing 'models_dict.json'
        if fm.file_exist(MODELS_DICT_PATH):
            fm.remove_file(MODELS_DICT_PATH)

        # The test model folder is not empty, so its removal must be forced
        test_model_dir = fm.join(SECML_MODELS_DIR, '_test')
        if fm.folder_exist(test_model_dir):
            fm.remove_folder(test_model_dir, force=True)
    def __init__(self):
        """Set train/test folder paths and get the data if a folder is missing.

        The check and the eventual call to `_get_data` are performed while
        holding the class-level lock.

        """
        self._train_path = fm.join(ICUBWORLD28_PATH, 'train')
        self._test_path = fm.join(ICUBWORLD28_PATH, 'test')

        with CDataLoaderICubWorld28.__lock:
            # Both folders must be available, otherwise get the data
            data_ready = fm.folder_exist(self._train_path) \
                and fm.folder_exist(self._test_path)
            if not data_ready:
                self._get_data(ICUBWORLD28_URL, ICUBWORLD28_PATH)
Exemple #4
0
    def _load_files(self,
                    dir_path,
                    img_w,
                    img_h,
                    img_c,
                    img_ext,
                    label_re=None,
                    load_data=True):
        """Loads any file with given extension inside input folder.

        Parameters
        ----------
        dir_path : str
            Path to the folder whose files should be loaded. Only the
            direct content of the folder is considered.
        img_w, img_h, img_c : CArray
            Containers to which width, height and number of bands of each
            loaded image are appended (presumably CArray, to be confirmed
            against the caller).
        img_ext : str
            Only files whose lowercased extension, as returned by
            `fm.splitext`, is equal to this value are loaded.
        label_re : str or None, optional
            If None, the label is the name of the folder. Otherwise, the
            label is the part of the folder name matched by this regular
            expression.
        load_data : bool, optional
            If True (default) the pixels of each image are stored as a row
            of the patterns array. Otherwise, the path to the file is stored.

        Returns
        -------
        patterns : CArray or None
            One row for each loaded file. None if no file has been loaded.
        labels : CArray or None
            One label for each loaded file. None if no file has been loaded.
        img_w, img_h, img_c : CArray
            Input containers extended with the size of the loaded images.

        """
        # The label only depends on the name of the containing folder
        dir_name = fm.split(dir_path)[1]

        # Placeholder for patterns/labels CArray
        patterns = None
        labels = None

        # Folders/files will be loaded in alphabetical order
        for file_name in sorted(fm.listdir(dir_path)):

            # Load only files of the specified format
            if fm.splitext(file_name)[1].lower() != img_ext:
                continue

            # Full path to image file
            file_path = fm.join(dir_path, file_name)

            # Image is opened in lazy mode (to verify dimensions etc.).
            # The context manager releases the file handle after each
            # image, which otherwise stays open until garbage collected
            with Image.open(file_path) as img:

                # Storing image dimensions...
                img_w = img_w.append(img.width)
                img_h = img_h.append(img.height)
                img_c = img_c.append(len(img.getbands()))

                # If load_data is True, store features, else store path
                if load_data is True:
                    # Storing image as a 2D CArray
                    array_img = CArray(img.getdata()).ravel().atleast_2d()
                else:
                    array_img = CArray([[file_path]])

            # Creating the 2D array patterns x features
            patterns = patterns.append(
                array_img, axis=0) if patterns is not None else array_img

            # label is the image's containing folder name or the re result
            c_id = dir_name if label_re is None \
                else re.search(label_re, dir_name).group(0)
            labels = labels.append(c_id) if labels is not None \
                else CArray(c_id)

            self.logger.debug("{:} has been loaded...".format(file_path))

        return patterns, labels, img_w, img_h, img_c
Exemple #5
0
    def _explore_dir(self,
                     dir_path,
                     img_w,
                     img_h,
                     img_c,
                     img_ext,
                     label_re=None,
                     load_data=True):
        """Recursively visit a folder, loading files from its leaf folders.

        A folder with no subfolders is a leaf and is passed to
        `_load_files`. For any other folder, each subfolder is visited in
        alphabetical order and the results are stacked. Files stored next
        to subfolders are ignored.

        Returns
        -------
        patterns, labels, img_w, img_h, img_c
            Same structure returned by `_load_files`. `patterns` and
            `labels` are None if no subfolder has been visited.

        """
        # Sorting makes the visiting order alphabetical
        entries = sorted(fm.listdir(dir_path))

        has_subfolders = any(
            fm.folder_exist(fm.join(dir_path, entry)) for entry in entries)

        if not has_subfolders:  # Only files inside, time to load them!
            return self._load_files(dir_path,
                                    img_w,
                                    img_h,
                                    img_c,
                                    img_ext,
                                    label_re=label_re,
                                    load_data=load_data)

        # Will store the stacked results of the subfolders
        all_patterns = None
        all_labels = None
        for entry in entries:

            entry_path = fm.join(dir_path, entry)

            # Files stored next to the subfolders are skipped
            if fm.folder_exist(entry_path):

                # Go one level deeper
                sub_patterns, sub_labels, img_w, img_h, img_c = \
                    self._explore_dir(entry_path,
                                      img_w,
                                      img_h,
                                      img_c,
                                      img_ext,
                                      label_re=label_re,
                                      load_data=load_data)

                if all_patterns is None:
                    all_patterns = sub_patterns
                else:
                    all_patterns = all_patterns.append(sub_patterns, axis=0)

                if all_labels is None:
                    all_labels = sub_labels
                else:
                    all_labels = all_labels.append(sub_labels)

        return all_patterns, all_labels, img_w, img_h, img_c
Exemple #6
0
    def setUpClass(cls):
        """Prepare paths, urls and headers used to mock model zoo requests.

        The following class attributes are set:
         - paths to the fake models definitions, test model and test state
         - gitlab API urls of each resource, for both the 'master' branch
           and the branch of the current library version ('vMAJOR.MINOR')
         - the 'Content-Disposition' headers of each mocked response

        """
        CUnitTest.setUpClass()

        # We now prepare all the urls and path required to mock requests
        # via gitlab API to https://gitlab.com/secml/secml-zoo repository

        # Base path of the test resources (computed once from this file)
        test_dir = fm.abspath(__file__)

        # Fake models definitions
        cls.test_models_def = fm.join(test_dir, 'models_dict_test.json')

        # Test model's definition
        cls.test_model_id = '_test_model'
        cls.test_model = fm.join(test_dir, '_test_model_clf.py')
        cls.test_model_state = fm.join(test_dir, '_test_model-clf.gz')

        # Url for mocking requests to the model zoo repository
        repo = parse.quote('secml/secml-zoo', safe='')
        file_model = parse.quote('models/_test/_test_model_clf.py', safe='')
        file_state = parse.quote('models/_test/_test_model-clf.gz', safe='')
        file_defs = parse.quote('models_dict.json', safe='')
        # The dot is escaped to only match a literal 'MAJOR.MINOR' prefix
        # (an unescaped dot would accept any character as separator)
        vers = 'v' + re.search(r'^\d+\.\d+', secml.__version__).group(0)

        api_url = 'https://gitlab.com/api/v4/projects/' \
                  '{:}/repository/files/{:}/raw?ref={:}'

        # One url for master branch, one for current library version
        # One for model file, one for state file
        cls.api_url_model_master = api_url.format(repo, file_model, 'master')
        cls.api_url_model_vers = api_url.format(repo, file_model, vers)
        cls.api_url_state_master = api_url.format(repo, file_state, 'master')
        cls.api_url_state_vers = api_url.format(repo, file_state, vers)
        cls.api_url_defs_master = api_url.format(repo, file_defs, 'master')
        cls.api_url_defs_vers = api_url.format(repo, file_defs, vers)

        cls.api_model_headers = {
            'Content-Disposition': r'inline; filename="_test_model_clf.py"'
        }
        cls.api_state_headers = {
            'Content-Disposition': r'inline; filename="_test_model-clf.gz"'
        }
        cls.api_defs_headers = {
            'Content-Disposition': r'inline; filename="models_dict.json"'
        }

        # Set the debug level of models loader to debug
        _logger.set_level('DEBUG')
Exemple #7
0
    def _test_load_model(self, defs_url, model_url, state_url):
        """Test for `load_model` valid behavior.

        Three scenarios are checked, in this order:
         1. all the requests are mocked and valid
         2. models dict must be updated and the model downloaded again
         3. same as 2, but the url of models dict is not mocked, which
            simulates a connection error while downloading it

        Parameters
        ----------
        defs_url : str or None
            Forwarded to `_mock_requests` in scenarios 1 and 2.
        model_url : str or None
            Forwarded to `_mock_requests` in all scenarios.
        state_url : str or None
            Forwarded to `_mock_requests` in all scenarios.

        """
        last_update_file = fm.join(SECML_MODELS_DIR, '.last_update')
        test_model_dir = fm.join(SECML_MODELS_DIR, '_test')

        def invalidate_local_data():
            """Simulate a need for models dict update and model re-download."""
            # Without `.last_update` the `models_dict.json` must be updated
            fm.remove_file(last_update_file)
            # Without the test model folder the model must be downloaded
            fm.remove_folder(test_model_dir, force=True)

        with requests_mock.Mocker() as m:

            # Everything is available, the process should be fine
            self._mock_requests(
                m, defs_url=defs_url, model_url=model_url, state_url=state_url)

            self._check_test_model()  # Call model loading

            invalidate_local_data()

            self._check_test_model()  # Call model loading

        # Now the models dict must be updated but its url is not mocked,
        # so the last available version of models dict should be used
        invalidate_local_data()

        with requests_mock.Mocker() as m:

            self._mock_requests(
                m, defs_url=None, model_url=model_url, state_url=state_url)

            self._check_test_model()  # Call model loading
 def test_plot(self):
     """Save to pdf the figure built by `_test_plot` on random blobs."""
     loader = CDLRandomBlobs(
         n_samples=100, centers=3, n_features=2, random_state=1)
     ds = loader.load()
     fig = self._test_plot(self.nc, ds, [-10])
     # Figure is stored in the 'figs' subfolder
     out_file = fm.join(fm.abspath(__file__), 'figs',
                        'test_c_classifier_nearest_centroid.pdf')
     fig.savefig(out_file)
Exemple #9
0
    def test_load_paths(self):
        """Testing img dataset path loading."""
        dl = CDataLoaderImgClients()

        self.logger.info("Testing loading paths of clients dataset...")

        ds = dl.load(ds_path=fm.join(fm.abspath(__file__), "ds_clients"),
                     img_format='jpeg',
                     load_data=False)

        summary = "Loaded {:} images of {:} features, {:} classes".format(
            ds.num_samples, ds.num_features, ds.num_classes)
        self.logger.info(summary)

        # Paths are stored, so a string dtype is expected
        # TODO: USE 'U' AFTER TRANSITION TO PYTHON 3
        self.assertIn(ds.X.dtype.char, ('S', 'U'))

        # The label of each sample is the name of the file w/o extension
        for i in (0, 1):
            img_name = fm.split(ds.X[i, :].item())[1]
            self.assertEqual(ds.Y[i].item(), img_name.replace('.jpeg', ''))

        # Checking behavior of `get_labels_ovr`
        # Y : ['coyote', 'tiger']
        ovr = ds.get_labels_ovr(pos_label='tiger')
        self.assert_array_equal(ovr, CArray([0, 1]))
 def test_plot(self):
     """Save to pdf the figure built by `_test_plot` on random blobs."""
     loader = CDLRandomBlobs(
         n_samples=100, centers=3, n_features=2, random_state=1)
     ds = loader.load()
     fig = self._test_plot(self.rnd_forest, ds, levels=[0.5])
     # Figure is stored in the 'figs' subfolder
     out_file = fm.join(fm.abspath(__file__), 'figs',
                        'test_c_classifier_random_forest.pdf')
     fig.savefig(out_file)
    def _load_class_names(self, meta_file, class_names_key):
        """Read the names of the classes from a pickled metadata file.

        Parameters
        ----------
        meta_file : str
            Name of the metafile, relative to `self.data_path`.
        class_names_key : bytes
            Key of the unpickled object under which the names are stored.

        Returns
        -------
        dict
            Maps the position of each name in the stored sequence
            to the name itself, decoded as utf-8.

        """
        meta_path = fm.join(self.data_path, meta_file)

        # NOTE(review): unpickling can execute arbitrary code, so the
        # metafile is assumed to come from a trusted source - confirm
        with open(meta_path, 'rb') as stream:
            meta = pickle.load(stream, encoding='bytes')
        raw_names = meta[class_names_key]

        # Names are stored as binary strings
        names = {}
        for idx, raw_name in enumerate(raw_names):
            names[idx] = raw_name.decode('utf-8')

        return names
    def _test_plot(self, evas):
        """Plot the attack path and save the figure as pdf.

        The figure shows `evas.x_seq`, the objective function of the
        attack and the decision regions of `self.clf`. Nothing is done
        if `self.make_figures` is False.

        Parameters
        ----------
        evas : CAttackEvasionCleverhans

        """
        if self.make_figures is False:
            self.logger.debug("Skipping figures...")
            return

        fig = CFigure()

        fig.sp.plot_path(evas.x_seq)
        fig.sp.plot_fun(
            evas.objective_function,
            plot_levels=False, multipoint=True, n_grid_points=50)
        fig.sp.plot_decision_regions(
            self.clf, plot_background=False, n_grid_points=100)

        # Both title and file name report the attack and the target class
        attack_name = evas._clvrh_attack_class.__name__

        fig.title(
            "ATTACK: {}, y_target: {}".format(attack_name, self.y_target))

        name_file = '{}_evasion2D_target_{}.pdf'.format(
            attack_name, self.y_target)
        out_path = fm.join(self.images_folder, name_file)
        fig.savefig(out_path, file_format='pdf')
Exemple #13
0
 def test_plot(self):
     """Save to pdf the figure built by `_test_plot` on min-max scaled data."""
     loader = CDLRandom(n_features=2, n_redundant=0, n_informative=2,
                        n_clusters_per_class=1, random_state=0)
     ds = loader.load()
     # Features are scaled before plotting
     scaler = CNormalizerMinMax()
     ds.X = scaler.fit_transform(ds.X)
     fig = self._test_plot(self.ridges[0], ds)
     out_file = fm.join(
         fm.abspath(__file__), 'figs', 'test_c_classifier_ridge.pdf')
     fig.savefig(out_file)
Exemple #14
0
 def test_plot(self):
     """Save to pdf the figure built by `_test_plot` on random blobs."""
     loader = CDLRandomBlobs(
         n_samples=100, centers=3, n_features=2, random_state=1)
     ds = loader.load()
     fig = self._test_plot(self.knn, ds, levels=[0.5])
     # Figure is stored in the 'figs' subfolder
     out_file = fm.join(
         fm.abspath(__file__), 'figs', 'test_c_classifier_knn.pdf')
     fig.savefig(out_file)
 def _save_fig(self):
     """Plot `self.fun.fun` over [-5, 20) along with a zero line, as pdf."""
     xs = CArray.arange(-5, 20, 0.5)
     # The function is evaluated on one point at a time
     scores = xs.T.apply_along_axis(self.fun.fun, axis=1)
     zero_line = CArray.zeros(xs.size)
     fig = CFigure(height=6, width=12)
     # Function in blue, reference line in black
     for ys, color in ((scores, 'b'), (zero_line, 'k')):
         fig.sp.plot(xs, ys, color=color)
     fig.savefig(
         fm.join(fm.abspath(__file__), 'test_line_search_bisect.pdf'))
    def _load_files(self,
                    ds_path,
                    img_w,
                    img_h,
                    img_c,
                    img_ext,
                    load_data=True):
        """Loads any file with given extension inside input folder.

        Parameters
        ----------
        ds_path : str
            Path to the folder whose files should be loaded. Only the
            direct content of the folder is considered.
        img_w, img_h, img_c : CArray
            Containers to which width, height and number of bands of each
            loaded image are appended (presumably CArray, to be confirmed
            against the caller).
        img_ext : str
            Only files whose lowercased extension, as returned by
            `fm.splitext`, is equal to this value are loaded.
        load_data : bool, optional
            If True (default) the pixels of each image are stored as a row
            of the patterns array. Otherwise, the path to the file is stored.

        Returns
        -------
        patterns : CArray or None
            One row for each loaded file. None if no file has been loaded.
        img_w, img_h, img_c : CArray
            Input containers extended with the size of the loaded images.

        """
        # Placeholder for patterns CArray
        patterns = None

        # Files will be loaded in alphabetical order
        for file_name in sorted(fm.listdir(ds_path)):

            # Load only files of the specified format
            if fm.splitext(file_name)[1].lower() != img_ext:
                continue

            # Full path to image file
            file_path = fm.join(ds_path, file_name)

            # Image is opened in lazy mode (to verify dimensions etc.).
            # The context manager releases the file handle after each
            # image, which otherwise stays open until garbage collected
            with Image.open(file_path) as img:

                # Storing image dimensions...
                img_w = img_w.append(img.width)
                img_h = img_h.append(img.height)
                img_c = img_c.append(len(img.getbands()))

                # If load_data is True, store features, else store path
                if load_data is True:
                    # Storing image as a 2D CArray
                    array_img = CArray(img.getdata()).ravel().atleast_2d()
                else:
                    array_img = CArray([[file_path]])

            # Creating the 2D array patterns x features
            patterns = patterns.append(
                array_img, axis=0) if patterns is not None else array_img

            self.logger.debug("{:} has been loaded...".format(file_path))

        return patterns, img_w, img_h, img_c
    def test_save_and_load_svmlight_file(self):
        """Testing libsvm dataset loading and saving."""
        self.logger.info("Testing libsvm dataset loading and saving...")

        test_file = fm.join(fm.abspath(__file__), "myfile.libsvm")

        def discard_test_file():
            """Remove the test file, tolerating only its absence."""
            try:
                fm.remove_file(test_file)
            except (OSError, IOError) as e:
                if e.errno != 2:  # 2 is ENOENT (no such file)
                    raise e

        # Start from a clean state
        discard_test_file()

        self.logger.info("Patterns saved:\n{:}".format(self.patterns))
        self.logger.info("Labels saved:\n{:}".format(self.labels))

        stored_ds = CDataset(self.patterns, self.labels)
        CDataLoaderSvmLight.dump(stored_ds, test_file)

        # A plain load should return exactly what has been stored
        reloaded = CDataLoaderSvmLight().load(test_file)

        self.assertFalse((reloaded.X != self.patterns).any())
        self.assertFalse((reloaded.Y != self.labels).any())

        # Load again, this time removing all zero features (columns)
        compact = CDataLoaderSvmLight().load(
            test_file, remove_all_zero=True)

        self.logger.info("Patterns loaded:\n{:}".format(compact.X))
        self.logger.info("Labels loaded:\n{:}".format(compact.Y))
        self.logger.info(
            "Mapping back:\n{:}".format(compact.header.idx_mapping))

        self.assertTrue(compact.X.issparse)
        self.assertTrue(compact.Y.isdense)
        self.assertTrue(compact.header.idx_mapping.isdense)

        # Removing zero columns should not touch non-zero elements
        self.assertEqual(self.patterns.nnz, compact.X.nnz)
        loaded_nnz = compact.X.nnz_data
        self.assertFalse((self.patterns.nnz_data != loaded_nnz.sort()).any())

        # Original data is rebuilt by placing columns back via idx_mapping
        rebuilt = CArray.zeros(self.patterns.shape, sparse=True)
        rebuilt[:, compact.header.idx_mapping] = compact.X
        self.assertFalse((self.patterns != rebuilt).any())

        # Leave no file behind
        discard_test_file()
    def test_ps_kmedians(self):
        """Test the 'k-medians' selector against the stored indices."""
        rule = 'k-medians'
        self.logger.info("Testing: " + rule + " selector.")
        ps = CPrototypesSelector.create(rule)
        ps.verbose = 2
        ds_reduced = ps.select(self.dataset, n_prototypes=20, random_state=0)

        if self.plots is True:
            self.draw_selection(ds_reduced, rule)

        # A different file of expected indices is used for sklearn >= 0.24
        import sklearn
        from pkg_resources import parse_version
        if parse_version(sklearn.__version__) < parse_version("0.24"):
            idx_file = "idx_{:}.gz"
        else:
            idx_file = "idx_{:}_sk0-24.gz"
        idx_path = fm.join(fm.abspath(__file__), idx_file.format(rule))

        expected_idx = CArray.load(idx_path, dtype=int).ravel()
        self.assert_array_equal(ps.sel_idx, expected_idx)
    def clean_tmp():
        """Remove the temporary joblib folder created by the DB loader.

        The folder 'lfw_home/joblib' inside the datasets directory is
        deleted along with its content, if it exists.

        The downloaded database archive is not deleted.

        """
        joblib_dir = fm.join(SECML_DS_DIR, 'lfw_home', 'joblib')
        if not fm.folder_exist(joblib_dir):
            return  # Nothing to clean
        fm.remove_folder(joblib_dir, force=True)
    def _load_mnist():
        """Load MNIST 4971 dataset.

        Training and testing splits restricted to digits 4, 9, 7, 1 are
        read from a local file if available. Otherwise, they are loaded
        via `CDataLoaderMNIST` and then stored in that file. 200 samples
        are drawn for each of the training, validation and test sets.

        Returns
        -------
        tr, val_dts, ts, digits, img_w, img_h
            Training, validation and test sets, the list of digits and
            the `img_w`, `img_h` values from the training set header.

        """
        digits = [4, 9, 7, 1]
        digits_str = '-'.join(str(d) for d in digits)

        def load_split(split, file_name):
            """Return the split from the local file, creating it if needed."""
            # FIXME: REMOVE THIS AFTER CDATALOADERS AUTOMATICALLY STORE DS
            ds_file = fm.join(
                fm.abspath(__file__), file_name.format(digits_str))
            if fm.file_exist(ds_file):
                return pickle_utils.load(ds_file, encoding='latin1')
            ds = CDataLoaderMNIST().load(split, digits=digits)
            pickle_utils.save(ds_file, ds)
            return ds

        tr = load_split('training', 'mnist_tr_{:}.gz')
        ts = load_split('testing', 'mnist_ts_{:}.gz')

        # NOTE(review): validation and training indices are sampled from
        # the same pool with the same seed, so the two sets are presumably
        # made of the same samples - confirm this is intended
        tr_pool = CArray.arange(tr.num_samples)
        val_dts = tr[CArray.randsample(tr_pool, 200, random_state=0), :]
        tr = tr[CArray.randsample(tr_pool, 200, random_state=0), :]

        ts_pool = CArray.arange(0, ts.num_samples)
        ts = ts[CArray.randsample(ts_pool, 200, random_state=0), :]

        # Scale features of training and test sets
        tr.X /= 255.0
        ts.X /= 255.0

        return tr, val_dts, ts, digits, tr.header.img_w, tr.header.img_h
Exemple #21
0
    def _plot_2d_evasion(self, evas, ds, x0, filename, th=0, grid_limits=None):
        """Draw the results of an evasion attack on 2D data and save as pdf.

        The figure shows the dataset, the objective function of the attack,
        the decision regions of the attacked classifier, the box from
        `self._box`, the zero level of the constraint from `self._constr`
        and the path of the attack. Nothing is done if `self.make_figures`
        is False.

        Parameters
        ----------
        evas : CAttackEvasion
        ds : CDataset
        x0 : CArray
            Initial attack point.
        filename : str
            Name of the output pdf file.
        th : scalar, optional
            Scores threshold of the classifier. Default 0.
        grid_limits : list of tuple or None, optional
            If not specified, will be set as [(-1.5, 1.5), (-1.5, 1.5)].

        """
        if self.make_figures is False:
            self.logger.debug("Skipping figures...")
            return

        fig = CFigure(height=6, width=6)

        grid_limits = [(-1.5, 1.5), (-1.5, 1.5)] \
            if grid_limits is None else grid_limits

        def distance_constraint(z):
            """Value at `z` of the constraint built around `x0`."""
            return self._constr(evas, x0).constraint(z)

        fig.sp.plot_ds(ds)

        # Objective function on the background
        fig.sp.plot_fun(
            func=evas.objective_function,
            grid_limits=grid_limits, colorbar=False,
            n_grid_points=50, plot_levels=False)

        fig.sp.plot_decision_regions(
            clf=evas.classifier,
            plot_background=False,
            grid_limits=grid_limits, n_grid_points=50)

        fig.sp.plot_constraint(
            self._box(evas), n_grid_points=20, grid_limits=grid_limits)

        # Only the zero level of the constraint is drawn
        fig.sp.plot_fun(
            func=distance_constraint,
            plot_background=False,
            n_grid_points=50, grid_limits=grid_limits,
            levels=[0], colorbar=False)

        fig.sp.plot_path(evas.x_seq)

        out_path = fm.join(self.images_folder, filename)
        fig.savefig(out_path, file_format='pdf')
    def _get_data(self, file_url, dl_folder):
        """Download input datafile, unzip and store in output_path.

        The archive is downloaded only if not already in `dl_folder` with
        the expected md5 digest. After the extraction the archive is
        removed, along with the macosx private files it contains. If the
        train/test folders are not in place yet, the content of the first
        entry of `dl_folder` is copied one level up.

        Parameters
        ----------
        file_url : str
            URL of the file to download.
        dl_folder : str
            Path to the folder where to store the downloaded file.

        Raises
        ------
        RuntimeError
            If train or test folders are not available after moving data.

        """
        f_dl = fm.join(dl_folder, 'iCubWorld28_128x128.zip?dl=1')
        if not fm.file_exist(f_dl) or md5(f_dl) != ICUBWORLD28_MD5:
            # Generate the full path to the downloaded file
            f_dl = dl_file(file_url, dl_folder, md5_digest=ICUBWORLD28_MD5)

        self.logger.info("Extracting files...")

        # Extract the content of downloaded file. The archive is closed
        # before removing it, as deleting a file which is still open
        # leaks the handle and fails on some platforms
        with zipfile.ZipFile(f_dl, 'r') as archive:
            archive.extractall(dl_folder)
        # Remove downloaded file
        fm.remove_file(f_dl)

        # iCubWorld28 zip file contains a macosx private folder, clean it up
        macosx_dir = fm.join(ICUBWORLD28_PATH, '__MACOSX')
        if fm.folder_exist(macosx_dir):
            fm.remove_folder(macosx_dir, force=True)

        # iCubWorld28 zip file contains macosx private files, clean them up
        for dirpath, _, filenames in os.walk(ICUBWORLD28_PATH):
            for file_name in filenames:
                if fnmatch(file_name, '.DS_Store'):
                    fm.remove_file(fm.join(dirpath, file_name))

        # Now move all data to an upper folder if needed
        if not fm.folder_exist(self._train_path) \
                or not fm.folder_exist(self._test_path):
            # NOTE(review): assumes the first entry of `dl_folder` is the
            # folder extracted from the archive - confirm
            sub_d = fm.join(dl_folder, fm.listdir(dl_folder)[0])
            for e in fm.listdir(sub_d):
                e_full = fm.join(sub_d, e)  # Full path to current element
                try:  # Call copy_file or copy_folder when applicable
                    if fm.file_exist(e_full) is True:
                        fm.copy_file(e_full, dl_folder)
                    elif fm.folder_exist(e_full) is True:
                        fm.copy_folder(e_full, fm.join(dl_folder, e))
                except Exception as err:
                    # Copy is best-effort (result is checked right after),
                    # but interrupts are no longer swallowed and the
                    # failure is reported
                    self.logger.info(
                        "Unable to copy {:}: {:}".format(e_full, err))

            # Check that the main dataset file is now in the correct folder
            if not fm.folder_exist(self._train_path) \
                    or not fm.folder_exist(self._test_path):
                raise RuntimeError("dataset main file not available!")

            # The subdirectory can now be removed
            fm.remove_folder(sub_d, force=True)
Exemple #23
0
    def test_save_load(self):
        """Save an array via `pickle_utils` and check it is loaded back."""
        a = CArray([1, 2, 3])  # Dummy test array

        # The test file is placed inside the system temporary folder
        import tempfile
        target_path = fm.join(tempfile.gettempdir(), 'secml_testpickle')

        # `save` returns the path to be used for loading
        stored_path = pickle_utils.save(target_path, a)

        self.assert_array_equal(pickle_utils.load(stored_path), a)
Exemple #24
0
    def test_load_img(self):
        """Testing img dataset loading."""

        dl = CDataLoaderImgFolders()

        # (log message, dataset folder, expected shape, expected channels)
        cases = (
            ("Testing loading rgb dataset...", "ds_rgb", (2, 151875), 3),
            ("Testing loading grayscale dataset...", "ds_gray", (2, 50625), 1),
        )

        for msg, ds_folder, exp_shape, exp_channels in cases:

            self.logger.info(msg)

            ds_path = fm.join(fm.abspath(__file__), ds_folder)

            ds = dl.load(ds_path=ds_path, img_format='jpeg')

            self.logger.info(
                "Loaded {:} images of {:} features, {:} classes".format(
                    ds.num_samples, ds.num_features, ds.num_classes))

            self.assertEqual(exp_shape, ds.X.shape)
            self.assertEqual(2, ds.num_classes)
            # All the images are 225 x 225
            self.assertTrue((ds.header.img_w == 225).all())
            self.assertTrue((ds.header.img_h == 225).all())
            self.assertTrue((ds.header.img_c == exp_channels).all())
    def __init__(self):
        """Get the dataset archive and/or extract it, when needed.

        The archive is requested via `_get_data` if not available locally
        or if its md5 digest is different from `self.data_md5`. If the
        archive is valid but `self.data_path` does not exist, `_get_data`
        is called with `extract_only=True`. Both checks are performed while
        holding the class-level lock.

        """
        # The name of the data file is the last part of the url
        self.data_file = self.data_url.rsplit('/', 1)[-1]

        # Where the downloaded dataset file is expected to be
        archive_path = fm.join(CIFAR_PATH, self.data_file)

        with CDataLoaderCIFAR.__lock:
            archive_valid = fm.file_exist(archive_path) \
                and md5(archive_path) == self.data_md5
            if not archive_valid:
                # Download data and extract it
                self._get_data(self.data_url, CIFAR_PATH)
            elif not fm.folder_exist(self.data_path):
                # Downloaded datafile seems valid, extract only
                self._get_data(self.data_url, CIFAR_PATH, extract_only=True)
Exemple #26
0
 def _test_rule(self, rule, n_prototypes=20, random_state=None):
     """Check the indices selected by a rule against the stored ones.

     Parameters
     ----------
     rule : str
         Identifier of the selector, passed to `CPrototypesSelector.create`.
     n_prototypes : int, optional
         Number of prototypes to select. Default 20.
     random_state : int or None, optional
         Passed to `select` only if not None.

     """
     self.logger.info("Testing: " + rule + " selector.")
     ps = CPrototypesSelector.create(rule)
     ps.verbose = 2

     # `random_state` is not passed at all when not specified
     select_args = {'n_prototypes': n_prototypes}
     if random_state is not None:
         select_args['random_state'] = random_state
     ds_reduced = ps.select(self.dataset, **select_args)

     idx_path = fm.join(fm.abspath(__file__), "idx_{:}.gz".format(rule))
     expected_idx = CArray.load(idx_path, dtype=int).ravel()
     self.assert_array_equal(ps.sel_idx, expected_idx)

     if self.plots is True:
         self.draw_selection(ds_reduced, rule)
Exemple #27
0
    def test_dl_file_md5(self):
        """Check `md5` on a file with a fixed, known content."""
        # Content of the file and its expected digest
        content = b'abcd' * 10000
        md5_test = '3f0f597c3c69ce42e554fdad3adcbeea'

        # The file is created inside the test temporary folder
        tempf = fm.join(self.tempdir, 'test_dl_file_md5')
        with open(tempf, 'wb') as fp:
            fp.write(content)

        # Digest is computed after the file has been closed
        md5_digest = md5(tempf)

        self.logger.info("MD5: {:}".format(md5_digest))
        self.assertEqual(md5_test, md5_digest)
Exemple #28
0
    def test_ps_kmedians(self):
        """Test the 'k-medians' selector against the stored indices."""
        rule = 'k-medians'
        self.logger.info("Testing: " + rule + " selector.")
        ps = CPrototypesSelector.create(rule)
        ps.verbose = 2
        ds_reduced = ps.select(self.dataset, n_prototypes=20, random_state=0)

        # The check is skipped with sklearn < 0.22, as it would fail
        # because of an issue in random_state setting inside the
        # k-means algorithm
        import sklearn
        from pkg_resources import parse_version
        sklearn_too_old = \
            parse_version(sklearn.__version__) < parse_version("0.22")
        if not sklearn_too_old:
            idx_path = fm.join(fm.abspath(__file__), "idx_{:}.gz".format(rule))
            expected_idx = CArray.load(idx_path, dtype=int).ravel()
            self.assert_array_equal(ps.sel_idx, expected_idx)

        if self.plots is True:
            self.draw_selection(ds_reduced, rule)
Exemple #29
0
    def test_save_load(self):
        """Test save/load of sparse arrays"""
        self.logger.info("UNITTEST - CSparse - save/load")

        test_file = fm.join(fm.abspath(__file__), 'test.txt')

        def discard_test_file():
            """Remove the test file, tolerating only its absence."""
            try:
                fm.remove_file(test_file)
            except (OSError, IOError) as e:
                if e.errno != 2:  # 2 is ENOENT (no such file)
                    raise e

        # Start from a clean state
        discard_test_file()

        self.logger.info(
            "UNITTEST - CSparse - Testing save/load for sparse matrix")

        self.sparse_matrix.save(test_file)

        # The file now exists, so a second save must not overwrite it
        self.logger.info(
            "Saving again with overwrite=False... IOError should be raised.")
        with self.assertRaises(IOError) as e:
            self.sparse_matrix.save(test_file)
        self.logger.info(e.exception)

        matrix_back = CSparse.load(test_file, dtype=int)

        self.assertFalse((matrix_back != self.sparse_matrix).any(),
                         "Saved and loaded arrays (matrices) are not equal!")

        self.logger.info(
            "UNITTEST - CSparse - Testing save/load for sparse vector")

        # Same file is used, this time asking for overwrite
        self.sparse_vector.save(test_file, overwrite=True)
        vector_back = CSparse.load(test_file, dtype=int)

        self.assertFalse((vector_back != self.sparse_vector).any(),
                         "Saved and loaded arrays (vectors) are not equal!")

        # Leave no file behind
        discard_test_file()
    def test_set_get_state(self):
        """Test for set_state and get_state."""

        def build_clf():
            """Return a new, not fitted, classifier with its preprocess."""
            pre = CPreProcess.create_chain(['pca', 'mean-std'], [{}, {}])
            return CClassifierSkLearn(
                sklearn_model=SVC(
                    kernel="rbf", gamma=2, C=1, random_state=0),
                preprocess=pre)

        clf = build_clf()

        clf.fit(self.dataset)
        pred_y = clf.predict(self.dataset.X)
        self.logger.info(
            "Predictions before restoring state:\n{:}".format(pred_y))

        state = clf.get_state()
        self.logger.info("State of multiclass:\n{:}".format(state))

        # The state is stored inside the system temporary folder
        import tempfile
        from secml.utils import fm
        state_path = fm.join(tempfile.gettempdir(), 'secml_testgetsetstate')

        # `save_state` returns the path to be used for loading
        state_path = clf.save_state(state_path)

        # An entirely new clf, which is never fitted
        clf_post = build_clf()

        # Restore state from disk
        clf_post.load_state(state_path)

        pred_y_post = clf_post.predict(self.dataset.X)
        self.logger.info(
            "Predictions after restoring state:\n{:}".format(pred_y_post))

        self.assert_array_equal(pred_y, pred_y_post)