def __init__(self):

        self._train_path = fm.join(ICUBWORLD28_PATH, 'train')
        self._test_path = fm.join(ICUBWORLD28_PATH, 'test')

        with CDataLoaderICubWorld28.__lock:
            # Download (if needed) data and extract it
            if not fm.folder_exist(self._train_path) \
                    or not fm.folder_exist(self._test_path):
                self._get_data(ICUBWORLD28_URL, ICUBWORLD28_PATH)
    def _explore_dir(self,
                     dir_path,
                     img_w,
                     img_h,
                     img_c,
                     img_ext,
                     label_re=None,
                     load_data=True):
        """Explore input directory and load files if leaf."""
        # Folders/files will be loaded in alphabetical order
        items_list = sorted(fm.listdir(dir_path))

        # A leaf folder is a folder with only files in it
        leaf = not any(
            fm.folder_exist(fm.join(dir_path, item)) for item in items_list)

        if leaf is True:  # Leaf directory, time to load files!
            return self._load_files(dir_path,
                                    img_w,
                                    img_h,
                                    img_c,
                                    img_ext,
                                    label_re=label_re,
                                    load_data=load_data)

        # Placeholder for patterns/labels CArray
        patterns = None
        labels = None
        for subdir in items_list:

            subdir_path = fm.join(dir_path, subdir)

            # Only consider folders (there could be also files)
            if not fm.folder_exist(subdir_path):
                continue

            # Explore next subfolder
            patterns_new, labels_new, img_w, img_h, img_c = self._explore_dir(
                subdir_path,
                img_w,
                img_h,
                img_c,
                img_ext,
                label_re=label_re,
                load_data=load_data)

            patterns = patterns.append(patterns_new, axis=0) \
                if patterns is not None else patterns_new
            labels = labels.append(labels_new) \
                if labels is not None else labels_new

        return patterns, labels, img_w, img_h, img_c
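
# A minimal standalone sketch of the leaf-folder test used by _explore_dir
# above, written against the standard library instead of the fm wrapper.
# The helper name is_leaf_dir is hypothetical, not part of secml.
import os

def is_leaf_dir(dir_path):
    """Return True if dir_path contains files only (no subdirectories)."""
    return not any(os.path.isdir(os.path.join(dir_path, item))
                   for item in sorted(os.listdir(dir_path)))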
    def tearDown(self):

        # Remove existing 'models_dict.json' before testing
        if fm.file_exist(MODELS_DICT_PATH):
            fm.remove_file(MODELS_DICT_PATH)

        # Remove the folder with the test model (forced, as it is not empty)
        if fm.folder_exist(fm.join(SECML_MODELS_DIR, '_test')):
            fm.remove_folder(fm.join(SECML_MODELS_DIR, '_test'), force=True)
    def clean_tmp():
        """Cleans temporary files created by the DB loader.

        This method deletes the joblib-related files created while loading
        the database.

        Does not delete the downloaded database archive.

        """
        jl_tmp_folder = fm.join(SECML_DS_DIR, 'lfw_home', 'joblib')
        if fm.folder_exist(jl_tmp_folder):
            fm.remove_folder(jl_tmp_folder, force=True)
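
# clean_tmp is defined without `self`, so it is presumably exposed as a
# @staticmethod on secml's LFW loader. A hypothetical call (class name
# assumed, not shown in this snippet):
#
#     CDataLoaderLFW.clean_tmp()  # Remove the joblib cache, keep the archive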
    def __init__(self):

        # Extract the name of the data file from the url
        self.data_file = self.data_url.split('/')[-1]

        # Path to the downloaded dataset file
        data_file_path = fm.join(CIFAR_PATH, self.data_file)

        with CDataLoaderCIFAR.__lock:
            # Download (if needed) data and extract it
            if not fm.file_exist(data_file_path) or \
                    md5(data_file_path) != self.data_md5:
                self._get_data(self.data_url, CIFAR_PATH)
            elif not fm.folder_exist(self.data_path):
                # Downloaded datafile seems valid, extract only
                self._get_data(self.data_url, CIFAR_PATH, extract_only=True)
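
# The constructor above follows a "download if missing or corrupted, extract
# only if valid" pattern. Below is a minimal standalone sketch of the
# integrity check with hashlib (helper name and paths are illustrative, not
# secml API):
import hashlib
import os

def needs_download(file_path, expected_md5, chunk_size=8192):
    """Return True if file_path is missing or its MD5 digest differs."""
    if not os.path.isfile(file_path):
        return True
    h = hashlib.md5()
    with open(file_path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            h.update(chunk)
    return h.hexdigest() != expected_md5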
    def _get_data(self, file_url, dl_folder):
        """Download input datafile, unzip and store in output_path.

        Parameters
        ----------
        file_url : str
            URL of the file to download.
        dl_folder : str
            Path to the folder where to store the downloaded file.

        """
        f_dl = fm.join(dl_folder, 'iCubWorld28_128x128.zip?dl=1')
        if not fm.file_exist(f_dl) or md5(f_dl) != ICUBWORLD28_MD5:
            # Generate the full path to the downloaded file
            f_dl = dl_file(file_url, dl_folder, md5_digest=ICUBWORLD28_MD5)

        self.logger.info("Extracting files...")

        # Extract the content of downloaded file
        zipfile.ZipFile(f_dl, 'r').extractall(dl_folder)
        # Remove downloaded file
        fm.remove_file(f_dl)

        # The iCubWorld28 zip file contains a macOS private folder; clean it up
        if fm.folder_exist(fm.join(ICUBWORLD28_PATH, '__MACOSX')):
            fm.remove_folder(fm.join(ICUBWORLD28_PATH, '__MACOSX'), force=True)

        # The iCubWorld28 zip file also contains macOS private files; clean them up
        for dirpath, dirnames, filenames in os.walk(ICUBWORLD28_PATH):
            for file in filenames:
                if fnmatch(file, '.DS_Store'):
                    fm.remove_file(fm.join(dirpath, file))

        # Now move all data to an upper folder if needed
        if not fm.folder_exist(self._train_path) \
                or not fm.folder_exist(self._test_path):
            sub_d = fm.join(dl_folder, fm.listdir(dl_folder)[0])
            for e in fm.listdir(sub_d):
                e_full = fm.join(sub_d, e)  # Full path to current element
                try:  # Call copy_file or copy_folder when applicable
                    if fm.file_exist(e_full) is True:
                        fm.copy_file(e_full, dl_folder)
                    elif fm.folder_exist(e_full) is True:
                        fm.copy_folder(e_full, fm.join(dl_folder, e))
                except OSError:  # Skip any element that cannot be copied
                    pass

            # Check that the main dataset file is now in the correct folder
            if not fm.folder_exist(self._train_path) \
                    or not fm.folder_exist(self._test_path):
                raise RuntimeError("dataset main file not available!")

            # The subdirectory can now be removed
            fm.remove_folder(sub_d, force=True)
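
# _get_data ends by moving the archive's single top-level folder up one
# level. A minimal standalone sketch of the same flattening step using the
# standard library instead of the fm helpers (function name hypothetical):
import os
import shutil

def flatten_one_level(dl_folder):
    """Move the contents of the only subfolder of dl_folder up one level."""
    sub_d = os.path.join(dl_folder, sorted(os.listdir(dl_folder))[0])
    for e in os.listdir(sub_d):
        shutil.move(os.path.join(sub_d, e), os.path.join(dl_folder, e))
    os.rmdir(sub_d)  # The subfolder is now empty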
    def load(self,
             ds_type,
             day='day4',
             icub7=False,
             resize_shape=(128, 128),
             crop_shape=None,
             normalize=True):
        """Load the dataset.

        The pre-cropped version of the images is loaded, with size 128 x 128.
        An additional resize/crop shape can be passed as input if needed.

        Extra dataset attributes:
          - 'img_w', 'img_h': size of the images in pixels.
          - 'y_orig': CArray with the original labels of the objects.

        Parameters
        ----------
        ds_type : str
            Identifier of the dataset split to load, either 'train' or 'test'.
        day : str, optional
            Acquisition day from which to load the images. Default 'day4'.
            The available options are: 'day1', 'day2', 'day3', 'day4'.
        icub7 : bool or int, optional
            If True, load a reduced dataset with 7 objects by
            taking the 3rd object for each category. Default False.
            If int, the Nth object for each category will be loaded.
        resize_shape : tuple, optional
            Images will be resized to (height, width) shape. Default (128, 128).
        crop_shape : tuple or None, optional
            If a tuple, a crop of (height, width) shape will be extracted
            from the center of each image. Default None.
        normalize : bool, optional
            If True, images are scaled to the [0, 1] interval. Default True.

        Returns
        -------
        CDataset
            Output dataset.

        """
        if ds_type == 'train':
            data_path = self._train_path
        elif ds_type == 'test':
            data_path = self._test_path
        else:
            raise ValueError("use ds_type = {'train', 'test'}.")

        day_path = fm.join(data_path, day)
        if not fm.folder_exist(day_path):
            raise ValueError("{:} not available.".format(day))

        self.logger.info(
            "Loading iCubWorld{:} {:} {:} dataset from {:}".format(
                '7' if icub7 else '28', day, ds_type, day_path))

        icub7 = 3 if icub7 is True else icub7  # Use the 3rd sub-obj by default

        x = None
        y_orig = []
        for obj in sorted(fm.listdir(day_path)):  # Objects (cup, sponge, ..)

            obj_path = fm.join(day_path, obj)

            # Sub-objects (cup1, cup2, ...)
            for sub_obj in sorted(fm.listdir(obj_path)):

                if icub7 and sub_obj[-1] != str(icub7):
                    continue  # Load only the `icub7`th object

                self.logger.debug("Loading images for {:}".format(sub_obj))

                sub_obj_path = fm.join(obj_path, sub_obj)

                for f in sorted(fm.listdir(sub_obj_path)):

                    img = Image.open(fm.join(sub_obj_path, f))

                    if resize_shape is not None:
                        img = resize_img(img, resize_shape)
                    if crop_shape is not None:
                        img = crop_img(img, crop_shape)

                    img = CArray(img.getdata(), dtype='uint8').ravel()
                    x = x.append(img, axis=0) if x is not None else img

                    y_orig.append(sub_obj)  # Label is given by sub-obj name

        # Create the int-based array of labels. Keep original labels in y_orig
        y_orig = CArray(y_orig)
        y = y_orig.unique(return_inverse=True)[1]

        if normalize is True:
            x = x / 255.0  # Out-of-place division also casts uint8 to float

        # Size of images is the crop shape (if any), otherwise the resize shape
        img_h, img_w = crop_shape if crop_shape is not None else resize_shape

        header = CDatasetHeader(img_w=img_w, img_h=img_h, y_orig=y_orig)

        return CDataset(x, y, header=header)
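
# A hedged usage sketch for the loader above. Note that the first call
# triggers the dataset download; the import path mirrors the other loaders
# exported by secml.data.loader.
from secml.data.loader import CDataLoaderICubWorld28

ds_tr = CDataLoaderICubWorld28().load('train', day='day4', icub7=True)
print(ds_tr.num_samples, ds_tr.header.img_w, ds_tr.header.img_h)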
import os
import numpy as np

from secml.array import CArray
from secml.data.loader import CDLRandomBlobs
from secml.optim.constraints import \
    CConstraintBox, CConstraintL1, CConstraintL2
from secml.ml.features.normalization import CNormalizerMinMax
from secml.ml.classifiers import CClassifierSVM, CClassifierDecisionTree
from secml.core.type_utils import is_list, is_float
from secml.figure import CFigure
from secml.utils import fm
from secml.testing import CUnitTest  # Base class for the test case below

IMAGES_FOLDER = fm.join(fm.abspath(__file__), 'test_images')
if not fm.folder_exist(IMAGES_FOLDER):
    fm.make_folder(IMAGES_FOLDER)


class CAttackEvasionTestCases(CUnitTest):
    """Unittests interface for CAttackEvasion."""
    images_folder = IMAGES_FOLDER
    make_figures = os.getenv('MAKE_FIGURES', False)  # Set env var to produce figures

    def _load_blobs(self, n_feats, n_clusters, sparse=False, seed=None):
        """Load Random Blobs dataset.

        - n_samples = 50
        - center_box = (-0.5, 0.5)
        - cluster_std = 0.5

        """
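        # Reconstruction sketch: the original body is truncated in this
        # snippet. The call below uses only the defaults documented in the
        # docstring and the CDLRandomBlobs loader imported at module level;
        # treat it as an assumption, not the original implementation.
        ds = CDLRandomBlobs(n_samples=50,
                            n_features=n_feats,
                            centers=n_clusters,
                            center_box=(-0.5, 0.5),
                            cluster_std=0.5,
                            random_state=seed).load()
        return ds.tosparse() if sparse else ds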
    def _plot_optimization(self,
                           solver,
                           x_0,
                           g_min,
                           grid_limits,
                           method=None,
                           vmin=None,
                           vmax=None,
                           label=None):
        """Plots the optimization problem.

        Parameters
        ----------
        solver : COptimizer
        x_0 : CArray
            Starting point.
        g_min : CArray
            Final point (after optimization).
        grid_limits : list of tuple
        vmin, vmax : int or None, optional
        label : str or None, optional

        """
        fig = CFigure(markersize=12)

        # Plot objective function
        fig.sp.plot_fun(func=CArray.apply_along_axis,
                        plot_background=True,
                        n_grid_points=30,
                        n_colors=25,
                        grid_limits=grid_limits,
                        levels=[0.5],
                        levels_color='gray',
                        levels_style='--',
                        colorbar=True,
                        func_args=(
                            solver.f.fun,
                            1,
                        ),
                        vmin=vmin,
                        vmax=vmax)

        if solver.bounds is not None:  # Plot box constraint
            fig.sp.plot_fun(func=lambda x: solver.bounds.constraint(x),
                            plot_background=False,
                            n_grid_points=20,
                            grid_limits=grid_limits,
                            levels=[0],
                            colorbar=False)

        if solver.constr is not None:  # Plot distance constraint
            fig.sp.plot_fun(func=lambda x: solver.constr.constraint(x),
                            plot_background=False,
                            n_grid_points=20,
                            grid_limits=grid_limits,
                            levels=[0],
                            colorbar=False)

        # Plot optimization trace
        if solver.x_seq is not None:
            fig.sp.plot_path(solver.x_seq)
        else:
            fig.sp.plot_path(x_0.append(g_min, axis=0))

        fig.sp.title("{:}(fun={:}) - Glob Min @ {:}".format(
            solver.class_type, solver.f.class_type,
            solver.f.global_min_x().round(2).tolist()))

        test_img_fold_name = 'test_images'
        test_img_fold_path = fm.join(fm.abspath(__file__), test_img_fold_name)
        if not fm.folder_exist(test_img_fold_path):
            fm.make_folder(test_img_fold_path)

        if method is None:
            filename = fm.join(test_img_fold_path,
                               solver.class_type + '-' + solver.f.class_type)
        else:
            filename = fm.join(
                test_img_fold_path,
                solver.class_type + '-' + method + '-' + solver.f.class_type)

        filename += '-' + label if label is not None else ''


        fig.savefig('{:}.pdf'.format(filename))
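
# Imports required by dl_file below; md5 here is secml's file-digest helper,
# defined alongside dl_file in secml.utils.download_utils.
import re
import sys
import requests

from secml.utils import fm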
def dl_file(url,
            output_dir,
            user=None,
            headers=None,
            chunk_size=1024,
            md5_digest=None):
    """Download file from input url and store in output_dir.

    Parameters
    ----------
    url : str
        URL of the file to download.
    output_dir : str
        Path to the directory where the file should be stored.
        If the folder does not exist, it will be created.
    user : str or None, optional
        String with the user[:password] credentials, if required to access
        the URL.
    headers : dict or None, optional
        Dictionary with any additional header for the download request.
    chunk_size : int, optional
        Size of the data chunk to read from url in bytes. Default 1024.
    md5_digest : str or None, optional
        Expected MD5 digest of the downloaded file.
        If a different digest is computed, the downloaded file will be
        removed and ValueError is raised.

    """
    # Parsing user string
    auth = tuple(user.split(':')) if user is not None else None
    # If no password is specified, use an empty string
    auth = (auth[0], '') if auth is not None and len(auth) == 1 else auth

    r = requests.get(url, auth=auth, headers=headers, stream=True)

    if r.status_code != 200:
        raise RuntimeError("File is not available (error code {:})".format(
            r.status_code))

    # Get file size (bytes)
    if "content-length" in r.headers:
        total_size = r.headers.get('content-length').strip()
        total_size = int(total_size)
    else:  # Total size unknown
        total_size = None

    dl = 0

    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1 byte")

    sys.stdout.write("Downloading from `{:}`".format(url))
    if total_size is not None:
        sys.stdout.write(" ({:} bytes)".format(total_size))
    sys.stdout.write("\n")
    sys.stdout.flush()

    # Create the output directory if it does not exist
    if not fm.folder_exist(output_dir):
        fm.make_folder(output_dir)

    try:  # Get the filename from the response headers
        fname = re.findall(r"filename=\"(.+)\"",
                           r.headers["Content-Disposition"])[0]
    except (KeyError, IndexError):
        # Or use the last part of download url (removing parameters)
        fname = url.split('/')[-1].split('?', 1)[0]

    # Build full path of output file
    out_path = fm.join(output_dir, fname)

    # Read data and store each chunk
    with open(out_path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=chunk_size):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
                # Report progress (if total_size is known)
                if total_size is not None:
                    dl += len(chunk)
                    done = int((50 * dl) / total_size)
                    if sys.stdout.isatty() is True:
                        # Provide real-time updates (if stdout is a tty)
                        sys.stdout.write("\r[{:}{:}] {:}/{:}".format(
                            '=' * done, ' ' * (50 - done), dl, total_size))
                        sys.stdout.flush()

    sys.stdout.write("\nFile stored in `{:}`\n".format(out_path))
    sys.stdout.flush()

    if md5_digest is not None and md5_digest != md5(out_path, chunk_size):
        fm.remove_file(out_path)  # Remove the probably-corrupted file
        raise ValueError("Unexpected MD5 hash for the downloaded file.")

    return out_path
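
# Example usage of dl_file (URL and digest below are placeholders):
#
#     out = dl_file('https://example.com/data.zip', '/tmp/downloads',
#                   md5_digest='d41d8cd98f00b204e9800998ecf8427e')
#     print(out)  # -> /tmp/downloads/data.zip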