def __init__(self):
    self._train_path = fm.join(ICUBWORLD28_PATH, 'train')
    self._test_path = fm.join(ICUBWORLD28_PATH, 'test')

    with CDataLoaderICubWorld28.__lock:
        # Download (if needed) data and extract it
        if not fm.folder_exist(self._train_path) \
                or not fm.folder_exist(self._test_path):
            self._get_data(ICUBWORLD28_URL, ICUBWORLD28_PATH)
def _explore_dir(self, dir_path, img_w, img_h, img_c,
                 img_ext, label_re=None, load_data=True):
    """Explore input directory and load files if leaf."""
    # Folders/files will be loaded in alphabetical order
    items_list = sorted(fm.listdir(dir_path))

    # A leaf folder is a folder with only files in it
    leaf = not any(
        fm.folder_exist(fm.join(dir_path, item)) for item in items_list)

    if leaf is True:  # Leaf directory, time to load files!
        return self._load_files(
            dir_path, img_w, img_h, img_c,
            img_ext, label_re=label_re, load_data=load_data)

    # Placeholder for patterns/labels CArray
    patterns = None
    labels = None

    for subdir in items_list:

        subdir_path = fm.join(dir_path, subdir)

        # Only consider folders (there could be also files)
        if not fm.folder_exist(subdir_path):
            continue

        # Explore next subfolder
        patterns_new, labels_new, img_w, img_h, img_c = self._explore_dir(
            subdir_path, img_w, img_h, img_c, img_ext,
            label_re=label_re, load_data=load_data)

        patterns = patterns.append(patterns_new, axis=0) \
            if patterns is not None else patterns_new
        labels = labels.append(labels_new) \
            if labels is not None else labels_new

    return patterns, labels, img_w, img_h, img_c
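# A minimal standalone sketch of the same leaf-detection recursion used by
# `_explore_dir`, stdlib only. The `load_leaf` callback and the directory
# layout are hypothetical, for illustration.
import os

def walk_leaves(dir_path, load_leaf):
    """Recursively visit dir_path, applying load_leaf to leaf folders."""
    items = sorted(os.listdir(dir_path))
    # A leaf folder is a folder with only files in it
    if not any(os.path.isdir(os.path.join(dir_path, i)) for i in items):
        return [load_leaf(dir_path)]
    results = []
    for item in items:
        sub_path = os.path.join(dir_path, item)
        if os.path.isdir(sub_path):  # Only consider folders
            results.extend(walk_leaves(sub_path, load_leaf))
    return results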
def tearDown(self):
    # Remove existing 'models_dict.json' before testing
    if fm.file_exist(MODELS_DICT_PATH):
        fm.remove_file(MODELS_DICT_PATH)
    # Remove folder with the test model (force, as it is not empty)
    if fm.folder_exist(fm.join(SECML_MODELS_DIR, '_test')):
        fm.remove_folder(fm.join(SECML_MODELS_DIR, '_test'), force=True)
def clean_tmp():
    """Cleans temporary files created by the DB loader.

    This method deletes the joblib-related files created while
    loading the database.

    Does not delete the downloaded database archive.

    """
    jl_tmp_folder = fm.join(SECML_DS_DIR, 'lfw_home', 'joblib')
    if fm.folder_exist(jl_tmp_folder):
        fm.remove_folder(jl_tmp_folder, force=True)
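# Hedged usage sketch: `clean_tmp` takes no `self`, so it is presumably a
# static method of the LFW loader class. The name `CDataLoaderLFW` is an
# assumption inferred from the 'lfw_home' cache folder above.
from secml.data.loader import CDataLoaderLFW

ds = CDataLoaderLFW().load()  # Download (if needed) and load the dataset
CDataLoaderLFW.clean_tmp()    # Drop the joblib cache; keep the archive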
def __init__(self):
    # Extract the name of the data file from the url
    self.data_file = self.data_url.split('/')[-1]
    # Path to the downloaded dataset file
    data_file_path = fm.join(CIFAR_PATH, self.data_file)

    with CDataLoaderCIFAR.__lock:
        # Download (if needed) data and extract it
        if not fm.file_exist(data_file_path) or \
                md5(data_file_path) != self.data_md5:
            self._get_data(self.data_url, CIFAR_PATH)
        elif not fm.folder_exist(self.data_path):
            # Downloaded datafile seems valid, extract only
            self._get_data(self.data_url, CIFAR_PATH, extract_only=True)
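# Minimal sketch of the lazy download-and-validate pattern shared by the
# two constructors above: a class-level lock serializes the check, and the
# MD5 digest decides between re-download and reuse. Stdlib-only except for
# `dl_file`, the downloader helper shown at the end of this section; all
# names here (`_LazyLoader`, arguments) are placeholders.
import hashlib
import os
import threading

class _LazyLoader:

    __lock = threading.Lock()

    def __init__(self, url, folder, fname, md5_digest):
        fpath = os.path.join(folder, fname)
        with _LazyLoader.__lock:  # One thread downloads, the others wait
            if not os.path.isfile(fpath) \
                    or self._md5(fpath) != md5_digest:
                dl_file(url, folder, md5_digest=md5_digest)

    @staticmethod
    def _md5(path, chunk_size=1024):
        # Chunked MD5, mirroring the `md5` utility used by the loaders
        h = hashlib.md5()
        with open(path, 'rb') as fp:
            for chunk in iter(lambda: fp.read(chunk_size), b''):
                h.update(chunk)
        return h.hexdigest()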
def _get_data(self, file_url, dl_folder):
    """Download input datafile, unzip and store in output_path.

    Parameters
    ----------
    file_url : str
        URL of the file to download.
    dl_folder : str
        Path to the folder where to store the downloaded file.

    """
    f_dl = fm.join(dl_folder, 'iCubWorld28_128x128.zip?dl=1')
    if not fm.file_exist(f_dl) or md5(f_dl) != ICUBWORLD28_MD5:
        # Generate the full path to the downloaded file
        f_dl = dl_file(file_url, dl_folder, md5_digest=ICUBWORLD28_MD5)

    self.logger.info("Extracting files...")

    # Extract the content of downloaded file
    zipfile.ZipFile(f_dl, 'r').extractall(dl_folder)
    # Remove downloaded file
    fm.remove_file(f_dl)

    # iCubWorld28 zip file contains a macOS private folder, clean it up
    if fm.folder_exist(fm.join(ICUBWORLD28_PATH, '__MACOSX')):
        fm.remove_folder(fm.join(ICUBWORLD28_PATH, '__MACOSX'), force=True)

    # iCubWorld28 zip file contains macOS private files, clean them up
    for dirpath, dirnames, filenames in os.walk(ICUBWORLD28_PATH):
        for file in filenames:
            if fnmatch(file, '.DS_Store'):
                fm.remove_file(fm.join(dirpath, file))

    # Now move all data to an upper folder if needed
    if not fm.folder_exist(self._train_path) \
            or not fm.folder_exist(self._test_path):
        sub_d = fm.join(dl_folder, fm.listdir(dl_folder)[0])
        for e in fm.listdir(sub_d):
            e_full = fm.join(sub_d, e)  # Full path to current element
            try:  # Call copy_file or copy_folder when applicable
                if fm.file_exist(e_full) is True:
                    fm.copy_file(e_full, dl_folder)
                elif fm.folder_exist(e_full) is True:
                    fm.copy_folder(e_full, fm.join(dl_folder, e))
            except Exception:
                pass  # Skip elements that cannot be copied

        # Check that the main dataset file is now in the correct folder
        if not fm.folder_exist(self._train_path) \
                or not fm.folder_exist(self._test_path):
            raise RuntimeError("dataset main file not available!")

        # The subdirectory can now be removed
        fm.remove_folder(sub_d, force=True)
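# Standalone sketch of the extract-then-clean step above, stdlib only;
# 'archive.zip' and 'dest' are hypothetical paths for illustration.
import os
import shutil
import zipfile
from fnmatch import fnmatch

with zipfile.ZipFile('archive.zip', 'r') as zf:
    zf.extractall('dest')

# Drop the macOS metadata that ships inside many zip archives
macosx_dir = os.path.join('dest', '__MACOSX')
if os.path.isdir(macosx_dir):
    shutil.rmtree(macosx_dir)
for dirpath, dirnames, filenames in os.walk('dest'):
    for fname in filenames:
        if fnmatch(fname, '.DS_Store'):
            os.remove(os.path.join(dirpath, fname))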
def load(self, ds_type, day='day4', icub7=False,
         resize_shape=(128, 128), crop_shape=None, normalize=True):
    """Load the dataset.

    The pre-cropped version of the images is loaded, with size 128 x 128.
    An additional resize/crop shape could be passed as input if needed.

    Extra dataset attributes:
     - 'img_w', 'img_h': size of the images in pixels.
     - 'y_orig': CArray with the original labels of the objects.

    Parameters
    ----------
    ds_type : str
        Identifier of the dataset to load, either 'train' or 'test'.
    day : str, optional
        Acquisition day from which to load the images. Default 'day4'.
        The available options are: 'day1', 'day2', 'day3', 'day4'.
    icub7 : bool or int, optional
        If True, load a reduced dataset with 7 objects by taking the
        3rd object for each category. Default False.
        If int, the Nth object for each category will be loaded.
    resize_shape : tuple, optional
        Images will be resized to (height, width) shape.
        Default (128, 128).
    crop_shape : tuple or None, optional
        If a tuple, a crop of (height, width) shape will be extracted
        from the center of each image. Default None.
    normalize : bool, optional
        If True, images are normalized between 0-1. Default True.

    Returns
    -------
    CDataset
        Output dataset.

    """
    if ds_type == 'train':
        data_path = self._train_path
    elif ds_type == 'test':
        data_path = self._test_path
    else:
        raise ValueError("use ds_type = {'train', 'test'}.")

    day_path = fm.join(data_path, day)
    if not fm.folder_exist(day_path):
        raise ValueError("{:} not available.".format(day))

    self.logger.info(
        "Loading iCubWorld{:} {:} {:} dataset from {:}".format(
            '7' if icub7 else '28', day, ds_type, day_path))

    icub7 = 3 if icub7 is True else icub7  # Use the 3rd sub-obj by default

    x = None
    y_orig = []
    for obj in sorted(fm.listdir(day_path)):  # Objects (cup, sponge, ..)

        obj_path = fm.join(day_path, obj)

        # Sub-objects (cup1, cup2, ...)
        for sub_obj in sorted(fm.listdir(obj_path)):

            if icub7 and sub_obj[-1] != str(icub7):
                continue  # Load only the `icub7`th object

            self.logger.debug("Loading images for {:}".format(sub_obj))

            sub_obj_path = fm.join(obj_path, sub_obj)

            for f in sorted(fm.listdir(sub_obj_path)):

                img = Image.open(fm.join(sub_obj_path, f))

                if resize_shape is not None:
                    img = resize_img(img, resize_shape)
                if crop_shape is not None:
                    img = crop_img(img, crop_shape)

                img = CArray(img.getdata(), dtype='uint8').ravel()
                x = x.append(img, axis=0) if x is not None else img

                y_orig.append(sub_obj)  # Label is given by sub-obj name

    # Create the int-based array of labels. Keep original labels in y_orig
    y_orig = CArray(y_orig)
    y = y_orig.unique(return_inverse=True)[1]

    if normalize is True:
        x /= 255.0

    # Size of images is the crop shape (if any), otherwise the resize shape
    img_h, img_w = crop_shape if crop_shape is not None else resize_shape

    header = CDatasetHeader(img_w=img_w, img_h=img_h, y_orig=y_orig)

    return CDataset(x, y, header=header)
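# Hedged usage sketch for the `load` method above, assuming the loader is
# exported as `secml.data.loader.CDataLoaderICubWorld28` (the class name
# appears in the constructor earlier in this section) and that the output
# exposes the standard CDataset properties.
from secml.data.loader import CDataLoaderICubWorld28

loader = CDataLoaderICubWorld28()  # Downloads the dataset if missing
# 7-objects subset of the 'day4' training images, center-cropped
tr = loader.load('train', day='day4', icub7=True, crop_shape=(100, 100))
print(tr.header.img_h, tr.header.img_w)  # 100 100
print(tr.num_samples, tr.num_classes)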
import os

import numpy as np

from secml.array import CArray
from secml.data.loader import CDLRandomBlobs
from secml.optim.constraints import \
    CConstraintBox, CConstraintL1, CConstraintL2
from secml.ml.features.normalization import CNormalizerMinMax
from secml.ml.classifiers import CClassifierSVM, CClassifierDecisionTree
from secml.core.type_utils import is_list, is_float
from secml.figure import CFigure
from secml.testing import CUnitTest
from secml.utils import fm

IMAGES_FOLDER = fm.join(fm.abspath(__file__), 'test_images')
if not fm.folder_exist(IMAGES_FOLDER):
    fm.make_folder(IMAGES_FOLDER)


class CAttackEvasionTestCases(CUnitTest):
    """Unittests interface for CAttackEvasion."""
    images_folder = IMAGES_FOLDER
    make_figures = os.getenv('MAKE_FIGURES', False)  # True to produce figures

    def _load_blobs(self, n_feats, n_clusters, sparse=False, seed=None):
        """Load Random Blobs dataset.

        - n_samples = 50
        - center_box = (-0.5, 0.5)
        - cluster_std = 0.5
def _plot_optimization(self, solver, x_0, g_min, grid_limits,
                       method=None, vmin=None, vmax=None, label=None):
    """Plots the optimization problem.

    Parameters
    ----------
    solver : COptimizer
    x_0 : CArray
        Starting point.
    g_min : CArray
        Final point (after optimization).
    grid_limits : list of tuple
    vmin, vmax : int or None, optional
    label : str or None, optional

    """
    fig = CFigure(markersize=12)

    # Plot objective function
    fig.sp.plot_fun(func=CArray.apply_along_axis,
                    plot_background=True,
                    n_grid_points=30, n_colors=25,
                    grid_limits=grid_limits,
                    levels=[0.5], levels_color='gray',
                    levels_style='--',
                    colorbar=True,
                    func_args=(solver.f.fun, 1,),
                    vmin=vmin, vmax=vmax)

    if solver.bounds is not None:  # Plot box constraint
        fig.sp.plot_fun(func=lambda x: solver.bounds.constraint(x),
                        plot_background=False, n_grid_points=20,
                        grid_limits=grid_limits, levels=[0],
                        colorbar=False)

    if solver.constr is not None:  # Plot distance constraint
        fig.sp.plot_fun(func=lambda x: solver.constr.constraint(x),
                        plot_background=False, n_grid_points=20,
                        grid_limits=grid_limits, levels=[0],
                        colorbar=False)

    # Plot optimization trace
    if solver.x_seq is not None:
        fig.sp.plot_path(solver.x_seq)
    else:
        fig.sp.plot_path(x_0.append(g_min, axis=0))

    fig.sp.title("{:}(fun={:}) - Glob Min @ {:}".format(
        solver.class_type, solver.f.class_type,
        solver.f.global_min_x().round(2).tolist()))

    test_img_fold_name = 'test_images'
    test_img_fold_path = fm.join(fm.abspath(__file__), test_img_fold_name)
    if not fm.folder_exist(test_img_fold_path):
        fm.make_folder(test_img_fold_path)

    if method is None:
        filename = fm.join(test_img_fold_path,
                           solver.class_type + '-' + solver.f.class_type)
    else:
        filename = fm.join(test_img_fold_path,
                           solver.class_type + '-' + method +
                           '-' + solver.f.class_type)

    filename += ('-' + label) if label is not None else ''

    fig.savefig('{:}.pdf'.format(filename))
def dl_file(url, output_dir, user=None, headers=None,
            chunk_size=1024, md5_digest=None):
    """Download file from input url and store in output_dir.

    Parameters
    ----------
    url : str
        Url of the file to download.
    output_dir : str
        Path to the directory where the file should be stored.
        If the folder does not exist, it will be created.
    user : str or None, optional
        String with the user[:password] if required for accessing url.
    headers : dict or None, optional
        Dictionary with any additional header for the download request.
    chunk_size : int, optional
        Size of the data chunk to read from url in bytes. Default 1024.
    md5_digest : str or None, optional
        Expected MD5 digest of the downloaded file.
        If a different digest is computed, the downloaded file will be
        removed and ValueError is raised.

    """
    # Parsing user string
    auth = tuple(user.split(':')) if user is not None else None
    # If no password is specified, use an empty string
    auth = (auth[0], '') if auth is not None and len(auth) == 1 else auth

    r = requests.get(url, auth=auth, headers=headers, stream=True)

    if r.status_code != 200:
        raise RuntimeError(
            "File is not available (error code {:})".format(r.status_code))

    # Get file size (bytes)
    if "content-length" in r.headers:
        total_size = r.headers.get('content-length').strip()
        total_size = int(total_size)
    else:  # Total size unknown
        total_size = None

    dl = 0

    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1 byte")

    sys.stdout.write("Downloading from `{:}`".format(url))
    if total_size is not None:
        sys.stdout.write(" ({:} bytes)".format(total_size))
    sys.stdout.write("\n")
    sys.stdout.flush()

    # Create output directory if it does not exist
    if not fm.folder_exist(output_dir):
        fm.make_folder(output_dir)

    try:  # Get the filename from the response headers
        fname = re.findall(
            r"filename=\"(.+)\"", r.headers["Content-Disposition"])[0]
    except (KeyError, IndexError):
        # Or use the last part of download url (removing parameters)
        fname = url.split('/')[-1].split('?', 1)[0]

    # Build full path of output file
    out_path = fm.join(output_dir, fname)

    # Read data and store each chunk
    with open(out_path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=chunk_size):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
                # Report progress (if total_size is known)
                if total_size is not None:
                    dl += len(chunk)
                    done = int((50 * dl) / total_size)
                    if sys.stdout.isatty() is True:
                        # Provide real-time updates (if stdout is a tty)
                        sys.stdout.write("\r[{:}{:}] {:}/{:}".format(
                            '=' * done, ' ' * (50 - done), dl, total_size))
                        sys.stdout.flush()

    sys.stdout.write("\nFile stored in `{:}`\n".format(out_path))
    sys.stdout.flush()

    if md5_digest is not None and md5_digest != md5(out_path, chunk_size):
        fm.remove_file(out_path)  # Remove the probably-corrupted file
        raise ValueError("Unexpected MD5 hash for the downloaded file.")

    return out_path
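# Hedged usage sketch for `dl_file`; the URL and digest are placeholders,
# not a real dataset location.
out_path = dl_file('https://example.com/data/archive.zip?dl=1',
                   output_dir='/tmp/downloads',
                   md5_digest='d41d8cd98f00b204e9800998ecf8427e')
# The stored name comes from the Content-Disposition header if present,
# otherwise from the last url segment with parameters stripped
print(out_path)  # e.g. '/tmp/downloads/archive.zip'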