def _get_data(self, file_url, dl_folder):
    """Download input datafile, unzip and store in output_path.

    The archive is downloaded only when a copy with the expected md5
    digest is not already stored in `dl_folder`. After extraction the
    archive itself and the macOS leftovers (`__MACOSX` folder and
    `.DS_Store` files) are removed. If the train/test folders are not
    available yet, the content of the first entry listed in `dl_folder`
    is copied one level up and that entry is then removed.

    Parameters
    ----------
    file_url : str
        URL of the file to download.
    dl_folder : str
        Path to the folder where to store the downloaded file.

    Raises
    ------
    RuntimeError
        If the train or test folder is still missing after the
        extracted data has been moved.

    """
    f_dl = fm.join(dl_folder, 'iCubWorld28_128x128.zip?dl=1')
    if not fm.file_exist(f_dl) or md5(f_dl) != ICUBWORLD28_MD5:
        # Generate the full path to the downloaded file
        f_dl = dl_file(file_url, dl_folder, md5_digest=ICUBWORLD28_MD5)

    self.logger.info("Extracting files...")

    # Extract the content of downloaded file. The context manager
    # guarantees the archive handle is closed before removing the file
    with zipfile.ZipFile(f_dl, 'r') as zip_f:
        zip_f.extractall(dl_folder)
    # Remove downloaded file
    fm.remove_file(f_dl)

    # iCubWorld28 zip file contains a macosx private folder, clean it up
    macosx_dir = fm.join(ICUBWORLD28_PATH, '__MACOSX')
    if fm.folder_exist(macosx_dir):
        fm.remove_folder(macosx_dir, force=True)

    # iCubWorld28 zip file contains macosx private files, clean them up
    for dirpath, _, filenames in os.walk(ICUBWORLD28_PATH):
        for f_name in filenames:
            if fnmatch(f_name, '.DS_Store'):
                fm.remove_file(fm.join(dirpath, f_name))

    # Now move all data to an upper folder if needed
    if not fm.folder_exist(self._train_path) \
            or not fm.folder_exist(self._test_path):
        sub_d = fm.join(dl_folder, fm.listdir(dl_folder)[0])
        for e in fm.listdir(sub_d):
            e_full = fm.join(sub_d, e)  # Full path to current element
            # Best-effort copy: a failure on a single element is only
            # logged, the check below decides if the dataset is usable.
            # `Exception` (not a bare except) so that KeyboardInterrupt
            # and SystemExit are never swallowed.
            try:  # Call copy_file or copy_folder when applicable
                if fm.file_exist(e_full) is True:
                    fm.copy_file(e_full, dl_folder)
                elif fm.folder_exist(e_full) is True:
                    fm.copy_folder(e_full, fm.join(dl_folder, e))
            except Exception as err:
                self.logger.info(
                    "Unable to copy {:}: {:}".format(e_full, err))

        # Check that the main dataset file is now in the correct folder
        if not fm.folder_exist(self._train_path) \
                or not fm.folder_exist(self._test_path):
            raise RuntimeError("dataset main file not available!")

        # The subdirectory can now be removed
        fm.remove_folder(sub_d, force=True)
def _load_files(self, dir_path, img_w, img_h, img_c, img_ext,
                label_re=None, load_data=True):
    """Loads any file with given extension inside input folder.

    Files are visited in alphabetical order. Every loaded file gets the
    same label, derived from the name of the containing folder.

    Parameters
    ----------
    dir_path : str
        Path to the folder containing the files to load.
    img_w, img_h, img_c : array-like
        Accumulators for width, height and number of bands of each
        loaded image. They must expose an `append` method returning
        the extended array (presumably CArray - confirm with caller).
    img_ext : str
        Extension of the files to load. It is compared with the
        lowercased extension of each file name.
    label_re : str or None, optional
        If None, the label is the containing folder name. Otherwise,
        the label is the first match of this regular expression in
        the folder name.
    load_data : bool, optional
        If True, the pixel data of each image is stored as a row of
        `patterns`. Otherwise only the path to the file is stored.

    Returns
    -------
    patterns : CArray or None
        One row for each loaded file. None if no file has been loaded.
    labels : CArray or None
        One label for each loaded file. None if no file has been loaded.
    img_w, img_h, img_c : array-like
        Input accumulators extended with the data of the loaded files.

    Raises
    ------
    AttributeError
        If `label_re` is given, does not match the folder name and at
        least one file has to be loaded.

    """
    # Folders/files will be loaded in alphabetical order
    files_list = sorted(fm.listdir(dir_path))

    # Consider only the directory name to set the label
    dir_name = fm.split(dir_path)[1]

    # Placeholder for patterns/labels CArray
    patterns = None
    labels = None
    for file_name in files_list:

        # Load only files of the specified format
        if fm.splitext(file_name)[1].lower() != img_ext:
            continue

        # Full path to image file
        file_path = fm.join(dir_path, file_name)

        # Opening image in lazy mode (to verify dimensions etc.).
        # The context manager releases the file handle as soon as
        # the needed data has been read
        with Image.open(file_path) as img:

            # Storing image dimensions...
            img_w = img_w.append(img.width)
            img_h = img_h.append(img.height)
            img_c = img_c.append(len(img.getbands()))

            # If load_data is True, store features, else store path
            if load_data is True:
                # Storing image as a 2D CArray
                array_img = CArray(img.getdata()).ravel().atleast_2d()
            else:
                array_img = CArray([[file_path]])

        # Creating the 2D array patterns x features
        patterns = patterns.append(
            array_img, axis=0) if patterns is not None else array_img

        # label is the image's containing folder name or the re result
        c_id = dir_name if label_re is None \
            else re.search(label_re, dir_name).group(0)

        labels = labels.append(c_id) if labels is not None \
            else CArray(c_id)

        self.logger.debug("{:} has been loaded..."
                          "".format(fm.join(dir_path, file_name)))

    return patterns, labels, img_w, img_h, img_c
def _explore_dir(self, dir_path, img_w, img_h, img_c, img_ext,
                 label_re=None, load_data=True):
    """Explore input directory and load files if leaf.

    A leaf is a folder which does not contain any subfolder. Leaf
    folders are loaded through `_load_files`; any other folder is
    explored recursively, ignoring the files it directly contains.

    Parameters
    ----------
    dir_path : str
        Path to the folder to explore.
    img_w, img_h, img_c : array-like
        Accumulators for width, height and number of bands of the
        loaded images, forwarded to `_load_files`.
    img_ext : str
        Extension of the files to load, forwarded to `_load_files`.
    label_re : str or None, optional
        Regular expression for the labels, forwarded to `_load_files`.
    load_data : bool, optional
        Forwarded to `_load_files`.

    Returns
    -------
    patterns : CArray or None
        Patterns loaded from all the explored leaves.
        None if no file has been loaded.
    labels : CArray or None
        Labels of the loaded patterns. None if no file has been loaded.
    img_w, img_h, img_c : array-like
        Input accumulators extended with the data of the loaded files.

    """
    # Folders/files will be loaded in alphabetical order
    items_list = sorted(fm.listdir(dir_path))

    # A leaf folder is a folder with only files in it
    leaf = not any(
        fm.folder_exist(fm.join(dir_path, item)) for item in items_list)

    if leaf:  # Leaf directory, time to load files!
        return self._load_files(dir_path, img_w, img_h, img_c, img_ext,
                                label_re=label_re, load_data=load_data)

    # Placeholder for patterns/labels CArray
    patterns = None
    labels = None
    for subdir in items_list:

        subdir_path = fm.join(dir_path, subdir)

        # Only consider folders (there could be also files)
        if not fm.folder_exist(subdir_path):
            continue

        # Explore next subfolder
        patterns_new, labels_new, img_w, img_h, img_c = self._explore_dir(
            subdir_path, img_w, img_h, img_c, img_ext,
            label_re=label_re, load_data=load_data)

        # A subfolder without any file to load returns None placeholders.
        # Skip them instead of appending None to the collected arrays
        if patterns_new is not None:
            patterns = patterns.append(patterns_new, axis=0) \
                if patterns is not None else patterns_new
        if labels_new is not None:
            labels = labels.append(labels_new) \
                if labels is not None else labels_new

    return patterns, labels, img_w, img_h, img_c
def _load_files(self, ds_path, img_w, img_h, img_c, img_ext,
                load_data=True):
    """Loads any file with given extension inside input folder.

    Files are visited in alphabetical order.

    Parameters
    ----------
    ds_path : str
        Path to the folder containing the files to load.
    img_w, img_h, img_c : array-like
        Accumulators for width, height and number of bands of each
        loaded image. They must expose an `append` method returning
        the extended array (presumably CArray - confirm with caller).
    img_ext : str
        Extension of the files to load. It is compared with the
        lowercased extension of each file name.
    load_data : bool, optional
        If True, the pixel data of each image is stored as a row of
        `patterns`. Otherwise only the path to the file is stored.

    Returns
    -------
    patterns : CArray or None
        One row for each loaded file. None if no file has been loaded.
    img_w, img_h, img_c : array-like
        Input accumulators extended with the data of the loaded files.

    """
    # Files will be loaded in alphabetical order
    files_list = sorted(fm.listdir(ds_path))

    # Placeholder for patterns CArray
    patterns = None
    for file_name in files_list:

        # Load only files of the specified format
        if fm.splitext(file_name)[1].lower() != img_ext:
            continue

        # Full path to image file
        file_path = fm.join(ds_path, file_name)

        # Opening image in lazy mode (to verify dimensions etc.).
        # The context manager releases the file handle as soon as
        # the needed data has been read
        with Image.open(file_path) as img:

            # Storing image dimensions...
            img_w = img_w.append(img.width)
            img_h = img_h.append(img.height)
            img_c = img_c.append(len(img.getbands()))

            # If load_data is True, store features, else store path
            if load_data is True:
                # Storing image as a 2D CArray
                array_img = CArray(img.getdata()).ravel().atleast_2d()
            else:
                array_img = CArray([[file_path]])

        # Creating the 2D array patterns x features
        patterns = patterns.append(
            array_img, axis=0) if patterns is not None else array_img

        self.logger.debug("{:} has been loaded..."
                          "".format(fm.join(ds_path, file_name)))

    return patterns, img_w, img_h, img_c
def load(self, ds_type, day='day4', icub7=False,
         resize_shape=(128, 128), crop_shape=None, normalize=True):
    """Load the dataset.

    The pre-cropped version of the images is loaded, with size 128 x 128.
    An additional resize/crop shape could be passed as input if needed.

    Extra dataset attributes:
      - 'img_w', 'img_h': size of the images in pixels.
      - 'y_orig': CArray with the original labels of the objects.

    Parameters
    ----------
    ds_type : str
        Identifier of the dataset to load, either 'train' or 'test'.
    day : str, optional
        Acquisition day from which to load the images. Default 'day4'.
        The available options are: 'day1', 'day2', 'day3', 'day4'.
    icub7 : bool or int, optional
        If True, load a reduced dataset with 7 objects by
        taking the 3rd object for each category. Default False.
        If int, the Nth object for each category will be loaded.
        The object is selected by comparing the last character of
        the sub-object folder name with the given number.
    resize_shape : tuple or None, optional
        Images will be resized to (height, width) shape.
        Default (128, 128). If None, images are not resized.
    crop_shape : tuple or None, optional
        If a tuple, a crop of (height, width) shape will be extracted
        from the center of each image. Default None.
    normalize : bool, optional
        If True, images are normalized between 0-1. Default True.

    Returns
    -------
    CDataset
        Output dataset.

    Raises
    ------
    ValueError
        If `ds_type` is not 'train' or 'test', if the folder of the
        requested day is not available, or if no image has been
        found for the requested selection.

    """
    if ds_type == 'train':
        data_path = self._train_path
    elif ds_type == 'test':
        data_path = self._test_path
    else:
        raise ValueError("use ds_type = {'train', 'test'}.")

    day_path = fm.join(data_path, day)
    if not fm.folder_exist(day_path):
        raise ValueError("{:} not available.".format(day))

    self.logger.info(
        "Loading iCubWorld{:} {:} {:} dataset from {:}".format(
            '7' if icub7 else '28', day, ds_type, day_path))

    icub7 = 3 if icub7 is True else icub7  # Use the 3rd sub-obj by default

    # Without resize/crop the size of the images is only known
    # after opening them, so it must be stored while loading
    keep_native_size = resize_shape is None and crop_shape is None
    native_size = None  # (height, width) of the last loaded image

    x = None
    y_orig = []
    for obj in sorted(fm.listdir(day_path)):  # Objects (cup, sponge, ..)

        obj_path = fm.join(day_path, obj)

        # Sub-objects (cup1, cup2, ...)
        for sub_obj in sorted(fm.listdir(obj_path)):

            if icub7 and sub_obj[-1] != str(icub7):
                continue  # Load only the `icub7`th object

            self.logger.debug("Loading images for {:}".format(sub_obj))

            sub_obj_path = fm.join(obj_path, sub_obj)

            for f in sorted(fm.listdir(sub_obj_path)):

                # The context manager releases the file handle as
                # soon as the pixel data has been copied
                with Image.open(fm.join(sub_obj_path, f)) as raw_img:
                    img = raw_img
                    if resize_shape is not None:
                        img = resize_img(img, resize_shape)
                    if crop_shape is not None:
                        img = crop_img(img, crop_shape)
                    if keep_native_size:
                        native_size = (img.height, img.width)
                    img = CArray(img.getdata(), dtype='uint8').ravel()

                x = x.append(img, axis=0) if x is not None else img

                y_orig.append(sub_obj)  # Label is given by sub-obj name

    if x is None:  # Nothing to normalize or to store in the dataset
        raise ValueError(
            "no images found in {:} for the requested selection.".format(
                day_path))

    # Create the int-based array of labels. Keep original labels in y_orig
    y_orig = CArray(y_orig)
    y = CArray(y_orig).unique(return_inverse=True)[1]

    if normalize is True:
        x /= 255.0

    # Size of images is the crop shape (if any) otherwise, the resize
    # shape. If neither is given, use the size of the loaded images
    if crop_shape is not None:
        img_h, img_w = crop_shape
    elif resize_shape is not None:
        img_h, img_w = resize_shape
    else:
        img_h, img_w = native_size

    header = CDatasetHeader(img_w=img_w, img_h=img_h, y_orig=y_orig)

    return CDataset(x, y, header=header)