def _prepare_data(self):
    """Group the '.jpg' files found under ``self._data_dir`` by class label.

    The class label of a file is the part of its base name before the
    first ``'_'``. Files whose label is ``'0000'`` or ``'-1'`` are skipped
    (presumably distractor/junk entries -- confirm against the dataset).

    Side effects:
        self.im_list: rebuilt from scratch as a list with one numpy array
            of image paths per class. The position of a class in the list
            is its class id, assigned in order of first appearance.
        self._n_class: number of distinct classes kept.
        Calls ``self._shuffle_files()`` once the lists are in place.
    """
    all_im_list = dfutil.get_file_list(self._data_dir, '.jpg')

    class_dict = {}  # class label -> class id (index into grouped)
    grouped = []     # grouped[class_id] -> list of image paths

    for im_path in all_im_list:
        # ntpath is used so that both '/' and '\\' count as separators.
        class_label = ntpath.basename(im_path).split('_')[0]
        if class_label in ('0000', '-1'):
            continue

        class_id = class_dict.get(class_label)
        if class_id is None:
            # First image of a new class: register the class and keep the
            # image (it must not be dropped when the list is first created).
            class_id = class_dict[class_label] = len(grouped)
            grouped.append([])
        grouped[class_id].append(im_path)

    # Assign in one step so a repeated call never mixes stale and new data.
    self.im_list = [np.array(class_im_list) for class_im_list in grouped]
    self._n_class = len(self.im_list)

    self._shuffle_files()
def _load_file_list(self, data_name_list, data_dir_list):
    """Build ``self._file_name_list`` with one array of paths per data type.

    The first (name, dir) pair is passed to ``get_file_list`` and drives
    everything else: for each file it returns, the base name without its
    extension is combined with every remaining (name, dir) pair as
    ``os.path.join(data_dir, file_id + data_name)``.

    Afterwards each list is converted to a numpy array,
    ``self._suffle_file_list()`` is called when ``self._shuffle`` is truthy,
    and every array is cut down to the first
    ``int(len(first_array) * self._load_percentage)`` entries.

    Args:
        data_name_list: per data type, the name/suffix handed to
            ``get_file_list`` (first entry) or appended to the file id
            (remaining entries).
        data_dir_list: per data type, the directory to look in / join with.
    """
    # Pairs describing the data types derived from the primary file list.
    companions = list(zip(data_name_list[1:], data_dir_list[1:]))

    columns = [[] for _ in data_name_list]
    columns[0] = get_file_list(data_dir_list[0], data_name_list[0])

    for primary_path in columns[0]:
        _, without_drive = os.path.splitdrive(primary_path)
        stem = os.path.splitext(os.path.basename(without_drive))[0]
        for slot, (suffix, directory) in enumerate(companions, start=1):
            columns[slot].append(os.path.join(directory, stem + suffix))

    self._file_name_list = [np.array(entries) for entries in columns]

    if self._shuffle:
        self._suffle_file_list()

    # Keep only the requested fraction of samples, measured on the first list.
    n_sample = int(len(self._file_name_list[0]) * self._load_percentage)
    for slot, entries in enumerate(self._file_name_list):
        self._file_name_list[slot] = entries[:n_sample]

    print('{} samples in total.'.format(n_sample))
def _load_file_list(self, data_name_list, data_dir_list):
    """Collect one file list per (data_name, data_dir) pair.

    ``self._file_name_list`` is reset to an empty list and then filled with
    the result of ``get_file_list(data_dir, data_name)`` for each pair, in
    order. Pairing stops at the shorter of the two input lists. When
    ``self._shuffle`` is truthy, ``self._suffle_file_list()`` is called
    afterwards.

    Args:
        data_name_list: names passed as the second argument of
            ``get_file_list``.
        data_dir_list: directories passed as the first argument of
            ``get_file_list``.
    """
    self._file_name_list = []
    # extend() consumes the generator lazily, so lists are added one by one.
    self._file_name_list.extend(
        get_file_list(directory, name)
        for name, directory in zip(data_name_list, data_dir_list)
    )

    if self._shuffle:
        self._suffle_file_list()
def _load_data(self):
    """Return the sorted 'jpg' file list of ``self._data_dir`` with zero labels.

    Returns:
        A tuple ``(im_list, label_list)`` where ``im_list`` is a numpy array
        holding the sorted result of
        ``dfutil.get_file_list(self._data_dir, 'jpg')`` and ``label_list`` is
        a plain list of ``0`` with one entry per image.
    """
    found = list(dfutil.get_file_list(self._data_dir, 'jpg'))
    found.sort()
    image_paths = np.array(found)

    # Every image gets the same placeholder label 0.
    zero_labels = [0] * len(image_paths)
    return image_paths, zero_labels