def _read_data_train(self, data_dir, num_labeled, val_percent):
    """Build the labeled, remaining-train and validation splits.

    Every entry of ``data_dir`` (as listed by ``listdir_nohidden``) is
    treated as one class; a class's label is its position in the sorted
    list of class names.

    Args:
        data_dir: directory containing one sub-directory per class.
        num_labeled: total labeled budget, divided evenly (as a float)
            across the classes.
        val_percent: fraction of each class's images held out for
            validation.

    Returns:
        A tuple ``(items_x, items_u, items_v)`` of ``Datum`` lists: the
        training items within the per-class labeled quota, the remaining
        training items, and the validation items.
    """
    categories = sorted(listdir_nohidden(data_dir))
    quota_per_class = num_labeled / len(categories)

    items_x = []
    items_u = []
    items_v = []

    for label, category in enumerate(categories):
        category_dir = osp.join(data_dir, category)
        filenames = listdir_nohidden(category_dir)

        # Train/val split following Oliver et al. 2018.
        # Set cfg.DATASET.VAL_PERCENT to 0 to not use val data.
        val_count = math.floor(len(filenames) * val_percent)
        val_names = filenames[:val_count]
        train_names = filenames[val_count:]

        # The shuffle happens after the split, so it only reorders the
        # training portion and never moves files across the boundary.
        random.shuffle(train_names)

        for rank, fname in enumerate(train_names, start=1):
            datum = Datum(impath=osp.join(category_dir, fname), label=label)
            # The first ``quota_per_class`` shuffled files are labeled.
            bucket = items_x if rank <= quota_per_class else items_u
            bucket.append(datum)

        for fname in val_names:
            items_v.append(
                Datum(impath=osp.join(category_dir, fname), label=label)
            )

    return items_x, items_u, items_v
def _read_data_test(self, data_dir):
    """Collect every image under ``data_dir`` as a ``Datum``.

    Each entry of ``data_dir`` is treated as one class whose label is its
    position in the sorted list of class names.

    Args:
        data_dir: directory containing one sub-directory per class.

    Returns:
        A list of ``Datum`` objects, grouped by class in sorted-name order.
    """
    categories = sorted(listdir_nohidden(data_dir))
    samples = []

    for label, category in enumerate(categories):
        category_dir = osp.join(data_dir, category)
        samples.extend(
            Datum(impath=osp.join(category_dir, fname), label=label)
            for fname in listdir_nohidden(category_dir)
        )

    return samples
def _read_data(self, input_domains):
    """Collect images from ``<dataset_dir>/<domain>/<class>/`` folders.

    The domain index is the position of the domain name in
    ``input_domains``; the label is the position of the class name in the
    sorted class list of that domain.

    Args:
        input_domains: iterable of domain directory names located under
            ``self.dataset_dir``.

    Returns:
        A list of ``Datum`` objects carrying ``impath``, ``label`` and
        ``domain``.
    """
    samples = []

    for domain_idx, domain_name in enumerate(input_domains):
        domain_root = osp.join(self.dataset_dir, domain_name)
        categories = sorted(listdir_nohidden(domain_root))

        for label, category in enumerate(categories):
            category_dir = osp.join(domain_root, category)
            samples.extend(
                Datum(
                    impath=osp.join(category_dir, fname),
                    label=label,
                    domain=domain_idx,
                )
                for fname in listdir_nohidden(category_dir)
            )

    return samples
def _read_data(self, input_domains, split='train'):
    """Collect images from ``<dataset_dir>/<domain>/<split>/<class>/``.

    The label is taken from the class folder name: it is the integer that
    precedes the first underscore. The domain index is the position of
    the domain name in ``input_domains``.

    Args:
        input_domains: iterable of domain directory names located under
            ``self.dataset_dir``.
        split: name of the split sub-directory inside each domain.

    Returns:
        A list of ``Datum`` objects carrying ``impath``, ``label`` and
        ``domain``.
    """
    samples = []

    for domain_idx, domain_name in enumerate(input_domains):
        split_root = osp.join(self.dataset_dir, domain_name, split)

        for folder in listdir_nohidden(split_root):
            folder_path = osp.join(split_root, folder)
            filenames = listdir_nohidden(folder_path)
            # Folder names start with the numeric label, e.g. "<label>_...".
            label = int(folder.partition('_')[0])

            samples.extend(
                Datum(
                    impath=osp.join(folder_path, fname),
                    label=label,
                    domain=domain_idx,
                )
                for fname in filenames
            )

    return samples
def _load_data_from_directory(directory):
    """Pair every ``*.jpg`` under the class folders with a label.

    Each entry of ``directory`` is treated as one class whose label is its
    position in the sorted list of folder names.

    Args:
        directory: directory containing one sub-directory per class.

    Returns:
        A list of ``(impath, label)`` tuples.
    """
    class_folders = sorted(listdir_nohidden(directory))
    pairs = []

    for label, class_folder in enumerate(class_folders):
        pattern = osp.join(directory, class_folder, '*.jpg')
        pairs.extend((jpg_path, label) for jpg_path in glob.glob(pattern))

    return pairs
def parse_dir(directory, end_signal, regex_acc, regex_err, args):
    """Parse ``log.txt`` in each sub-directory and report accuracy and error.

    Within a log, matches are recorded only from the line equal to
    ``end_signal`` (after stripping) onwards. Per-file results and the
    aggregate mean with its spread are printed.

    Args:
        directory: directory whose sub-directories each hold a ``log.txt``.
        end_signal: stripped line content that marks where results start.
        regex_acc: compiled pattern whose group 1 is the accuracy value.
        regex_err: compiled pattern whose group 1 is the error value.
        args: object providing ``ci95`` (use ``compute_ci95`` instead of
            ``np.std`` for the spread) and ``res_format`` (one of
            ``'acc'``, ``'err'``, ``'acc_and_err'`` to choose what to print).

    Returns:
        A tuple ``(acc_mean, err_mean)``.
    """
    print('Parsing {}'.format(directory))

    log_paths = []
    accuracies = []
    errors = []

    for run_name in listdir_nohidden(directory, sort=True):
        log_path = osp.join(directory, run_name, 'log.txt')
        assert check_isfile(log_path)

        with open(log_path, 'r') as log_file:
            raw_lines = log_file.readlines()

        finished = False
        for raw_line in raw_lines:
            text = raw_line.strip()
            if text == end_signal:
                finished = True

            acc_hit = regex_acc.search(text)
            if finished and acc_hit:
                accuracies.append(float(acc_hit.group(1)))
                log_paths.append(log_path)

            err_hit = regex_err.search(text)
            if finished and err_hit:
                errors.append(float(err_hit.group(1)))

    # zip() stops at the shortest of the three lists.
    for log_path, acc, err in zip(log_paths, accuracies, errors):
        print('file: {}. acc: {:.2f}%. err: {:.2f}%'.format(log_path, acc, err))

    # Spread is either the helper's 95% CI or the plain standard deviation.
    spread = compute_ci95 if args.ci95 else np.std

    acc_mean = np.mean(accuracies)
    acc_std = spread(accuracies)
    err_mean = np.mean(errors)
    err_std = spread(errors)

    print('===')
    print('outcome of directory: {}'.format(directory))
    if args.res_format in ['acc', 'acc_and_err']:
        print('* acc: {:.2f}% +- {:.2f}%'.format(acc_mean, acc_std))
    if args.res_format in ['err', 'acc_and_err']:
        print('* err: {:.2f}% +- {:.2f}%'.format(err_mean, err_std))
    print('===')

    return acc_mean, err_mean
def _read_data_all(self, data_dir):
    """Collect every image in a flat directory as a ``Datum``.

    The label is the second underscore-separated field of the file stem.
    The literal ``none`` maps to ``-1``; anything else is parsed as an int.

    Args:
        data_dir: directory that directly contains the image files.

    Returns:
        A list of ``Datum`` objects, in listing order.
    """
    samples = []

    for fname in listdir_nohidden(data_dir):
        full_path = osp.join(data_dir, fname)
        stem = osp.splitext(fname)[0]
        tag = stem.split('_')[1]
        # 'none' marks an image without a class; it is encoded as -1.
        label = -1 if tag == 'none' else int(tag)
        samples.append(Datum(impath=full_path, label=label))

    return samples
def read_image_list(im_dir, n_max=None, n_repeat=None):
    """List ``(impath, label)`` pairs for the images in ``im_dir``.

    The label is the integer in the second underscore-separated field of
    the file stem.

    Args:
        im_dir: directory that directly contains the image files.
        n_max: if given, keep a random sample of this many pairs (drawn
            with ``random.sample``).
        n_repeat: if given, repeat the resulting list this many times.

    Returns:
        A list of ``(impath, label)`` tuples.
    """
    pairs = []
    for fname in listdir_nohidden(im_dir):
        stem, _ext = osp.splitext(fname)
        label = int(stem.split('_')[1])
        pairs.append((osp.join(im_dir, fname), label))

    if n_max is not None:
        pairs = random.sample(pairs, n_max)

    if n_repeat is not None:
        pairs = pairs * n_repeat

    return pairs
def _read_data(self, input_domains, split):
    """Collect ``*.jpg`` images from ``<dataset_dir>/<DOMAIN>/<split>/<class>/``.

    Domain names are upper-cased to form the directory name. The label is
    the position of the class folder in the sorted folder list, and the
    domain index is the position of the name in ``input_domains``.

    Args:
        input_domains: iterable of domain names.
        split: name of the split sub-directory inside each domain.

    Returns:
        A list of ``Datum`` objects carrying ``impath``, ``label`` and
        ``domain``.
    """
    samples = []

    for domain_idx, domain_name in enumerate(input_domains):
        split_root = osp.join(self.dataset_dir, domain_name.upper(), split)
        class_folders = sorted(listdir_nohidden(split_root))

        for label, class_folder in enumerate(class_folders):
            pattern = osp.join(split_root, class_folder, '*.jpg')
            samples.extend(
                Datum(impath=jpg_path, label=label, domain=domain_idx)
                for jpg_path in glob.glob(pattern)
            )

    return samples
def read_image_list(im_dir, n_max=None, n_repeat=None):
    """List ``(impath, label)`` pairs for the images in ``im_dir``.

    The label is the integer in the second underscore-separated field of
    the file stem.

    Args:
        im_dir: directory that directly contains the image files.
        n_max: if given, keep only the first ``n_max`` pairs in listing
            order.
        n_repeat: if given, repeat the resulting list this many times.

    Returns:
        A list of ``(impath, label)`` tuples.
    """
    pairs = []
    for fname in listdir_nohidden(im_dir):
        stem, _ext = osp.splitext(fname)
        label = int(stem.split('_')[1])
        pairs.append((osp.join(im_dir, fname), label))

    if n_max is not None:
        # Deliberately NOT random: a fixed prefix is taken, which follows
        # the procedure in Volpi et al. NIPS'18.
        pairs = pairs[:n_max]

    if n_repeat is not None:
        pairs = pairs * n_repeat

    return pairs
def main(args, end_signal):
    """Parse one experiment directory, or several and average them.

    Args:
        args: object providing ``directory`` and ``multi_exp``; it is also
            forwarded unchanged to ``parse_dir``. When ``multi_exp`` is
            truthy, every entry of ``args.directory`` is parsed as its own
            experiment and the overall averages are printed.
        end_signal: forwarded to ``parse_dir``.
    """
    acc_pattern = re.compile(r'\* accuracy: ([\.\deE+-]+)%')
    err_pattern = re.compile(r'\* error: ([\.\deE+-]+)%')

    if not args.multi_exp:
        parse_dir(args.directory, end_signal, acc_pattern, err_pattern, args)
        return

    outcomes = [
        parse_dir(
            osp.join(args.directory, exp_name),
            end_signal,
            acc_pattern,
            err_pattern,
            args,
        )
        for exp_name in listdir_nohidden(args.directory)
    ]
    acc_values = [acc for acc, _ in outcomes]
    err_values = [err for _, err in outcomes]

    overall_acc = np.mean(acc_values)
    overall_err = np.mean(err_values)

    print('overall average')
    print('* acc: {:.2f}%'.format(overall_acc))
    print('* err: {:.2f}%'.format(overall_err))
def _read_data_train(self, data_dir, fold, fold_file):
    """Read training images from a flat directory, optionally for one fold.

    File names are sorted, and each label is the integer in the second
    underscore-separated field of the file stem.

    Args:
        data_dir: directory that directly contains the training images.
        fold: index of the line in ``fold_file`` to use. A negative value
            selects every image.
        fold_file: text file in which each line lists whitespace-separated
            indices into the sorted file names. Only read when
            ``fold >= 0``.

    Returns:
        A list of ``Datum`` objects, in the order given by the selected
        indices.
    """
    imnames = listdir_nohidden(data_dir)
    imnames.sort()

    if fold >= 0:
        with open(fold_file, 'r') as f:
            str_idx = f.read().splitlines()[fold]
        # Parse as plain Python ints. The earlier np.uint8 parsing could
        # not represent indices above 255, so larger indices came out as
        # wrong values and selected the wrong images.
        list_idx = [int(token) for token in str_idx.split()]
    else:
        list_idx = range(len(imnames))

    items = []
    for i in list_idx:
        imname = imnames[i]
        impath = osp.join(data_dir, imname)
        label = int(osp.splitext(imname)[0].split('_')[1])
        items.append(Datum(impath=impath, label=label))

    return items