def _folderize_imagenet_one_of_each():
    """One-off script: copy the one-of-each images into per-label subfolders.

    Every ``.JPEG`` path that ``files.list_files`` returns for
    ``IMAGENET_ONE_OF_EACH_PATH`` is copied to
    ``IMAGENET_ONE_OF_EACH_FLOW_PATH/<label>/<basename>``, where ``<label>``
    is the part of the file's basename before the first underscore.
    The source files are left in place (copy, not move).
    """
    olddir = IMAGENET_ONE_OF_EACH_PATH
    newdir = IMAGENET_ONE_OF_EACH_FLOW_PATH
    files.ensure_dir_exists(newdir)

    # List the source directory once and iterate over that result, instead
    # of listing it a second time inside the loop header.
    old_files = files.list_files(olddir, endswith='.JPEG', abs_paths=True)
    for f in old_files:
        basename = os.path.basename(f)
        # The text before the first '_' in the file name is used as the
        # name of the destination subfolder.
        label = basename.split('_')[0]
        subdir = os.path.join(newdir, label)
        files.ensure_dir_exists(subdir)
        newpath = os.path.join(subdir, basename)
        shutil.copy(f, newpath)
def load_test_data_tiny(layout='nhwc', dtype=None):
    """Load the Tiny ImageNet "val" split for use as the test set.

    No labels are given for the "true" test set, so the "val" subset is
    used as the test set instead.

    Args:
        layout: forwarded unchanged to ``image_utils.load_jpg``.
        dtype: forwarded unchanged to ``image_utils.load_jpg``.

    Returns:
        Tuple ``(X, y)``. ``X`` is every loaded image concatenated along a
        new leading axis, in the order returned by ``files.list_files``.
        ``y`` is an ``np.int32`` array built from the result of
        ``_imagenet_tiny_cls_to_number`` applied to the per-image class ids,
        in the same order as ``X``.

    Raises:
        AssertionError: if the number of ``.JPEG`` files found is not 10000.
        KeyError: if an image file has no entry in ``val_annotations.txt``.
        IndexError: if a non-blank annotation line has fewer than two fields.
    """
    test_dir = os.path.join(IMAGENET_TINY_PATH, 'val')
    imgs_subdir = os.path.join(test_dir, 'images')
    img_paths = files.list_files(
        imgs_subdir, endswith='.JPEG', abs_paths=True)
    assert len(img_paths) == 10000  # wrong number of val images?

    # load images; each one gets a leading axis so they can be concatenated
    imgs = [image_utils.load_jpg(f, layout=layout, dtype=dtype)[
        np.newaxis, :, :, :] for f in img_paths]
    X = np.concatenate(imgs, axis=0)

    # load labels
    # TODO make sure this computation is correct
    lbls_path = os.path.join(test_dir, 'val_annotations.txt')
    fname_to_class_id = {}
    with open(lbls_path, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank line (e.g. a trailing newline at end of file);
                # nothing to parse, and indexing it would raise IndexError.
                continue
            # First field is the image file name, second is its class id.
            fname_to_class_id[fields[0]] = fields[1]

    # Look labels up by file name so the result does not rely on the
    # annotations being sorted the same way as the image listing.
    img_class_ids = [fname_to_class_id[os.path.basename(pth)]
                     for pth in img_paths]
    labels = _imagenet_tiny_cls_to_number(img_class_ids)
    y = np.array(labels, dtype=np.int32)

    return X, y
def load_train_data_tiny(layout='nhwc', dtype=None, verbose=1):
    """Load the Tiny ImageNet training split.

    Class subdirectories under ``IMAGENET_TINY_PATH/train`` are visited in
    sorted path order; the images of each class are read from its
    ``images`` subfolder.

    Args:
        layout: forwarded unchanged to ``image_utils.load_jpg``.
        dtype: forwarded unchanged to ``image_utils.load_jpg``.
        verbose: if greater than 0, print one progress line per class.

    Returns:
        Tuple ``(X, y)``. ``X`` is every loaded image concatenated along a
        new leading axis. ``y`` is an ``np.int32`` array holding, for each
        image, the value ``_imagenet_tiny_cls_to_number`` returns for the
        name of the class directory the image came from.

    Raises:
        AssertionError: if there are not exactly 200 class subdirectories,
            or if any class does not have exactly 500 ``.JPEG`` files.
    """
    train_dir = os.path.join(IMAGENET_TINY_PATH, 'train')

    # One listing serves both the sanity check and the iteration.
    subdir_paths = files.list_subdirs(train_dir, abs_paths=True)
    assert len(subdir_paths) == 200  # wrong number of classes??

    all_imgs = []
    all_labels = []
    # Built-in sorted() keeps the paths as plain str (np.sort would convert
    # them to numpy string scalars) and gives the same ordering.
    for pth in sorted(subdir_paths):
        classname = os.path.basename(pth)
        if verbose > 0:
            print("loading images for class {}...".format(classname))

        imgs_subdir = os.path.join(pth, 'images')
        img_paths = files.list_files(
            imgs_subdir, endswith='.JPEG', abs_paths=True)
        assert len(img_paths) == 500  # supposed to be 500 examples per class...

        # Give each image a leading axis so everything can be concatenated.
        all_imgs += [
            image_utils.load_jpg(f, layout=layout, dtype=dtype)[
                np.newaxis, :, :, :]
            for f in img_paths]

        # Every image in this directory shares the directory's label.
        lbl = _imagenet_tiny_cls_to_number(classname)
        all_labels += [lbl] * len(img_paths)

    X = np.concatenate(all_imgs, axis=0)
    y = np.array(all_labels, dtype=np.int32)

    return X, y
def _make_imagenet_k_of_each(k=10):
    """Build a small dataset with the first ``k`` files of every synset.

    For each subdirectory of ``IMAGENET_TRAIN_PATH``, the files inside it
    are sorted by path, the first ``k`` are kept, and those are copied into
    a subdirectory of the same name under
    ``../datasets/imagenet-<k zero-padded to 3 digits>-of-each``.
    The destination subdirectory is ensured to exist even when no files
    are selected for it.

    Args:
        k: how many files to take from each synset directory.
    """
    dest_root = '../datasets/imagenet-{:03d}-of-each'.format(k)
    print("writing to path: ", dest_root)

    for synset in files.list_subdirs(IMAGENET_TRAIN_PATH):
        synset_dir = os.path.join(IMAGENET_TRAIN_PATH, synset)
        # Sorting makes the choice of "first k" deterministic.
        chosen = sorted(files.list_files(synset_dir, abs_paths=True))[:k]

        dest_dir = os.path.join(dest_root, synset)
        files.ensure_dir_exists(dest_dir)

        for src in chosen:
            dest = os.path.join(dest_dir, os.path.basename(src))
            shutil.copy(src, dest)
def load_data_one_of_each(layout='nhwc', dtype=None, size=(224, 224)):
    """Load the "one of each" ImageNet images along with placeholder labels.

    Args:
        layout: forwarded unchanged to ``image_utils.load_jpg``.
        dtype: forwarded unchanged to ``image_utils.load_jpg``.
        size: forwarded to ``image_utils.load_jpg`` as ``resample``. When it
            is ``None`` the images are returned as a plain list instead of
            one array (presumably because they keep differing sizes --
            confirm against ``load_jpg``).

    Returns:
        Tuple ``(X, y)``. ``X`` is the images concatenated along a new
        leading axis when ``size`` is not ``None``, otherwise the list of
        loaded images. ``y`` is ``np.arange(len(X))``.

    Raises:
        AssertionError: if the number of ``.JPEG`` files found is not 1000.
    """
    jpeg_paths = files.list_files(
        IMAGENET_ONE_OF_EACH_PATH, endswith='.JPEG', abs_paths=True)
    assert len(jpeg_paths) == 1000  # should be 1000 images...

    loaded = []
    for jpeg_path in jpeg_paths:
        loaded.append(image_utils.load_jpg(
            jpeg_path, layout=layout, dtype=dtype, resample=size))

    if size is None:
        # No common size was requested, so hand the images back as a list.
        X = loaded
    else:
        # can only concat if same size
        batched = [img[np.newaxis, :, :, :] for img in loaded]
        X = np.concatenate(batched, axis=0)

    # XXX this is a total hack that will break if we get >1 img per class, and
    # already (probably) doesn't match up with the synsets
    y = np.arange(len(X))

    return X, y