Exemple #1
0
def _folderize_imagenet_one_of_each():  # one-off script
    """Copy the one-of-each images into one subdirectory per label.

    Each file returned by ``files.list_files`` for
    ``IMAGENET_ONE_OF_EACH_PATH`` (filtered to names ending in ``.JPEG``) is
    copied to ``IMAGENET_ONE_OF_EACH_FLOW_PATH/<label>/<basename>``, where
    ``<label>`` is the part of the file's basename before the first
    underscore. The source files are copied, not moved.
    """
    olddir = IMAGENET_ONE_OF_EACH_PATH
    newdir = IMAGENET_ONE_OF_EACH_FLOW_PATH
    files.ensure_dir_exists(newdir)
    # list the source directory once and iterate over that single listing
    old_files = files.list_files(olddir, endswith='.JPEG', abs_paths=True)
    for f in old_files:
        basename = os.path.basename(f)
        # the label is the basename prefix up to the first underscore
        label = basename.split('_')[0]
        subdir = os.path.join(newdir, label)
        files.ensure_dir_exists(subdir)
        newpath = os.path.join(subdir, basename)
        shutil.copy(f, newpath)
Exemple #2
0
def load_test_data_tiny(layout='nhwc', dtype=None):
    """Load the Tiny ImageNet "val" subset as a test set.

    Args:
        layout: forwarded unchanged to ``image_utils.load_jpg``.
        dtype: forwarded unchanged to ``image_utils.load_jpg``.

    Returns:
        A tuple ``(X, y)``. ``X`` is every loaded image concatenated along a
        new leading axis; ``y`` is an ``int32`` array holding, for each
        image, the value ``_imagenet_tiny_cls_to_number`` yields for the
        class id listed in ``val_annotations.txt``.

    Raises:
        AssertionError: if the image listing does not contain exactly
            10000 ``.JPEG`` files.
    """
    # the "true" test set ships without labels, so "val" stands in for it
    val_dir = os.path.join(IMAGENET_TINY_PATH, 'val')
    img_paths = files.list_files(
        os.path.join(val_dir, 'images'), endswith='.JPEG', abs_paths=True)
    assert len(img_paths) == 10000  # wrong number of val images?

    # decode each image, give it a leading axis, and join along that axis
    batches = []
    for img_path in img_paths:
        img = image_utils.load_jpg(img_path, layout=layout, dtype=dtype)
        batches.append(img[np.newaxis, :, :, :])
    X = np.concatenate(batches, axis=0)

    # load labels  # TODO make sure this computation is correct
    annotations_path = os.path.join(val_dir, 'val_annotations.txt')
    with open(annotations_path, 'r') as annotations:
        rows = [line.split() for line in annotations.readlines()]

    # map file name -> class id so the annotation order does not matter;
    # the first two whitespace-separated fields of each line are used
    class_id_for = {row[0]: row[1] for row in rows}
    ordered_ids = [class_id_for[os.path.basename(p)] for p in img_paths]
    y = np.array(_imagenet_tiny_cls_to_number(ordered_ids), dtype=np.int32)

    return X, y
Exemple #3
0
def load_train_data_tiny(layout='nhwc', dtype=None, verbose=1):
    """Load the whole Tiny ImageNet training split into memory.

    Class directories are visited in sorted path order; the ``.JPEG`` files
    in each class's ``images`` subdirectory are loaded and labelled with
    ``_imagenet_tiny_cls_to_number(<class directory name>)``.

    Args:
        layout: forwarded unchanged to ``image_utils.load_jpg``.
        dtype: forwarded unchanged to ``image_utils.load_jpg``.
        verbose: when greater than 0, print one progress line per class.

    Returns:
        A tuple ``(X, y)``. ``X`` is every loaded image concatenated along a
        new leading axis; ``y`` is an ``int32`` array with one label per
        image, in the same order as ``X``.

    Raises:
        AssertionError: if there are not exactly 200 class directories, or
            a class does not have exactly 500 ``.JPEG`` files.
    """
    train_dir = os.path.join(IMAGENET_TINY_PATH, 'train')
    # a single listing serves both the sanity check and the iteration
    subdir_paths = files.list_subdirs(train_dir, abs_paths=True)
    assert len(subdir_paths) == 200  # wrong number of classes??

    all_imgs = []
    all_labels = []
    for pth in sorted(subdir_paths):
        classname = os.path.basename(pth)
        if verbose > 0:
            print("loading images for class {}...".format(classname))

        imgs_subdir = os.path.join(pth, 'images')
        img_paths = files.list_files(
            imgs_subdir, endswith='.JPEG', abs_paths=True)
        assert len(img_paths) == 500  # supposed to be 500 examples per class...
        # give every image a leading axis so they can be concatenated below
        imgs = [image_utils.load_jpg(f, layout=layout,
                                     dtype=dtype)[np.newaxis, :, :, :]
                for f in img_paths]
        all_imgs.extend(imgs)
        lbl = _imagenet_tiny_cls_to_number(classname)
        all_labels.extend([lbl] * len(img_paths))

    X = np.concatenate(all_imgs, axis=0)
    y = np.array(all_labels, dtype=np.int32)

    return X, y
Exemple #4
0
def _make_imagenet_k_of_each(k=10):
    """Copy the first ``k`` files of each synset into a smaller dataset.

    For every subdirectory of ``IMAGENET_TRAIN_PATH``, the files listed in
    it are sorted by path and the first ``k`` are copied into
    ``../datasets/imagenet-<k, zero-padded to 3 digits>-of-each/<synset>/``.

    Args:
        k: maximum number of files to copy per synset.
    """
    out_path = '../datasets/imagenet-{:03d}-of-each'.format(k)
    print("writing to path: ", out_path)
    for synset in files.list_subdirs(IMAGENET_TRAIN_PATH):
        # pick the k lexicographically-first files of this synset
        synset_dir = os.path.join(IMAGENET_TRAIN_PATH, synset)
        chosen = sorted(files.list_files(synset_dir, abs_paths=True))[:k]

        dest_dir = os.path.join(out_path, synset)
        files.ensure_dir_exists(dest_dir)
        for src in chosen:
            shutil.copy(src, os.path.join(dest_dir, os.path.basename(src)))
Exemple #5
0
def load_data_one_of_each(layout='nhwc', dtype=None, size=(224, 224)):
    """Load the one-image-per-class ImageNet subset.

    Args:
        layout: forwarded unchanged to ``image_utils.load_jpg``.
        dtype: forwarded unchanged to ``image_utils.load_jpg``.
        size: forwarded to ``image_utils.load_jpg`` as ``resample``
            (presumably the target image size -- confirm against
            ``load_jpg``). When ``None`` the images are returned as a
            plain list instead of one array.

    Returns:
        A tuple ``(X, y)``. ``X`` is a single array of the images
        concatenated along a new leading axis when ``size`` is not ``None``,
        otherwise the list of loaded images. ``y`` is
        ``np.arange(len(X))``.

    Raises:
        AssertionError: if the listing does not contain exactly 1000
            ``.JPEG`` files.
    """
    img_paths = files.list_files(IMAGENET_ONE_OF_EACH_PATH, endswith='.JPEG',
                                 abs_paths=True)
    assert len(img_paths) == 1000   # should be 1000 images...

    X = []
    for path in img_paths:
        X.append(image_utils.load_jpg(
            path, layout=layout, dtype=dtype, resample=size))

    # images can only be concatenated when they share a size; without
    # resampling they stay a plain list
    if size is not None:
        X = np.concatenate([img[np.newaxis, :, :, :] for img in X], axis=0)

    # XXX total hack: the label is just the image's position in the listing.
    # This breaks with >1 image per class and (probably) already doesn't
    # match up with the synsets.
    y = np.arange(len(X))
    return X, y