Example 1
    def _prepare(self):
        cachedir = os.environ.get("XDG_CACHE_HOME",
                                  os.path.expanduser("~/.cache"))
        self.root = os.path.join(cachedir, "autoencoders/data", self.NAME)
        SRC = {
            "AwA2-data.zip": "http://cvml.ist.ac.at/AwA2/AwA2-data.zip",
        }
        if not edu.is_prepared(self.root):
            # prep
            self.logger.info("Preparing dataset {} in {}".format(
                self.NAME, self.root))
            os.makedirs(self.root, exist_ok=True)

            datadir = os.path.join(self.root, "Animals_with_Attributes2")
            if not os.path.exists(datadir):
                datapath = os.path.join(self.root, "AwA2-data.zip")
                if not os.path.exists(datapath):
                    download(SRC["AwA2-data.zip"], datapath)
                edu.unpack(datapath)

            # make filelist
            images = list()
            for path, subdirs, files in os.walk(
                    os.path.join(datadir, "JPEGImages")):
                for name in files:
                    searchname = name.lower()
                    if searchname.endswith((".jpg", ".jpeg", ".png")):
                        filename = os.path.relpath(os.path.join(path, name),
                                                   start=self.root)
                        images.append(filename)

            prng = np.random.RandomState(1)
            test = set(prng.choice(len(images), 5000, replace=False))
            train_images = [
                images[i] for i in range(len(images)) if i not in test
            ]
            test_images = [images[i] for i in range(len(images)) if i in test]

            with open(os.path.join(self.root, "train.txt"), "w") as f:
                f.write("\n".join(train_images) + "\n")

            with open(os.path.join(self.root, "test.txt"), "w") as f:
                f.write("\n".join(test_images) + "\n")

            with open(
                    os.path.join(self.root,
                                 "Animals_with_Attributes2/classes.txt"),
                    "r") as f:
                classes = f.read().splitlines()
                classes = [cls.split()[-1] for cls in classes]
                classes = sorted(classes)

            with open(os.path.join(self.root, "classes.txt"), "w") as f:
                f.write("\n".join(classes) + "\n")

            edu.mark_prepared(self.root)
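
All of these snippets gate the expensive download/extract work behind edu.is_prepared and finish with edu.mark_prepared. Those helpers are not shown anywhere in the excerpts; a minimal sketch of a flag-file implementation they could plausibly correspond to (the marker name ".ready" is an assumption, not the library's actual convention):

import os

READY_FLAG = ".ready"  # hypothetical marker name; the real helpers may differ

def is_prepared(root):
    # A root counts as prepared once the marker file exists.
    return os.path.exists(os.path.join(root, READY_FLAG))

def mark_prepared(root):
    # Drop an empty marker so later runs skip the download/extract work.
    with open(os.path.join(root, READY_FLAG), "w"):
        pass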
Example 2
    def _prepare(self):
        self.root = edu.get_root(self.NAME)
        self._data_path = Path(self.root).joinpath("data.p")
        if not edu.is_prepared(self.root):
            # prep
            self.logger.info("Preparing dataset {} in {}".format(
                self.NAME, self.root))
            root = Path(self.root)
            local_files = dict()

            local_files[self.FILES[0]] = edu.prompt_download(
                self.FILES[0], self.URL, root, content_dir="img_align_celeba")
            if not os.path.exists(os.path.join(root, "img_align_celeba")):
                self.logger.info("Extracting {}".format(
                    local_files[self.FILES[0]]))
                edu.unpack(local_files[self.FILES[0]])

            for v in self.FILES[1:]:
                local_files[v] = edu.prompt_download(v, self.URL, root)

            with open(os.path.join(self.root, "list_eval_partition.txt"),
                      "r") as f:
                list_eval_partition = f.read().splitlines()
                fnames = [s[:10] for s in list_eval_partition]
                list_eval_partition = np.array(
                    [int(s[11:]) for s in list_eval_partition])
            with open(os.path.join(self.root, "list_attr_celeba.txt"),
                      "r") as f:
                list_attr_celeba = f.read().splitlines()
                attribute_descriptions = list_attr_celeba[1]
                list_attr_celeba = list_attr_celeba[2:]
                assert len(list_attr_celeba) == len(list_eval_partition)
                assert [s[:10] for s in list_attr_celeba] == fnames
                list_attr_celeba = np.array([[int(x) for x in s[11:].split()]
                                             for s in list_attr_celeba])
            with open(os.path.join(self.root, "identity_CelebA.txt"),
                      "r") as f:
                identity_celeba = f.read().splitlines()
                assert [s[:10] for s in identity_celeba] == fnames
                identity_celeba = np.array(
                    [int(s[11:]) for s in identity_celeba])

            data = {
                "fname": np.array(
                    [os.path.join("img_align_celeba", s) for s in fnames]),
                "partition": list_eval_partition,
                "identity": identity_celeba,
                "attributes": list_attr_celeba,
            }
            with open(self._data_path, "wb") as f:
                pickle.dump(data, f)
            edu.mark_prepared(self.root)
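
The slicing with s[:10] and s[11:] relies on CelebA's metadata layout: every aligned image name is six digits plus ".jpg" (exactly ten characters), followed by a space and the value(s). A small self-contained check of that assumption:

# Sample line in the style of list_eval_partition.txt.
line = "000001.jpg 0"
fname, partition = line[:10], int(line[11:])
assert fname == "000001.jpg"
assert partition == 0  # 0 = train, 1 = validation, 2 = test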
Example 3
    def __init__(self, path):
        # Note: the `path` argument is currently unused; the dataset root
        # comes from datautil.get_root instead.
        self.path = datautil.get_root("sprites")
        if not datautil.is_prepared(self.path):
            fpath = datautil.download_url(
                "nips2015-analogy-data.tar.gz",
                "http://www.scottreed.info/files/nips2015-analogy-data.tar.gz",
                self.path)
            datautil.unpack(fpath)
            datautil.mark_prepared(self.path)
        self.preprocess()
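
datautil.download_url is used here but not defined in the excerpt. A plausible minimal stand-in, with the name and signature inferred from the call site (so treat it as an assumption rather than the library's actual implementation):

import os
import urllib.request

def download_url(fname, url, target_dir):
    # Fetch `url` into target_dir/fname, skipping the download if the
    # file is already present, and return the local path.
    os.makedirs(target_dir, exist_ok=True)
    fpath = os.path.join(target_dir, fname)
    if not os.path.exists(fpath):
        urllib.request.urlretrieve(url, fpath)
    return fpath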
Example 4
    def _prepare(self):
        self.random_crop = retrieve(self.config,
                                    "ImageNetValidation/random_crop",
                                    default=False)
        cachedir = os.environ.get("XDG_CACHE_HOME",
                                  os.path.expanduser("~/.cache"))
        self.root = os.path.join(cachedir, "autoencoders/data", self.NAME)
        self.datadir = os.path.join(self.root, "data")
        self.txt_filelist = os.path.join(self.root, "filelist.txt")
        self.expected_length = 50000
        if not edu.is_prepared(self.root):
            # prep
            self.logger.info("Preparing dataset {} in {}".format(
                self.NAME, self.root))

            datadir = self.datadir
            if not os.path.exists(datadir):
                path = os.path.join(self.root, self.FILES[0])
                if (not os.path.exists(path)
                        or os.path.getsize(path) != self.SIZES[0]):
                    import academictorrents as at
                    atpath = at.get(self.AT_HASH, datastore=self.root)
                    assert atpath == path

                self.logger.info("Extracting {} to {}".format(path, datadir))
                os.makedirs(datadir, exist_ok=True)
                with tarfile.open(path, "r:") as tar:
                    tar.extractall(path=datadir)

                vspath = os.path.join(self.root, self.FILES[1])
                if (not os.path.exists(vspath)
                        or os.path.getsize(vspath) != self.SIZES[1]):
                    download(self.VS_URL, vspath)

                with open(vspath, "r") as f:
                    synset_dict = f.read().splitlines()
                    synset_dict = dict(line.split() for line in synset_dict)

                self.logger.info("Reorganizing into synset folders")
                synsets = np.unique(list(synset_dict.values()))
                for s in synsets:
                    os.makedirs(os.path.join(datadir, s), exist_ok=True)
                for k, v in synset_dict.items():
                    src = os.path.join(datadir, k)
                    dst = os.path.join(datadir, v)
                    shutil.move(src, dst)

            # note: without recursive=True, "**" matches exactly one
            # directory level, i.e. the synset folders created above
            filelist = glob.glob(os.path.join(datadir, "**", "*.JPEG"))
            filelist = [os.path.relpath(p, start=datadir) for p in filelist]
            filelist = sorted(filelist)
            filelist = "\n".join(filelist) + "\n"
            with open(self.txt_filelist, "w") as f:
                f.write(filelist)

            edu.mark_prepared(self.root)
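
The reorganization step assumes the downloaded mapping file has one "<filename> <synset>" pair per line, which dict(line.split() for ...) turns into a lookup table. A toy illustration of that parse (the pairs are illustrative, not verified against the actual file):

lines = [
    "ILSVRC2012_val_00000001.JPEG n01751748",
    "ILSVRC2012_val_00000002.JPEG n09193705",
]
synset_dict = dict(line.split() for line in lines)
assert synset_dict["ILSVRC2012_val_00000001.JPEG"] == "n01751748"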
Example 5
    def _prepare(self):
        self.root = edu.get_root(self.NAME)
        self._data_path = Path(self.root).joinpath("data.p")
        if not edu.is_prepared(self.root):
            # prep
            self.logger.info("Preparing dataset {} in {}".format(
                self.NAME, self.root))
            root = Path(self.root)
            urls = {v: urllib.parse.urljoin(self.URL, v)
                    for v in self.FILES.values()}
            local_files = edu.download_urls(urls, target_dir=root)
            data = dict()
            for k, v in local_files.items():
                data[k] = read_mnist_file(v)
            with open(self._data_path, "wb") as f:
                pickle.dump(data, f)
            edu.mark_prepared(self.root)
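
read_mnist_file is external to this snippet. MNIST files use the IDX format (two zero bytes, a dtype code, a dimension count, big-endian dimension sizes, then raw data), so a minimal reader could look like the following sketch; the gzip handling is an assumption about how the files are stored locally:

import gzip
import numpy as np

def read_mnist_file(path):
    # Minimal IDX reader sketch; the real read_mnist_file is not shown.
    opener = gzip.open if str(path).endswith(".gz") else open
    with opener(path, "rb") as f:
        header = f.read(4)  # 0x00 0x00 <dtype> <ndim>; MNIST uses uint8
        ndim = header[3]
        shape = tuple(int.from_bytes(f.read(4), "big") for _ in range(ndim))
        return np.frombuffer(f.read(), dtype=np.uint8).reshape(shape)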
Example 6
    def _prepare(self):
        self.random_crop = retrieve(self.config,
                                    "ImageNetTrain/random_crop",
                                    default=True)
        cachedir = os.environ.get("XDG_CACHE_HOME",
                                  os.path.expanduser("~/.cache"))
        self.root = os.path.join(cachedir, "autoencoders/data", self.NAME)
        self.datadir = os.path.join(self.root, "data")
        self.txt_filelist = os.path.join(self.root, "filelist.txt")
        self.expected_length = 1281167
        if not edu.is_prepared(self.root):
            # prep
            self.logger.info("Preparing dataset {} in {}".format(
                self.NAME, self.root))

            datadir = self.datadir
            if not os.path.exists(datadir):
                path = os.path.join(self.root, self.FILES[0])
                if (not os.path.exists(path)
                        or os.path.getsize(path) != self.SIZES[0]):
                    import academictorrents as at
                    atpath = at.get(self.AT_HASH, datastore=self.root)
                    assert atpath == path

                self.logger.info("Extracting {} to {}".format(path, datadir))
                os.makedirs(datadir, exist_ok=True)
                with tarfile.open(path, "r:") as tar:
                    tar.extractall(path=datadir)

                self.logger.info("Extracting sub-tars.")
                subpaths = sorted(glob.glob(os.path.join(datadir, "*.tar")))
                for subpath in tqdm(subpaths):
                    subdir = subpath[:-len(".tar")]
                    os.makedirs(subdir, exist_ok=True)
                    with tarfile.open(subpath, "r:") as tar:
                        tar.extractall(path=subdir)

            # note: without recursive=True, "**" matches exactly one
            # directory level, i.e. the per-synset folders extracted above
            filelist = glob.glob(os.path.join(datadir, "**", "*.JPEG"))
            filelist = [os.path.relpath(p, start=datadir) for p in filelist]
            filelist = sorted(filelist)
            filelist = "\n".join(filelist) + "\n"
            with open(self.txt_filelist, "w") as f:
                f.write(filelist)

            edu.mark_prepared(self.root)
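
expected_length is set here (1,281,167 training images) but never checked in this excerpt. A small hypothetical sanity check one could run after preparation:

def verify_filelist(txt_filelist, expected_length):
    # Hypothetical helper: count non-empty lines in the filelist and
    # compare against the expected number of images.
    with open(txt_filelist) as f:
        n = sum(1 for line in f if line.strip())
    assert n == expected_length, "found {} files, expected {}".format(
        n, expected_length)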
Example 7
    def _prepare(self):
        self.root = edu.get_root(self.NAME)
        self._data_path = Path(self.root).joinpath("data.p")
        if not edu.is_prepared(self.root):
            # prep
            self.logger.info("Preparing dataset {} in {}".format(
                self.NAME, self.root))
            root = Path(self.root)
            urls = {v: urllib.parse.urljoin(self.URL, v)
                    for v in self.FILES.values()}
            local_files = edu.download_urls(urls, target_dir=root)
            edu.unpack(local_files["cifar-10-python.tar.gz"])
            base = os.path.join(self.root, "cifar-10-batches-py")
            labels = list()
            filenames = list()
            datas = list()
            for batch_file in ["data_batch_{}".format(i) for i in range(1, 6)]:
                with open(os.path.join(base, batch_file), "rb") as f:
                    batch_data = pickle.load(f, encoding="bytes")
                labels += batch_data[b"labels"]
                filenames += [
                    fname.decode() for fname in batch_data[b"filenames"]
                ]
                datas.append(batch_data[b"data"])
            with open(os.path.join(base, "test_batch"), "rb") as f:
                test_data = pickle.load(f, encoding="bytes")
            test_labels = test_data[b"labels"]
            test_filenames = [
                fname.decode() for fname in test_data[b"filenames"]
            ]
            test_datas = test_data[b"data"]
            with open(os.path.join(base, "batches.meta"), "rb") as f:
                _meta = pickle.load(f, encoding="bytes")
            meta = {
                "label_names": [
                    name.decode() for name in _meta[b"label_names"]
                ],
                "num_vis": _meta[b"num_vis"],
                "num_cases_per_batch": _meta[b"num_cases_per_batch"],
            }

            # convert to (32,32,3) RGB uint8
            images = np.concatenate(datas, axis=0)
            images = np.reshape(images, [-1, 3, 32, 32])
            images = np.transpose(images, [0, 2, 3, 1])
            test_images = np.reshape(test_datas, [-1, 3, 32, 32])
            test_images = np.transpose(test_images, [0, 2, 3, 1])

            filenames = np.array(filenames)
            test_filenames = np.array(test_filenames)
            labels = np.array(labels)
            test_labels = np.array(test_labels)

            data = {
                "train": dict(images=images,
                              filenames=filenames,
                              labels=labels),
                "test": dict(images=test_images,
                             filenames=test_filenames,
                             labels=test_labels),
                "meta": meta,
            }
            with open(self._data_path, "wb") as f:
                pickle.dump(data, f)
            edu.mark_prepared(self.root)
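
Each flat CIFAR-10 row stores the 1024 red bytes, then green, then blue; the reshape/transpose above therefore turns a (N, 3072) array into channel-last (N, 32, 32, 3) images. A quick self-contained check of that layout:

import numpy as np

row = np.arange(3072).reshape(1, -1)  # stand-in for one flat CIFAR row
img = np.transpose(row.reshape(-1, 3, 32, 32), [0, 2, 3, 1])
assert img.shape == (1, 32, 32, 3)
assert img[0, 0, 0].tolist() == [0, 1024, 2048]  # first R, G, B values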
Example 8
    def _prepare(self):
        cachedir = os.environ.get("XDG_CACHE_HOME",
                                  os.path.expanduser("~/.cache"))
        self.root = os.path.join(cachedir, "autoencoders/data/AnimalFaces")
        self.logger.info("Using data located at {}".format(self.root))

        os.makedirs(self.root, exist_ok=True)
        self.datadir = os.path.join(self.root, "data")

        if not edu.is_prepared(self.root):
            self.logger.info("Preparing dataset {} in {}".format(
                self.NAME, self.root))

            if not os.path.exists(self.datadir):
                os.makedirs(self.datadir, exist_ok=True)
                imagenet = ImageNetTrain()

                coor_path = os.path.join(self.root, "animalface_coordinates.txt")
                if not os.path.exists(coor_path):
                    download(self.COOR_URL, coor_path)

                with open(coor_path, "r") as f:
                    animalface_coordinates = f.readlines()

                for line in tqdm(animalface_coordinates):
                    ls = line.strip().split(' ')
                    img_name = os.path.join(imagenet.datadir, ls[0])
                    img = Image.open(img_name)
                    img = img.convert('RGB')
                    x = int(ls[1])
                    y = int(ls[2])
                    w = int(ls[3])
                    h = int(ls[4])
                    # note: PIL's crop box is (left, upper, right, lower),
                    # so ls[3] and ls[4] act as corner coordinates here,
                    # despite the w/h names
                    crop = img.crop((x, y, w, h))

                    out_name = os.path.join(self.datadir,
                                            '%s_%d_%d_%d_%d.jpg' % (ls[0], x, y, w, h))
                    os.makedirs(os.path.dirname(out_name), exist_ok=True)
                    crop.save(out_name)

            # download the six split lists, skipping files already present
            split_files = [
                ("animals_list_train.txt", self.TRAIN_URL),
                ("animals_list_test.txt", self.TEST_URL),
                ("shared_animalfaces_train.txt", self.SHARED_TRAIN_URL),
                ("shared_animalfaces_test.txt", self.SHARED_TEST_URL),
                ("restricted_animalfaces_train.txt",
                 self.RESTRICTED_TRAIN_URL),
                ("restricted_animalfaces_test.txt",
                 self.RESTRICTED_TEST_URL),
            ]
            for fname, url in split_files:
                path = os.path.join(self.root, fname)
                if not os.path.exists(path):
                    download(url, path)

            edu.mark_prepared(self.root)
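
Since PIL's Image.crop takes a (left, upper, right, lower) box rather than a width/height pair, the coordinate file is implicitly assumed to store corner coordinates. A quick demonstration of the box semantics:

from PIL import Image

img = Image.new("RGB", (100, 100))
crop = img.crop((10, 20, 60, 90))  # (left, upper, right, lower)
assert crop.size == (50, 70)  # width = right - left, height = lower - upper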