Example 1
    def _prepare(self):
        self.root = edu.get_root(self.NAME)
        self._data_path = Path(self.root).joinpath("data.p")
        if not edu.is_prepared(self.root):
            # prep
            self.logger.info("Preparing dataset {} in {}".format(
                self.NAME, self.root))
            root = Path(self.root)
            local_files = dict()

            local_files[self.FILES[0]] = edu.prompt_download(
                self.FILES[0], self.URL, root, content_dir="img_align_celeba")
            if not os.path.exists(os.path.join(root, "img_align_celeba")):
                self.logger.info("Extracting {}".format(
                    local_files[self.FILES[0]]))
                edu.unpack(local_files[self.FILES[0]])

            for v in self.FILES[1:]:
                local_files[v] = edu.prompt_download(v, self.URL, root)

            # list_eval_partition.txt: "<filename> <split>" per line,
            # where the split index is 0=train, 1=val, 2=test.
            with open(os.path.join(self.root, "list_eval_partition.txt"),
                      "r") as f:
                list_eval_partition = f.read().splitlines()
                fnames = [s[:10] for s in list_eval_partition]
                list_eval_partition = np.array(
                    [int(s[11:]) for s in list_eval_partition])
            # list_attr_celeba.txt: a count line, a header line with the
            # attribute names, then "<filename> <+1/-1 per attribute>" per line.
            with open(os.path.join(self.root, "list_attr_celeba.txt"),
                      "r") as f:
                list_attr_celeba = f.read().splitlines()
                attribute_descriptions = list_attr_celeba[1]
                list_attr_celeba = list_attr_celeba[2:]
                assert len(list_attr_celeba) == len(list_eval_partition)
                assert [s[:10] for s in list_attr_celeba] == fnames
                list_attr_celeba = np.array([[int(x) for x in s[11:].split()]
                                             for s in list_attr_celeba])
            # identity_CelebA.txt: "<filename> <integer identity>" per line.
            with open(os.path.join(self.root, "identity_CelebA.txt"),
                      "r") as f:
                identity_celeba = f.read().splitlines()
                assert [s[:10] for s in identity_celeba] == fnames
                identity_celeba = np.array(
                    [int(s[11:]) for s in identity_celeba])

            data = {
                "fname": np.array(
                    ["img_align_celeba/{}".format(s) for s in fnames]),
                "partition": list_eval_partition,
                "identity": identity_celeba,
                "attributes": list_attr_celeba,
            }
            with open(self._data_path, "wb") as f:
                pickle.dump(data, f)
            edu.mark_prepared(self.root)
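
For orientation, here is a minimal sketch of how the pickled index written above could be read back. The file name and the dictionary keys ("fname", "partition", "identity", "attributes") come from the example itself; the root directory is a placeholder, not the value returned by edu.get_root.

import pickle

import numpy as np

root = "/path/to/CelebA"  # placeholder for edu.get_root(NAME)
with open("{}/data.p".format(root), "rb") as f:
    data = pickle.load(f)

# Select the training split (partition 0) and look up its files and labels.
train_idx = np.where(data["partition"] == 0)[0]
train_fnames = data["fname"][train_idx]
train_identities = data["identity"][train_idx]
train_attributes = data["attributes"][train_idx]
print(len(train_idx), train_attributes.shape)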
Example 2
    def __init__(self, path):
        # Note: the provided path is not used; the dataset root comes from
        # datautil.get_root instead.
        self.path = datautil.get_root("sprites")
        if not datautil.is_prepared(self.path):
            fpath = datautil.download_url(
                "nips2015-analogy-data.tar.gz",
                "http://www.scottreed.info/files/nips2015-analogy-data.tar.gz",
                self.path)
            datautil.unpack(fpath)
            datautil.mark_prepared(self.path)
        self.preprocess()
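
The datautil helpers are not shown in this listing. As a rough illustration only, the download-and-unpack step above amounts to something like the following standard-library sketch; urllib.request and tarfile are stand-ins here, not the library's actual implementation, and the target directory is a placeholder.

import os
import tarfile
import urllib.request

url = "http://www.scottreed.info/files/nips2015-analogy-data.tar.gz"
target_dir = "/path/to/sprites"  # placeholder for datautil.get_root("sprites")
fpath = os.path.join(target_dir, "nips2015-analogy-data.tar.gz")

os.makedirs(target_dir, exist_ok=True)
if not os.path.exists(fpath):
    urllib.request.urlretrieve(url, fpath)  # download the archive
with tarfile.open(fpath, "r:gz") as tar:
    tar.extractall(target_dir)  # unpack next to the archive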
Example 3
    def _prepare(self):
        self.root = edu.get_root(self.NAME)
        self._data_path = Path(self.root).joinpath("data.p")
        if not edu.is_prepared(self.root):
            # prep
            self.logger.info("Preparing dataset {} in {}".format(
                self.NAME, self.root))
            root = Path(self.root)
            urls = {v: urllib.parse.urljoin(self.URL, v)
                    for v in self.FILES.values()}
            local_files = edu.download_urls(urls, target_dir=root)
            data = dict()
            for k, v in local_files.items():
                data[k] = read_mnist_file(v)
            with open(self._data_path, "wb") as f:
                pickle.dump(data, f)
            edu.mark_prepared(self.root)
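
read_mnist_file is referenced but not defined in this snippet. Below is a minimal sketch of an IDX reader that could fill that role, assuming the downloaded files follow the standard (possibly gzip-compressed) MNIST IDX layout; it is an illustration, not the helper actually used by the example.

import gzip
import struct

import numpy as np


def read_mnist_file(path):
    # IDX layout: two zero bytes, a type code, the number of dimensions,
    # then each dimension as a big-endian uint32, followed by raw uint8 data.
    opener = gzip.open if str(path).endswith(".gz") else open
    with opener(path, "rb") as f:
        zeros, dtype, ndim = struct.unpack(">HBB", f.read(4))
        shape = struct.unpack(">" + "I" * ndim, f.read(4 * ndim))
        data = np.frombuffer(f.read(), dtype=np.uint8)
    return data.reshape(shape)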
Example 4
    def _prepare(self):
        self.root = edu.get_root(self.NAME)
        self._data_path = Path(self.root).joinpath("data.p")
        if not edu.is_prepared(self.root):
            # prep
            self.logger.info("Preparing dataset {} in {}".format(
                self.NAME, self.root))
            root = Path(self.root)
            urls = {v: urllib.parse.urljoin(self.URL, v)
                    for v in self.FILES.values()}
            local_files = edu.download_urls(urls, target_dir=root)
            edu.unpack(local_files["cifar-10-python.tar.gz"])
            base = os.path.join(self.root, "cifar-10-batches-py")
            labels = list()
            filenames = list()
            datas = list()
            # Accumulate the five training batches, then load the test batch
            # and the metadata; the batch pickles use bytes keys.
            for batch_file in ["data_batch_{}".format(i) for i in range(1, 6)]:
                with open(os.path.join(base, batch_file), "rb") as f:
                    batch_data = pickle.load(f, encoding="bytes")
                labels += batch_data[b"labels"]
                filenames += [
                    fname.decode() for fname in batch_data[b"filenames"]
                ]
                datas.append(batch_data[b"data"])
            with open(os.path.join(base, "test_batch"), "rb") as f:
                test_data = pickle.load(f, encoding="bytes")
            test_labels = test_data[b"labels"]
            test_filenames = [
                fname.decode() for fname in test_data[b"filenames"]
            ]
            test_datas = test_data[b"data"]
            with open(os.path.join(base, "batches.meta"), "rb") as f:
                _meta = pickle.load(f, encoding="bytes")
            meta = {
                "label_names":
                    [name.decode() for name in _meta[b"label_names"]],
                "num_vis": _meta[b"num_vis"],
                "num_cases_per_batch": _meta[b"num_cases_per_batch"],
            }

            # convert to (32,32,3) RGB uint8
            images = np.concatenate(datas, axis=0)
            images = np.reshape(images, [-1, 3, 32, 32])
            images = np.transpose(images, [0, 2, 3, 1])
            test_images = test_datas
            test_images = np.reshape(test_images, [-1, 3, 32, 32])
            test_images = np.transpose(test_images, [0, 2, 3, 1])

            filenames = np.array(filenames)
            test_filenames = np.array(test_filenames)
            labels = np.array(labels)
            test_labels = np.array(test_labels)

            data = {
                "train": dict(images=images, filenames=filenames, labels=labels),
                "test": dict(images=test_images,
                             filenames=test_filenames,
                             labels=test_labels),
                "meta": meta,
            }
            with open(self._data_path, "wb") as f:
                pickle.dump(data, f)
            edu.mark_prepared(self.root)
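
As a quick sanity check, the pickle written above could be loaded back as sketched below. The keys ("train", "test", "meta") and the (N, 32, 32, 3) image layout follow directly from the example; the root path is a placeholder, and the expected counts are the standard CIFAR-10 split sizes.

import os
import pickle

root = "/path/to/CIFAR10"  # placeholder for edu.get_root(NAME)
with open(os.path.join(root, "data.p"), "rb") as f:
    data = pickle.load(f)

train, test, meta = data["train"], data["test"], data["meta"]
print(train["images"].shape)  # expected: (50000, 32, 32, 3), dtype uint8
print(test["images"].shape)   # expected: (10000, 32, 32, 3)
print(meta["label_names"])    # the ten CIFAR-10 class names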