def _prepare(self):
    """Download and preprocess CelebA into a single ``data.p`` pickle.

    Fetches the aligned-image archive plus the annotation text files,
    parses the eval partition, attribute and identity annotations (all
    keyed by the fixed-width 10-character filenames), and stores the
    resulting arrays in one dict under the dataset root.
    """
    self.root = edu.get_root(self.NAME)
    self._data_path = Path(self.root).joinpath("data.p")
    if not edu.is_prepared(self.root):
        # prep
        self.logger.info("Preparing dataset {} in {}".format(
            self.NAME, self.root))
        root = Path(self.root)
        local_files = dict()
        # FILES[0] is the aligned image archive; FILES[1:] are the
        # annotation text files.
        local_files[self.FILES[0]] = edu.prompt_download(
            self.FILES[0], self.URL, root, content_dir="img_align_celeba")
        if not os.path.exists(os.path.join(root, "img_align_celeba")):
            self.logger.info("Extracting {}".format(
                local_files[self.FILES[0]]))
            # FIX: index by self.FILES[0] instead of the hard-coded
            # "img_align_celeba.zip" literal, which raises KeyError if
            # FILES[0] is ever renamed — every other access uses FILES[0].
            edu.unpack(local_files[self.FILES[0]])
        for v in self.FILES[1:]:
            local_files[v] = edu.prompt_download(v, self.URL, root)
        # Annotation lines are fixed-width: chars [:10] are the filename
        # ("000001.jpg"), chars [11:] hold the value(s).
        with open(os.path.join(self.root, "list_eval_partition.txt"),
                  "r") as f:
            list_eval_partition = f.read().splitlines()
        fnames = [s[:10] for s in list_eval_partition]
        list_eval_partition = np.array(
            [int(s[11:]) for s in list_eval_partition])
        with open(os.path.join(self.root, "list_attr_celeba.txt"),
                  "r") as f:
            list_attr_celeba = f.read().splitlines()
        # First line is the example count, second the attribute names.
        attribute_descriptions = list_attr_celeba[1]
        list_attr_celeba = list_attr_celeba[2:]
        assert len(list_attr_celeba) == len(list_eval_partition)
        assert [s[:10] for s in list_attr_celeba] == fnames
        list_attr_celeba = np.array([[int(x) for x in s[11:].split()]
                                     for s in list_attr_celeba])
        with open(os.path.join(self.root, "identity_CelebA.txt"),
                  "r") as f:
            identity_celeba = f.read().splitlines()
        assert [s[:10] for s in identity_celeba] == fnames
        identity_celeba = np.array(
            [int(s[11:]) for s in identity_celeba])
        data = {
            # Single-argument os.path.join was a no-op; build the relative
            # path directly.
            "fname": np.array([
                "img_align_celeba/{}".format(s) for s in fnames
            ]),
            "partition": list_eval_partition,
            "identity": identity_celeba,
            "attributes": list_attr_celeba,
        }
        with open(self._data_path, "wb") as f:
            pickle.dump(data, f)
        edu.mark_prepared(self.root)
def __init__(self, path):
    """Ensure the sprites analogy data is downloaded and unpacked,
    then run preprocessing.

    NOTE(review): the original source had collapsed indentation; this
    assumes ``preprocess`` runs on every init, not only after a fresh
    download — confirm against callers.
    """
    self.path = datautil.get_root("sprites")
    if not datautil.is_prepared(self.path):
        archive = datautil.download_url(
            "nips2015-analogy-data.tar.gz",
            "http://www.scottreed.info/files/nips2015-analogy-data.tar.gz",
            self.path,
        )
        datautil.unpack(archive)
        datautil.mark_prepared(self.path)
    self.preprocess()
def _prepare(self):
    """Download the raw dataset files and cache their decoded contents
    as a single ``data.p`` pickle under the dataset root."""
    self.root = edu.get_root(self.NAME)
    self._data_path = Path(self.root).joinpath("data.p")
    if not edu.is_prepared(self.root):
        # prep
        self.logger.info("Preparing dataset {} in {}".format(
            self.NAME, self.root))
        root = Path(self.root)
        # Resolve each file name against the base URL, keyed by name.
        urls = {fname: urllib.parse.urljoin(self.URL, fname)
                for fname in self.FILES.values()}
        local_files = edu.download_urls(urls, target_dir=root)
        # Decode every downloaded file into an array.
        data = {name: read_mnist_file(path)
                for name, path in local_files.items()}
        with open(self._data_path, "wb") as f:
            pickle.dump(data, f)
        edu.mark_prepared(self.root)
def _prepare(self):
    """Download CIFAR-10, decode the python-pickled batches, and cache
    train/test splits plus metadata in a single ``data.p`` pickle."""
    self.root = edu.get_root(self.NAME)
    self._data_path = Path(self.root).joinpath("data.p")
    if not edu.is_prepared(self.root):
        # prep
        self.logger.info("Preparing dataset {} in {}".format(
            self.NAME, self.root))
        root = Path(self.root)
        urls = {fname: urllib.parse.urljoin(self.URL, fname)
                for fname in self.FILES.values()}
        local_files = edu.download_urls(urls, target_dir=root)
        edu.unpack(local_files["cifar-10-python.tar.gz"])
        base = os.path.join(self.root, "cifar-10-batches-py")

        def load_batch(name):
            # The archive's pickles use bytes keys; data comes from the
            # official download, but note pickle.load on untrusted input
            # would be unsafe.
            with open(os.path.join(base, name), "rb") as f:
                return pickle.load(f, encoding="bytes")

        labels, filenames, datas = [], [], []
        for i in range(1, 6):
            batch = load_batch("data_batch_{}".format(i))
            labels += batch[b"labels"]
            filenames += [fn.decode() for fn in batch[b"filenames"]]
            datas.append(batch[b"data"])

        test_batch = load_batch("test_batch")
        test_labels = test_batch[b"labels"]
        test_filenames = [fn.decode() for fn in test_batch[b"filenames"]]
        test_datas = test_batch[b"data"]

        _meta = load_batch("batches.meta")
        meta = {
            "label_names":
                [name.decode() for name in _meta[b"label_names"]],
            "num_vis": _meta[b"num_vis"],
            "num_cases_per_batch": _meta[b"num_cases_per_batch"],
        }

        # convert to (32,32,3) RGB uint8
        def to_nhwc(flat):
            # Rows are flat 3x32x32 (channel-first); reshape then move
            # channels last.
            arr = np.reshape(flat, [-1, 3, 32, 32])
            return np.transpose(arr, [0, 2, 3, 1])

        images = to_nhwc(np.concatenate(datas, axis=0))
        test_images = to_nhwc(test_datas)

        data = {
            "train": dict(images=images,
                          filenames=np.array(filenames),
                          labels=np.array(labels)),
            "test": dict(images=test_images,
                         filenames=np.array(test_filenames),
                         labels=np.array(test_labels)),
            "meta": meta,
        }
        with open(self._data_path, "wb") as f:
            pickle.dump(data, f)
        edu.mark_prepared(self.root)