# Module-level imports assumed by the methods in this section; the
# project-specific helpers (the edu / datautil dataset utilities, download,
# retrieve, read_mnist_file, ImageNetTrain) come from the surrounding
# codebase and are used as-is.
import glob
import os
import pickle
import shutil
import tarfile
import urllib.parse
from pathlib import Path

import numpy as np
from PIL import Image
from tqdm import tqdm


def _prepare(self):
    cachedir = os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache"))
    self.root = os.path.join(cachedir, "autoencoders/data", self.NAME)
    SRC = {
        "AwA2-data.zip": "http://cvml.ist.ac.at/AwA2/AwA2-data.zip",
    }
    if not edu.is_prepared(self.root):
        # prep
        self.logger.info("Preparing dataset {} in {}".format(self.NAME, self.root))
        os.makedirs(self.root, exist_ok=True)

        datadir = os.path.join(self.root, "Animals_with_Attributes2")
        if not os.path.exists(datadir):
            datapath = os.path.join(self.root, "AwA2-data.zip")
            if not os.path.exists(datapath):
                download(SRC["AwA2-data.zip"], datapath)
            edu.unpack(datapath)

        # build the file list from all images under JPEGImages; endswith
        # replaces the original substring search (rfind), which also matched
        # names like "jpgfoo.txt"
        images = list()
        for path, subdirs, files in os.walk(os.path.join(datadir, "JPEGImages")):
            for name in files:
                searchname = name.lower()
                if searchname.endswith((".jpg", ".jpeg", ".png")):
                    filename = os.path.relpath(os.path.join(path, name),
                                               start=self.root)
                    images.append(filename)

        # fixed-seed split: hold out 5000 test images
        prng = np.random.RandomState(1)
        test = set(prng.choice(len(images), 5000, replace=False))
        train_images = [images[i] for i in range(len(images)) if i not in test]
        test_images = [images[i] for i in range(len(images)) if i in test]
        with open(os.path.join(self.root, "train.txt"), "w") as f:
            f.write("\n".join(train_images) + "\n")
        with open(os.path.join(self.root, "test.txt"), "w") as f:
            f.write("\n".join(test_images) + "\n")

        # normalize class names into a sorted classes.txt
        with open(os.path.join(self.root,
                               "Animals_with_Attributes2/classes.txt"), "r") as f:
            classes = f.read().splitlines()
        classes = sorted(cls.split()[-1] for cls in classes)
        with open(os.path.join(self.root, "classes.txt"), "w") as f:
            f.write("\n".join(classes) + "\n")

        edu.mark_prepared(self.root)
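# Hedged sketch of the edu.is_prepared / edu.mark_prepared pair used
# throughout this section. The actual helpers live in the project's data
# utilities; this assumes the common pattern of a marker file (the ".ready"
# name is an assumption) that makes preparation idempotent across runs.
def is_prepared(root):
    # preparation is considered complete once the marker file exists
    return os.path.exists(os.path.join(root, ".ready"))


def mark_prepared(root):
    # touch the marker so subsequent runs skip the prep work
    with open(os.path.join(root, ".ready"), "w"):
        pass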
def _prepare(self):
    self.root = edu.get_root(self.NAME)
    self._data_path = Path(self.root).joinpath("data.p")
    if not edu.is_prepared(self.root):
        # prep
        self.logger.info("Preparing dataset {} in {}".format(self.NAME, self.root))
        root = Path(self.root)

        local_files = dict()
        # FILES[0] is the image archive (img_align_celeba.zip)
        local_files[self.FILES[0]] = edu.prompt_download(
            self.FILES[0], self.URL, root, content_dir="img_align_celeba")
        if not os.path.exists(os.path.join(root, "img_align_celeba")):
            self.logger.info("Extracting {}".format(local_files[self.FILES[0]]))
            edu.unpack(local_files[self.FILES[0]])

        for v in self.FILES[1:]:
            local_files[v] = edu.prompt_download(v, self.URL, root)

        # the annotation files use fixed-width rows: a 10-character filename
        # such as "000001.jpg", one separator character, then the values
        with open(os.path.join(self.root, "list_eval_partition.txt"), "r") as f:
            list_eval_partition = f.read().splitlines()
        fnames = [s[:10] for s in list_eval_partition]
        list_eval_partition = np.array([int(s[11:]) for s in list_eval_partition])

        with open(os.path.join(self.root, "list_attr_celeba.txt"), "r") as f:
            list_attr_celeba = f.read().splitlines()
        # first line is the entry count, second the attribute names
        attribute_descriptions = list_attr_celeba[1]
        list_attr_celeba = list_attr_celeba[2:]
        assert len(list_attr_celeba) == len(list_eval_partition)
        assert [s[:10] for s in list_attr_celeba] == fnames
        list_attr_celeba = np.array(
            [[int(x) for x in s[11:].split()] for s in list_attr_celeba])

        with open(os.path.join(self.root, "identity_CelebA.txt"), "r") as f:
            identity_celeba = f.read().splitlines()
        assert [s[:10] for s in identity_celeba] == fnames
        identity_celeba = np.array([int(s[11:]) for s in identity_celeba])

        data = {
            "fname": np.array(
                ["img_align_celeba/{}".format(s) for s in fnames]),
            "partition": list_eval_partition,
            "identity": identity_celeba,
            "attributes": list_attr_celeba,
        }
        with open(self._data_path, "wb") as f:
            pickle.dump(data, f)
        edu.mark_prepared(self.root)
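# Hedged usage sketch: reading the pickle written above and splitting by the
# evaluation partition. The 0 = train, 1 = val, 2 = test convention follows
# the official CelebA list_eval_partition.txt.
with open("data.p", "rb") as f:  # path is illustrative
    celeba = pickle.load(f)
train_fnames = celeba["fname"][celeba["partition"] == 0]
val_fnames = celeba["fname"][celeba["partition"] == 1]
test_fnames = celeba["fname"][celeba["partition"] == 2]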
def __init__(self, path):
    # NOTE: the `path` argument is currently ignored; the location is always
    # derived from the dataset utilities.
    self.path = datautil.get_root("sprites")
    if not datautil.is_prepared(self.path):
        fpath = datautil.download_url(
            "nips2015-analogy-data.tar.gz",
            "http://www.scottreed.info/files/nips2015-analogy-data.tar.gz",
            self.path)
        datautil.unpack(fpath)
        datautil.mark_prepared(self.path)
    self.preprocess()
def _prepare(self): self.random_crop = retrieve(self.config, "ImageNetValidation/random_crop", default=False) cachedir = os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache")) self.root = os.path.join(cachedir, "autoencoders/data", self.NAME) self.datadir = os.path.join(self.root, "data") self.txt_filelist = os.path.join(self.root, "filelist.txt") self.expected_length = 50000 if not edu.is_prepared(self.root): # prep self.logger.info("Preparing dataset {} in {}".format( self.NAME, self.root)) datadir = self.datadir if not os.path.exists(datadir): path = os.path.join(self.root, self.FILES[0]) if not os.path.exists(path) or not os.path.getsize( path) == self.SIZES[0]: import academictorrents as at atpath = at.get(self.AT_HASH, datastore=self.root) assert atpath == path self.logger.info("Extracting {} to {}".format(path, datadir)) os.makedirs(datadir, exist_ok=True) with tarfile.open(path, "r:") as tar: tar.extractall(path=datadir) vspath = os.path.join(self.root, self.FILES[1]) if not os.path.exists(vspath) or not os.path.getsize( vspath) == self.SIZES[1]: download(self.VS_URL, vspath) with open(vspath, "r") as f: synset_dict = f.read().splitlines() synset_dict = dict(line.split() for line in synset_dict) self.logger.info("Reorganizing into synset folders") synsets = np.unique(list(synset_dict.values())) for s in synsets: os.makedirs(os.path.join(datadir, s), exist_ok=True) for k, v in synset_dict.items(): src = os.path.join(datadir, k) dst = os.path.join(datadir, v) shutil.move(src, dst) filelist = glob.glob(os.path.join(datadir, "**", "*.JPEG")) filelist = [os.path.relpath(p, start=datadir) for p in filelist] filelist = sorted(filelist) filelist = "\n".join(filelist) + "\n" with open(self.txt_filelist, "w") as f: f.write(filelist) edu.mark_prepared(self.root)
def _prepare(self):
    self.root = edu.get_root(self.NAME)
    self._data_path = Path(self.root).joinpath("data.p")
    if not edu.is_prepared(self.root):
        # prep
        self.logger.info("Preparing dataset {} in {}".format(self.NAME, self.root))
        root = Path(self.root)
        urls = {v: urllib.parse.urljoin(self.URL, v) for v in self.FILES.values()}
        local_files = edu.download_urls(urls, target_dir=root)
        data = dict()
        for k, v in local_files.items():
            data[k] = read_mnist_file(v)
        with open(self._data_path, "wb") as f:
            pickle.dump(data, f)
        edu.mark_prepared(self.root)
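# Hedged sketch of the read_mnist_file helper used above, whose definition is
# not shown here. It assumes the downloaded files are (gzipped) IDX files,
# the standard MNIST distribution format; the dtype map and the big-endian
# dimension handling follow the IDX spec.
import gzip
import struct


def read_mnist_file(path):
    """Parse a (gzipped) IDX file into a numpy array."""
    opener = gzip.open if str(path).endswith(".gz") else open
    with opener(path, "rb") as f:
        # magic number: two zero bytes, a dtype code, and the rank
        zeros, dtype_code, ndim = struct.unpack(">HBB", f.read(4))
        assert zeros == 0, "not an IDX file"
        dtype = {0x08: np.uint8, 0x09: np.int8, 0x0B: np.int16,
                 0x0C: np.int32, 0x0D: np.float32, 0x0E: np.float64}[dtype_code]
        shape = struct.unpack(">" + "I" * ndim, f.read(4 * ndim))
        data = np.frombuffer(f.read(), dtype=np.dtype(dtype).newbyteorder(">"))
        return data.reshape(shape)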
def _prepare(self): self.random_crop = retrieve(self.config, "ImageNetTrain/random_crop", default=True) cachedir = os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache")) self.root = os.path.join(cachedir, "autoencoders/data", self.NAME) self.datadir = os.path.join(self.root, "data") self.txt_filelist = os.path.join(self.root, "filelist.txt") self.expected_length = 1281167 if not edu.is_prepared(self.root): # prep self.logger.info("Preparing dataset {} in {}".format( self.NAME, self.root)) datadir = self.datadir if not os.path.exists(datadir): path = os.path.join(self.root, self.FILES[0]) if not os.path.exists(path) or not os.path.getsize( path) == self.SIZES[0]: import academictorrents as at atpath = at.get(self.AT_HASH, datastore=self.root) assert atpath == path self.logger.info("Extracting {} to {}".format(path, datadir)) os.makedirs(datadir, exist_ok=True) with tarfile.open(path, "r:") as tar: tar.extractall(path=datadir) self.logger.info("Extracting sub-tars.") subpaths = sorted(glob.glob(os.path.join(datadir, "*.tar"))) for subpath in tqdm(subpaths): subdir = subpath[:-len(".tar")] os.makedirs(subdir, exist_ok=True) with tarfile.open(subpath, "r:") as tar: tar.extractall(path=subdir) filelist = glob.glob(os.path.join(datadir, "**", "*.JPEG")) filelist = [os.path.relpath(p, start=datadir) for p in filelist] filelist = sorted(filelist) filelist = "\n".join(filelist) + "\n" with open(self.txt_filelist, "w") as f: f.write(filelist) edu.mark_prepared(self.root)
def _prepare(self):
    self.root = edu.get_root(self.NAME)
    self._data_path = Path(self.root).joinpath("data.p")
    if not edu.is_prepared(self.root):
        # prep
        self.logger.info("Preparing dataset {} in {}".format(self.NAME, self.root))
        root = Path(self.root)
        urls = {v: urllib.parse.urljoin(self.URL, v) for v in self.FILES.values()}
        local_files = edu.download_urls(urls, target_dir=root)
        edu.unpack(local_files["cifar-10-python.tar.gz"])
        base = os.path.join(self.root, "cifar-10-batches-py")

        # the CIFAR batches are pickled with byte-string keys
        labels = list()
        filenames = list()
        datas = list()
        for batch_file in ["data_batch_{}".format(i) for i in range(1, 6)]:
            with open(os.path.join(base, batch_file), "rb") as f:
                batch_data = pickle.load(f, encoding="bytes")
            labels += batch_data[b"labels"]
            filenames += [fname.decode() for fname in batch_data[b"filenames"]]
            datas.append(batch_data[b"data"])

        with open(os.path.join(base, "test_batch"), "rb") as f:
            test_data = pickle.load(f, encoding="bytes")
        test_labels = test_data[b"labels"]
        test_filenames = [fname.decode() for fname in test_data[b"filenames"]]
        test_datas = test_data[b"data"]

        with open(os.path.join(base, "batches.meta"), "rb") as f:
            _meta = pickle.load(f, encoding="bytes")
        meta = {
            "label_names": [name.decode() for name in _meta[b"label_names"]],
            "num_vis": _meta[b"num_vis"],
            "num_cases_per_batch": _meta[b"num_cases_per_batch"],
        }

        # convert the flat rows to (32, 32, 3) RGB uint8 images: each row is
        # stored channel-major (CHW), so reshape, then transpose to HWC
        images = np.concatenate(datas, axis=0)
        images = np.reshape(images, [-1, 3, 32, 32])
        images = np.transpose(images, [0, 2, 3, 1])
        test_images = np.reshape(test_datas, [-1, 3, 32, 32])
        test_images = np.transpose(test_images, [0, 2, 3, 1])

        filenames = np.array(filenames)
        test_filenames = np.array(test_filenames)
        labels = np.array(labels)
        test_labels = np.array(test_labels)

        data = {
            "train": dict(images=images, filenames=filenames, labels=labels),
            "test": dict(images=test_images, filenames=test_filenames,
                         labels=test_labels),
            "meta": meta,
        }
        with open(self._data_path, "wb") as f:
            pickle.dump(data, f)
        edu.mark_prepared(self.root)
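# Hedged usage sketch: loading the pickle written above and checking the
# expected CIFAR-10 shapes and class count.
with open("data.p", "rb") as f:  # path is illustrative
    cifar = pickle.load(f)
assert cifar["train"]["images"].shape == (50000, 32, 32, 3)
assert cifar["test"]["images"].shape == (10000, 32, 32, 3)
assert len(cifar["meta"]["label_names"]) == 10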
def _prepare(self):
    cachedir = os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache"))
    self.root = os.path.join(cachedir, "autoencoders/data/AnimalFaces")
    self.logger.info("Using data located at {}".format(self.root))
    os.makedirs(self.root, exist_ok=True)
    self.datadir = os.path.join(self.root, "data")
    if not edu.is_prepared(self.root):
        self.logger.info("Preparing dataset {} in {}".format(self.NAME, self.root))
        if not os.path.exists(self.datadir):
            os.makedirs(self.datadir, exist_ok=True)
            imagenet = ImageNetTrain()

            coor_path = os.path.join(self.root, "animalface_coordinates.txt")
            if not os.path.exists(coor_path):
                download(self.COOR_URL, coor_path)
            with open(coor_path, "r") as f:
                animalface_coordinates = f.readlines()

            # crop the annotated animal faces out of the ImageNet images;
            # the coordinate file stores box edges, since PIL's crop expects
            # (left, upper, right, lower) rather than a width and height
            for line in tqdm(animalface_coordinates):
                ls = line.strip().split(' ')
                img_name = os.path.join(imagenet.datadir, ls[0])
                img = Image.open(img_name)
                img = img.convert('RGB')
                x1 = int(ls[1])
                y1 = int(ls[2])
                x2 = int(ls[3])
                y2 = int(ls[4])
                crop = img.crop((x1, y1, x2, y2))
                out_name = os.path.join(
                    self.datadir,
                    '%s_%d_%d_%d_%d.jpg' % (ls[0], x1, y1, x2, y2))
                os.makedirs(os.path.dirname(out_name), exist_ok=True)
                crop.save(out_name)

        # split lists for the full, shared and restricted protocols
        train_path = os.path.join(self.root, "animals_list_train.txt")
        if not os.path.exists(train_path):
            download(self.TRAIN_URL, train_path)
        test_path = os.path.join(self.root, "animals_list_test.txt")
        if not os.path.exists(test_path):
            download(self.TEST_URL, test_path)
        shared_train_path = os.path.join(self.root, "shared_animalfaces_train.txt")
        if not os.path.exists(shared_train_path):
            download(self.SHARED_TRAIN_URL, shared_train_path)
        shared_test_path = os.path.join(self.root, "shared_animalfaces_test.txt")
        if not os.path.exists(shared_test_path):
            download(self.SHARED_TEST_URL, shared_test_path)
        restricted_train_path = os.path.join(self.root,
                                             "restricted_animalfaces_train.txt")
        if not os.path.exists(restricted_train_path):
            download(self.RESTRICTED_TRAIN_URL, restricted_train_path)
        restricted_test_path = os.path.join(self.root,
                                            "restricted_animalfaces_test.txt")
        if not os.path.exists(restricted_test_path):
            download(self.RESTRICTED_TEST_URL, restricted_test_path)

        edu.mark_prepared(self.root)
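# Hedged usage sketch: the split files are assumed to contain one image path
# per line, relative to datadir; both the file format and the "data" datadir
# name below are assumptions for illustration.
with open("animals_list_train.txt", "r") as f:  # path is illustrative
    relpaths = f.read().splitlines()
abspaths = [os.path.join("data", p) for p in relpaths]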