def _make_train_and_valid_dataloader(self):
    if self._data_augmentation:
        transform = training_transform_augmented
    else:
        transform = training_transform_not_augmented
    train_dataset = datasets.CIFAR10(
        root=config.get_data_dir(),
        train=True,
        download=True,
        transform=transform,
    )
    valid_dataset = datasets.CIFAR10(
        root=config.get_data_dir(),
        train=True,
        download=True,
        transform=training_transform_not_augmented,
    )
    train_loader, valid_loader = self._make_train_and_valid_dataloader_helper(
        train_dataset, valid_dataset)
    return train_loader, valid_loader
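# `training_transform_augmented` and `training_transform_not_augmented`
# are defined elsewhere in the module. A minimal sketch of what they
# might look like, assuming standard pad-and-crop plus horizontal-flip
# augmentation; the normalization statistics below are the commonly
# quoted CIFAR-10 values, not taken from this source.
import torchvision.transforms as transforms

training_transform_not_augmented = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2470, 0.2435, 0.2616)),
])

training_transform_augmented = transforms.Compose([
    transforms.RandomCrop(32, padding=4),  # pad by 4, crop back to 32x32
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2470, 0.2435, 0.2616)),
])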
def _make_train_and_valid_dataloader(self):
    transform = transforms.ToTensor()
    train_dataset = datasets.MNIST(
        root=config.get_data_dir(), train=True, download=True,
        transform=transform)
    valid_dataset = datasets.MNIST(
        root=config.get_data_dir(), train=True, download=True,
        transform=transform)
    train_loader, valid_loader = self._make_train_and_valid_dataloader_helper(
        train_dataset, valid_dataset)
    return train_loader, valid_loader
def load_label_dict(dataset):
    """Get a dict that translates from the label number to the
    human-readable class (e.g., 1 -> automobile for CIFAR-10).

    Args:
        dataset (str): Name of the dataset.

    Returns:
        dict: Dictionary that translates from class number to class label.
    """
    if dataset == "cifar10":
        path = os.path.join(config.get_data_dir(),
                            "cifar-10/batches.meta.txt")
        # readlines() would return a list with trailing newlines, so
        # build a number -> label dict explicitly.
        with open(path) as lookup_file:
            label_dict = {
                i: line.rstrip() for i, line in enumerate(lookup_file)
            }
    elif dataset == "cifar100":
        path = os.path.join(config.get_data_dir(),
                            "cifar-100/fine_label_names.txt")
        with open(path) as lookup_file:
            label_dict = {
                i: line.rstrip() for i, line in enumerate(lookup_file)
            }
    elif dataset == "fmnist":
        label_dict = {
            0: "T-shirt",
            1: "Trouser",
            2: "Pullover",
            3: "Dress",
            4: "Coat",
            5: "Sandal",
            6: "Shirt",
            7: "Sneaker",
            8: "Bag",
            9: "Ankle boot",
        }
    elif dataset == "imagenet":
        label_file = os.path.join(
            os.path.realpath(
                os.path.join(os.getcwd(), os.path.dirname(__file__))),
            "imagenet_labels.txt",
        )
        # Read the labels from the text file.
        with open(label_file) as f:
            label_dict = {i: line.rstrip() for i, line in enumerate(f)}
    else:
        label_dict = IdentityDict()
    return label_dict
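# `IdentityDict` is not shown in this section. A plausible minimal
# implementation (an assumption, not the confirmed original): a dict
# that returns any missing key unchanged, so unknown datasets get
# pass-through labels.
class IdentityDict(dict):
    """Dictionary that maps every missing key to itself."""

    def __missing__(self, key):
        return key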
def _make_test_dataloader(self):
    transform = training_transform_not_augmented
    test_dataset = datasets.CIFAR100(
        root=config.get_data_dir(), train=False, download=True,
        transform=transform)
    return self._make_dataloader(test_dataset, sampler=None)
def _make_train_datasets(self): """Creates the three MNIST datasets stemming from the training part of the data set, i.e. the training set, the training evaluation set, and the validation set. Returns: A tf.data.Dataset instance with batches of training data. A tf.data.Dataset instance with batches of training eval data. A tf.data.Dataset instance with batches of validation data. """ data_dir = config.get_data_dir() train_images_file = os.path.join( data_dir, "mnist", "train-images-idx3-ubyte.gz" ) train_labels_file = os.path.join( data_dir, "mnist", "train-labels-idx1-ubyte.gz" ) data = self._load_dataset(train_images_file, train_labels_file) valid_data = data.take(self._train_eval_size) train_data = data.skip(self._train_eval_size) train_data = self._make_dataset(train_data, shuffle=True) train_eval_data = train_data.take( self._train_eval_size // self._batch_size ) valid_data = self._make_dataset(valid_data, shuffle=False) return train_data, train_eval_data, valid_data
def _make_train_datasets(self): """Creates the three SVHN datasets stemming from the training part of the data set, i.e. the training set, the training evaluation set, and the validation set. Returns: A tf.data.Dataset instance with batches of training data. A tf.data.Dataset instance with batches of training eval data. A tf.data.Dataset instance with batches of validation data. """ pattern = os.path.join(config.get_data_dir(), "svhn", "data_batch_*.bin") data = self._load_dataset(pattern) valid_data = data.take(self._train_eval_size) train_data = data.skip(self._train_eval_size) train_data = self._make_dataset( train_data, data_augmentation=self._data_augmentation, shuffle=True) train_eval_data = train_data.take(self._train_eval_size // self._batch_size) valid_data = self._make_dataset(valid_data, data_augmentation=False, shuffle=False) return train_data, train_eval_data, valid_data
def _make_test_dataloader(self):
    transform = transforms.ToTensor()
    test_dataset = datasets.MNIST(
        root=config.get_data_dir(), train=False, download=True,
        transform=transform)
    return self._make_dataloader(test_dataset, sampler=None)
def _make_test_dataloader(self):
    transform = training_transform_not_augmented
    test_dataset = datasets.SVHN(
        root=config.get_data_dir(), split='test', download=True,
        transform=transform)
    return self._make_dataloader(test_dataset, sampler=None)
def _make_train_dataset(self): """Creates the Tolstoi training dataset. Returns: A tf.data.Dataset instance with batches of training data. """ filepath = os.path.join(config.get_data_dir(), "tolstoi", "train.npy") return self._make_dataset(filepath)
def _make_test_dataloader(self):
    test_dataset = datasets.CIFAR10(
        root=config.get_data_dir(),
        train=False,
        download=True,
        transform=self._transform,
    )
    return self._make_dataloader(test_dataset, sampler=None)
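# `_make_dataloader` is a shared helper that is not part of this
# section. A hypothetical reconstruction of what it plausibly does;
# the real signature and defaults may differ, and `self._pin_memory`
# and `self._num_workers` are assumed attributes.
import torch

def _make_dataloader(self, dataset, sampler=None):
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=self._batch_size,
        sampler=sampler,      # ordering is delegated to the sampler
        drop_last=True,       # keep every batch the same size
        pin_memory=self._pin_memory,
        num_workers=self._num_workers,
    )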
def _make_train_and_valid_dataloader(self):
    if self._data_augmentation:
        transform = training_transform_augmented
    else:
        transform = training_transform_not_augmented
    train_dataset = datasets.SVHN(
        root=config.get_data_dir(), split='train', download=True,
        transform=transform)
    # We want the validation set to be the same size as the test set,
    # so we do NOT use the 'extra' split that is available for SVHN.
    valid_dataset = datasets.SVHN(
        root=config.get_data_dir(), split='train', download=True,
        transform=training_transform_not_augmented)
    train_loader, valid_loader = self._make_train_and_valid_dataloader_helper(
        train_dataset, valid_dataset)
    return train_loader, valid_loader
def _make_test_dataset(self): """Creates the CIFAR-100 test dataset. Returns: A tf.data.Dataset instance with batches of test data. """ pattern = os.path.join(config.get_data_dir(), "cifar-100", "test.bin") return self._make_dataset( pattern, data_augmentation=False, shuffle=False)
def _make_train_and_valid_dataloader(self):
    train_dataset = datasets.CIFAR10(
        root=config.get_data_dir(),
        train=True,
        download=True,
        transform=self._transform,
    )
    valid_dataset = datasets.CIFAR10(
        root=config.get_data_dir(),
        train=True,
        download=True,
        transform=self._transform,
    )
    train_loader, valid_loader = self._make_train_and_valid_dataloader_helper(
        train_dataset, valid_dataset)
    return train_loader, valid_loader
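# `_make_train_and_valid_dataloader_helper` is likewise defined
# elsewhere. Passing two copies of the training data lets the
# validation loader use a non-augmented transform (see the CIFAR-10
# and SVHN variants above) while drawing from the same images. A
# hedged sketch, assuming a `self._valid_size` attribute and a random,
# disjoint index split:
import torch
from torch.utils.data import SubsetRandomSampler

def _make_train_and_valid_dataloader_helper(self, train_dataset,
                                            valid_dataset):
    indices = torch.randperm(len(train_dataset)).tolist()
    valid_indices = indices[:self._valid_size]
    train_indices = indices[self._valid_size:]
    train_loader = self._make_dataloader(
        train_dataset, sampler=SubsetRandomSampler(train_indices))
    valid_loader = self._make_dataloader(
        valid_dataset, sampler=SubsetRandomSampler(valid_indices))
    return train_loader, valid_loader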
def _make_train_dataset(self): """Creates the CIFAR-100 training dataset. Returns: A tf.data.Dataset instance with batches of training data. """ pattern = os.path.join(config.get_data_dir(), "cifar-100", "train.bin") return self._make_dataset( pattern, data_augmentation=self._data_augmentation, shuffle=True)
def _make_test_dataset(self): """Creates the Tolstoi test dataset. Returns: A tf.data.Dataset instance with batches of test data. """ filepath = os.path.join(config.get_data_dir(), "tolstoi", "test.npy") data = np.load(filepath) return self._make_dataset(data)
def _make_train_dataset(self): """Creates the MNIST training dataset. Returns: A tf.data.Dataset instance with batches of training data. """ data_dir = config.get_data_dir() train_images_file = os.path.join(data_dir, "mnist", "train-images-idx3-ubyte.gz") train_labels_file = os.path.join(data_dir, "mnist", "train-labels-idx1-ubyte.gz") return self._make_dataset( train_images_file, train_labels_file, shuffle=True)
def _make_test_dataset(self): """Creates the SVHN test dataset. Returns: A tf.data.Dataset instance with batches of test data. """ pattern = os.path.join(config.get_data_dir(), "svhn", "test_batch.bin") test_data = self._load_dataset(pattern) return self._make_dataset(test_data, data_augmentation=False, shuffle=False)
def _make_test_dataset(self): """Creates the MNIST test dataset. Returns: A tf.data.Dataset instance with batches of test data. """ data_dir = config.get_data_dir() test_images_file = os.path.join(data_dir, "mnist", "t10k-images-idx3-ubyte.gz") test_labels_file = os.path.join(data_dir, "mnist", "t10k-labels-idx1-ubyte.gz") return self._make_dataset( test_images_file, test_labels_file, shuffle=False)
def load_label_dict(dataset):
    """Get a dict that translates from the label number to the
    human-readable class (e.g., 1 -> automobile for CIFAR-10).

    Args:
        dataset (str): Name of the dataset.

    Returns:
        dict: Dictionary that translates from class number to class label.
    """
    if dataset == "tolstoi":
        filepath = os.path.join(config.get_data_dir(), "tolstoi/vocab.pkl")
        # Use a context manager so the file handle is closed promptly.
        with open(filepath, "rb") as f:
            label_dict = pickle.load(f)
    else:
        label_dict = IdentityDict()
    return label_dict
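# Example usage (hypothetical call sites): the Tolstoi branch yields
# the pickled character vocabulary, while any other name falls back to
# the identity mapping.
label_dict = load_label_dict("tolstoi")   # id -> character, via vocab.pkl
fallback = load_label_dict("quadratic")   # IdentityDict: fallback[7] == 7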
def _make_test_dataset(self): """Creates the ImageNet test dataset. Returns: A tf.data.Dataset instance with batches of test data. """ pattern = os.path.join(config.get_data_dir(), "imagenet", "validation-*") test_data = self._load_dataset(pattern) return self._make_dataset( test_data, per_image_standardization=True, random_crop=False, random_flip_left_right=False, distort_color=False, shuffle=False, )
def _make_train_datasets(self): """Creates the three ImageNet datasets stemming from the training part of the data set, i.e. the training set, the training evaluation set, and the validation set. Returns: A tf.data.Dataset instance with batches of training data. A tf.data.Dataset instance with batches of training eval data. A tf.data.Dataset instance with batches of validation data. """ pattern = os.path.join(config.get_data_dir(), "imagenet", "train-*") data = self._load_dataset(pattern) valid_data = data.take(self._train_eval_size) train_data = data.skip(self._train_eval_size) train_data = self._make_dataset( train_data, per_image_standardization=True, random_crop=self._data_augmentation, random_flip_left_right=self._data_augmentation, distort_color=False, shuffle=True, ) train_eval_data = train_data.take(self._train_eval_size // self._batch_size) valid_data = self._make_dataset( valid_data, per_image_standardization=True, random_crop=False, random_flip_left_right=False, distort_color=False, shuffle=False, ) return train_data, train_eval_data, valid_data
def _make_train_datasets(self): """Creates the three Tolstoi datasets stemming from the training part of the data set, i.e. the training set, the training evaluation set, and the validation set. Returns: A tf.data.Dataset instance with batches of training data. A tf.data.Dataset instance with batches of training eval data. A tf.data.Dataset instance with batches of validation data. """ filepath = os.path.join(config.get_data_dir(), "tolstoi", "train.npy") data = np.load(filepath) valid_data = data[0:self._train_eval_size] train_data = data[self._train_eval_size:] train_data = self._make_dataset(train_data) train_eval_data = train_data.take( self._train_eval_size // (self._batch_size * self._seq_length)) valid_data = self._make_dataset(valid_data) return train_data, train_eval_data, valid_data