def __init__(self, dim=20, n_points=2000, dtype='float32', device="cpu"):
    """Synthetic regression data: train drawn from U(-1, 1), test from U(1, 2).

    The two splits are deliberately sampled from disjoint ranges, producing a
    distribution shift between train and test.
    """
    len_train = n_points // 2
    len_test = n_points - len_train

    # Draw the four arrays in this exact order to keep the RNG stream stable.
    train_X = t.from_numpy(
        np.random.uniform(low=-1., high=1., size=[len_train, dim]).astype(dtype))
    train_y = t.from_numpy(
        np.random.uniform(low=-1., high=1., size=[len_train, 1]).astype(dtype))
    test_X = t.from_numpy(
        np.random.uniform(low=1., high=2., size=[len_test, dim]).astype(dtype))
    test_y = t.from_numpy(
        np.random.uniform(low=1., high=2., size=[len_test, 1]).astype(dtype))

    X_unnorm = t.cat([train_X, test_X])
    y_unnorm = t.cat([train_y, test_y])
    index_train = np.arange(len_train)
    index_test = np.arange(len_train, n_points)

    # record unnormalized dataset
    self.unnorm = Dataset(X_unnorm, y_unnorm, index_train, index_test, device)

    # normalization constants are computed on the training split only
    self.X_std = t.std(self.unnorm.train_X, 0)
    self.X_std[self.X_std == 0] = 1.  # guard against division by zero
    self.X_mean = t.mean(self.unnorm.train_X, 0)
    self.y_mean = t.mean(self.unnorm.train_y)
    self.y_std = t.std(self.unnorm.train_y)

    self.norm = Dataset((self.unnorm.X - self.X_mean) / self.X_std,
                        (self.unnorm.y - self.y_mean) / self.y_std,
                        index_train, index_test, device)

    # NOTE(review): despite the name, this is the size of the FULL dataset
    # (train + test) — consistent with the other loaders in this file.
    self.num_train_set = self.unnorm.X.shape[0]
    self.in_shape = self.unnorm.X.shape[1:]
    self.out_shape = self.unnorm.y.shape[1:]
def __init__(self, dataset, model, batch_size=None, dtype='float32', device="cpu"):
    """Rebuild a dataset whose targets are samples from `model`'s predictive.

    If `batch_size` is None the model is evaluated in one full-batch forward
    pass; otherwise predictions are taken in mini-batches over the train and
    then the test loader to bound memory use.
    """
    if batch_size is None:
        new_y = model(dataset.norm.X).sample()
    else:
        loaders = [
            t.utils.data.DataLoader(dataset.norm.train, batch_size=batch_size),
            t.utils.data.DataLoader(dataset.norm.test, batch_size=batch_size),
        ]
        # Train batches first, then test batches — matches the index split below.
        preds = [model(batch_x).sample()
                 for loader in loaders
                 for batch_x, _ in loader]
        new_y = t.cat(preds)

    # split into train and test
    n_train = len(dataset.norm.train_X)
    index_train = np.arange(n_train)
    index_test = np.arange(n_train, len(dataset.norm.X))

    # NOTE(review): the sampled targets are reused verbatim for both the
    # normalized and unnormalized datasets — confirm that de-normalizing y
    # is intentionally skipped here.
    self.unnorm = Dataset(dataset.unnorm.X, new_y, index_train, index_test, device)
    self.norm = Dataset(dataset.norm.X, new_y, index_train, index_test, device)

    self.num_train_set = self.unnorm.X.shape[0]
    self.in_shape = self.unnorm.X.shape[1:]
    self.out_shape = self.unnorm.y.shape[1:]
def __init__(self, dtype='float32', device="cpu", download=False):
    """MNIST, flattened to 784-dim vectors; the norm copy scales pixels to [0, 1]."""
    root = os.path.abspath(os.path.dirname(__file__))
    dataset_dir = f'{root}/mnist/'

    # load data
    train_set = torchvision.datasets.MNIST(dataset_dir, download=download, train=True)
    test_set = torchvision.datasets.MNIST(dataset_dir, download=download, train=False)

    # Stack train + test into single tensors; flatten 28x28 images to 784.
    X_unnorm = t.from_numpy(
        np.concatenate([train_set.data, test_set.data]).astype(dtype)).reshape([-1, 784])
    y = t.from_numpy(
        np.concatenate([train_set.targets, test_set.targets]).astype('int'))

    # train / test split: training images first, then test images
    n_train = len(train_set)
    index_train = np.arange(n_train)
    index_test = np.arange(n_train, n_train + len(test_set))

    # create unnormalized data set
    self.unnorm = Dataset(X_unnorm, y, index_train, index_test, device)
    # create normalized data set (raw pixel values are in [0, 255])
    self.norm = Dataset(self.unnorm.X / 255., y, index_train, index_test, device)

    # save some data shapes
    self.num_train_set = self.unnorm.X.shape[0]
    self.in_shape = self.unnorm.X.shape[1:]
    self.out_shape = self.unnorm.y.shape[1:]
def __init__(self, dataset, split, dtype='float32', device="cpu"):
    """UCI-style dataset loader using precomputed train/test split index files.

    Expects `{dataset}/data.txt` plus `index_features.txt`, `index_target.txt`
    and per-split `index_train_{split}.txt` / `index_test_{split}.txt` files
    next to this module.
    """
    root = os.path.abspath(os.path.dirname(__file__))
    dataset_dir = f'{root}/{dataset}/'

    data = np.loadtxt(f'{dataset_dir}/data.txt').astype(getattr(np, dtype))
    feature_idx = np.loadtxt(f'{dataset_dir}/index_features.txt').astype(int)
    target_idx = np.loadtxt(f'{dataset_dir}/index_target.txt').astype(int)

    X_unnorm = t.from_numpy(data[:, feature_idx])
    # Slice (rather than index) the target column so y keeps shape (N, 1).
    y_unnorm = t.from_numpy(data[:, target_idx:target_idx + 1])

    # split into train and test
    index_train = np.loadtxt(
        f'{dataset_dir}/index_train_{split}.txt').astype(int)
    index_test = np.loadtxt(
        f'{dataset_dir}/index_test_{split}.txt').astype(int)

    # record unnormalized dataset
    self.unnorm = Dataset(X_unnorm, y_unnorm, index_train, index_test, device)

    # normalization constants are computed on the training split only
    self.X_std = t.std(self.unnorm.train_X, 0)
    self.X_std[self.X_std == 0] = 1.  # avoid division by zero for constant features
    self.X_mean = t.mean(self.unnorm.train_X, 0)
    self.y_mean = t.mean(self.unnorm.train_y)
    self.y_std = t.std(self.unnorm.train_y)

    self.norm = Dataset((self.unnorm.X - self.X_mean) / self.X_std,
                        (self.unnorm.y - self.y_mean) / self.y_std,
                        index_train, index_test, device)

    # NOTE(review): despite the name, this is the size of the FULL dataset
    # (train + test) — consistent with the other loaders in this file.
    self.num_train_set = self.unnorm.X.shape[0]
    self.in_shape = self.unnorm.X.shape[1:]
    self.out_shape = self.unnorm.y.shape[1:]
def __init__(self, dtype='float32', device="cpu", download=False):
    """MNIST plus two rotated copies of the test set.

    The training split is the plain MNIST training set. The test split is the
    plain test set followed by the same images rotated by uniform random
    integer angles in [-45, 45) and [-90, 90) degrees respectively.  A fixed
    seed makes the rotated copies reproducible.
    """
    _ROOT = os.path.abspath(os.path.dirname(__file__))
    dataset_dir = f'{_ROOT}/mnist/'

    # load data
    data_train = torchvision.datasets.MNIST(dataset_dir, download=download, train=True)
    data_test = torchvision.datasets.MNIST(dataset_dir, download=download, train=False)

    # Rotate the images (fixed seed for reproducibility)
    np.random.seed(1337)

    def _rotated_copy(max_angle):
        # One rotated copy of the full test set; each image gets a fresh
        # uniform angle in [-max_angle, max_angle). Draw order matches the
        # original duplicated loops so the RNG stream is unchanged.
        images = np.zeros_like(data_test.data)
        labels = np.zeros_like(data_test.targets)
        for i, img in enumerate(data_test.data):
            angle = np.random.randint(low=-max_angle, high=max_angle)
            images[i] = ndimage.rotate(img, angle, reshape=False)
            labels[i] = data_test.targets[i]
        return images, labels

    data_test_rot_small, labels_rot_small = _rotated_copy(45)
    data_test_rot_large, labels_rot_large = _rotated_copy(90)

    # get data into right shape and type (flatten 28x28 images to 784)
    X_unnorm = t.from_numpy(
        np.concatenate([
            data_train.data, data_test.data, data_test_rot_small,
            data_test_rot_large
        ]).astype(dtype)).reshape([-1, 784])
    y = t.from_numpy(
        np.concatenate([
            data_train.targets, data_test.targets, labels_rot_small,
            labels_rot_large
        ]).astype('int'))

    # train / test split: test indices cover all three test variants
    index_train = np.arange(len(data_train))
    index_test = np.arange(len(data_train),
                           len(data_train) + 3 * len(data_test))

    # create unnormalized data set
    self.unnorm = Dataset(X_unnorm, y, index_train, index_test, device)
    # create normalized data set (raw pixel values are in [0, 255])
    X_norm = self.unnorm.X / 255.
    self.norm = Dataset(X_norm, y, index_train, index_test, device)

    # save some data shapes
    # NOTE(review): as elsewhere in this file, num_train_set is the FULL
    # dataset size (train + test), not the training-split size.
    self.num_train_set = self.unnorm.X.shape[0]
    self.in_shape = self.unnorm.X.shape[1:]
    self.out_shape = self.unnorm.y.shape[1:]