def _get_prior_parameters_observations(self):
    self._has_been_used = True
    parameters = np.load(
        os.path.join(utils.get_data_root(), "lotka-volterra",
                     "prior-parameters.npy"))
    observations = np.load(
        os.path.join(utils.get_data_root(), "lotka-volterra",
                     "prior-observations.npy"))
    ix = np.random.permutation(range(parameters.shape[0]))
    return parameters[ix], observations[ix]

def _create_data(self):
    root = utils.get_data_root()
    path = os.path.join(root, 'faces', self.name + '.jpg')
    try:
        image = io.imread(path)
    except FileNotFoundError:
        raise RuntimeError('Unknown face name: {}'.format(self.name))
    image = color.rgb2gray(image)
    self.image = transform.resize(image, [512, 512])

    grid = np.array([(x, y) for x in range(self.image.shape[0])
                     for y in range(self.image.shape[1])])
    rotation_matrix = np.array([[0, -1], [1, 0]])
    p = self.image.reshape(-1) / sum(self.image.reshape(-1))
    ix = np.random.choice(range(len(grid)),
                          size=self.num_points,
                          replace=True,
                          p=p)
    points = grid[ix].astype(np.float32)
    points += np.random.rand(self.num_points, 2)  # dequantize
    points /= (self.image.shape[0])  # scale to [0, 1]
    # assert 0 <= min(points) <= max(points) <= 1

    self.data = torch.tensor(points @ rotation_matrix).float()
    self.data[:, 1] += 1

def get_ground_truth_posterior_samples(self, num_samples=None):
    """
    We have pre-generated posterior samples using MCMC on the product of the
    analytic likelihood and a uniform prior on [-3, 3]^5. Thus they are ground
    truth as long as MCMC has behaved well.

    We load these once if samples have not been loaded before, store them for
    future use, and return as many as are requested.

    :param num_samples: int
        Number of samples to return.
    :return: torch.Tensor [num_samples, parameter_dim]
        Batch of posterior samples.
    """
    if self._posterior_samples is None:
        self._posterior_samples = torch.Tensor(
            np.load(
                os.path.join(
                    utils.get_data_root(),
                    "nonlinear-gaussian",
                    "true-posterior-samples.npy",
                )))
    if num_samples is not None:
        return self._posterior_samples[:num_samples]
    else:
        return self._posterior_samples

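# Illustrative usage sketch, not part of the original file. The owning
# simulator class is not shown here, so the constructor name below is an
# assumption. The first call loads the samples from disk and caches them on
# the instance; later calls just slice the cached tensor.
#
#     simulator = NonlinearGaussianSimulator()  # hypothetical class name
#     samples = simulator.get_ground_truth_posterior_samples(num_samples=1000)
#     # samples.shape -> torch.Size([1000, parameter_dim])
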
def __init__(self, split='train', frac=None):
    path = os.path.join(utils.get_data_root(), 'miniboone',
                        '{}.npy'.format(split))
    self.data = np.load(path).astype(np.float32)
    self.n, self.dim = self.data.shape
    if frac is not None:
        self.n = int(frac * self.n)

def get_data_path(data_root=None):
    if data_root is None:
        data_root = get_data_root()
    # Plain string concatenation: assumes data_root ends with a path separator.
    data_path = data_root + 'Train_Sets/'
    return data_path

def save_splits():
    train, val, test = load_miniboone()
    splits = (('train', train), ('val', val), ('test', test))
    for split in splits:
        name, data = split
        file = os.path.join(utils.get_data_root(), 'miniboone',
                            '{}.npy'.format(name))
        np.save(file, data)

def __init__(self):
    n_percentiles = 5
    self.perc = np.linspace(0.0, 100.0, n_percentiles)
    path = os.path.join(utils.get_data_root(), "mg1", "pilot_run_results.pkl")
    with open(path, "rb") as file:
        self.whiten_params = pickle.load(file, encoding="bytes")

def _get_prior_parameters_observations(self):
    """
    The Lotka-Volterra simulator is expensive, so load prior simulations from
    disk.

    :return: np.array, np.array
    """
    self._has_been_used = True

    file = ("prior-parameters.npy" if not self._gaussian_prior else
            "prior-parameters-gaussian.npy")
    parameters = np.load(
        os.path.join(utils.get_data_root(), "lotka-volterra", file))

    file = ("prior-observations.npy" if not self._gaussian_prior else
            "prior-observations-gaussian.npy")
    observations = np.load(
        os.path.join(utils.get_data_root(), "lotka-volterra", file))

    ix = np.random.permutation(range(parameters.shape[0]))
    return parameters[ix], observations[ix]

def __init__(self, split='train', transform=None):
    self.transform = transform
    path = os.path.join(utils.get_data_root(), 'omniglot', 'omniglot.mat')
    rawdata = loadmat(path)
    if split == 'train':
        self.data = rawdata['data'].T.reshape(-1, 28, 28)
        self.targets = rawdata['target'].T
    elif split == 'test':
        self.data = rawdata['testdata'].T.reshape(-1, 28, 28)
        self.targets = rawdata['testtarget'].T
    else:
        raise ValueError

def load_miniboone():
    def load_data(path):
        # NOTE: To remember how the pre-processing was done.
        # data_ = pd.read_csv(root_path, names=[str(x) for x in range(50)],
        #                     delim_whitespace=True)
        # print data_.head()
        # data_ = data_.as_matrix()
        # # Remove some random outliers
        # indices = (data_[:, 0] < -100)
        # data_ = data_[~indices]
        #
        # i = 0
        # # Remove any features that have too many re-occurring real values.
        # features_to_remove = []
        # for feature in data_.T:
        #     c = Counter(feature)
        #     max_count = np.array([v for k, v in sorted(c.iteritems())])[0]
        #     if max_count > 5:
        #         features_to_remove.append(i)
        #     i += 1
        # data_ = data_[:, np.array([i for i in range(data_.shape[1])
        #                            if i not in features_to_remove])]
        # np.save("~/data_/miniboone/data_.npy", data_)
        data = np.load(path)
        N_test = int(0.1 * data.shape[0])
        data_test = data[-N_test:]
        data = data[0:-N_test]
        N_validate = int(0.1 * data.shape[0])
        data_validate = data[-N_validate:]
        data_train = data[0:-N_validate]
        return data_train, data_validate, data_test

    def load_data_normalised(path):
        data_train, data_validate, data_test = load_data(path)
        data = np.vstack((data_train, data_validate))
        mu = data.mean(axis=0)
        s = data.std(axis=0)
        data_train = (data_train - mu) / s
        data_validate = (data_validate - mu) / s
        data_test = (data_test - mu) / s
        return data_train, data_validate, data_test

    return load_data_normalised(
        path=os.path.join(utils.get_data_root(), 'miniboone', 'data.npy'))

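# A minimal, runnable sketch of the pre-processing documented in the NOTE
# inside load_miniboone above, ported to Python 3 and current pandas. The
# raw-text input path and the output path are illustrative assumptions; the
# loaders in this file expect the already processed 'data.npy'.
def preprocess_miniboone_raw(raw_path, out_path):
    from collections import Counter

    import numpy as np
    import pandas as pd

    # Read the whitespace-delimited raw file with 50 unnamed columns.
    data_ = pd.read_csv(raw_path, names=[str(x) for x in range(50)],
                        sep=r'\s+').to_numpy()
    # Remove some random outliers.
    data_ = data_[~(data_[:, 0] < -100)]
    # Remove any features that have too many re-occurring real values,
    # following the original note verbatim.
    features_to_remove = []
    for i, feature in enumerate(data_.T):
        counts = Counter(feature)
        max_count = np.array([v for k, v in sorted(counts.items())])[0]
        if max_count > 5:
            features_to_remove.append(i)
    keep = [i for i in range(data_.shape[1]) if i not in features_to_remove]
    np.save(out_path, data_[:, keep])
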
def get_output_folder(data_root=None, modality=None):
    if data_root is None:
        data_root = get_data_root()
    output_folder_structure = get_output_folder_structure()
    if modality is None:
        folder = ''
    elif modality == get_ground_truth_keyword():
        folder = output_folder_structure['training']['ground_truth']
    else:
        folder = output_folder_structure['training']['images']
    output_folder = data_root + get_output_save_folder() + folder
    Path(output_folder).mkdir(parents=True, exist_ok=True)
    return output_folder

def load_gas():
    def load_data(file):
        data = pd.read_pickle(file)
        data.drop("Meth", axis=1, inplace=True)
        data.drop("Eth", axis=1, inplace=True)
        data.drop("Time", axis=1, inplace=True)
        return data

    def get_correlation_numbers(data):
        # For each column, count how many columns it is correlated with above
        # 0.98 (the count includes the column itself).
        C = data.corr()
        A = C > 0.98
        B = A.sum(axis=1)
        return B

    def load_data_and_clean(file):
        data = load_data(file)
        B = get_correlation_numbers(data)
        # Drop columns one at a time until no column is highly correlated
        # with any other.
        while np.any(B > 1):
            col_to_remove = np.where(B > 1)[0][0]
            col_name = data.columns[col_to_remove]
            data.drop(col_name, axis=1, inplace=True)
            B = get_correlation_numbers(data)
        data = (data - data.mean()) / data.std()
        return data.values

    def load_data_and_clean_and_split(file):
        data = load_data_and_clean(file)
        N_test = int(0.1 * data.shape[0])
        data_test = data[-N_test:]
        data_train = data[0:-N_test]
        N_validate = int(0.1 * data_train.shape[0])
        data_validate = data_train[-N_validate:]
        data_train = data_train[0:-N_validate]
        return data_train, data_validate, data_test

    return load_data_and_clean_and_split(
        file=os.path.join(utils.get_data_root(), 'gas', 'ethylene_CO.pickle'))

def __init__(self):
    path = os.path.join(utils.get_data_root(), "lotka-volterra",
                        "pilot_run_results.pkl")
    with open(path, "rb") as file:
        self.means, self.stds = pickle.load(file, encoding="bytes")

def get_ground_truth_observation():
    path = os.path.join(utils.get_data_root(), "lotka-volterra",
                        "obs_stats.pkl")
    with open(path, "rb") as file:
        true_observation = pickle.load(file, encoding="bytes")
    return np.array(true_observation)

def get_ground_truth_observation(self):
    path = os.path.join(utils.get_data_root(), "mg1", "observed_data.pkl")
    with open(path, "rb") as file:
        _, true_observation = pickle.load(file, encoding="bytes")
    return torch.Tensor(true_observation)

def load_data():
    file = os.path.join(utils.get_data_root(), 'power', 'data.npy')
    return np.load(file)

from torchvision.utils import make_grid, save_image

from nde import distributions, transforms, flows
import utils
import optim
import nn as nn_

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# Capture job id on the cluster
sacred.SETTINGS.HOST_INFO.CAPTURED_ENV.append('SLURM_JOB_ID')

runs_dir = os.path.join(utils.get_data_root(), 'runs/images')
ex = Experiment('decomposition-flows-images')
fso = observers.FileStorageObserver.create(runs_dir, priority=1)
# I don't like how sacred names run folders.
ex.observers.extend([fso, autils.NamingObserver(runs_dir, priority=2)])

# For num_workers > 0 and tensor datasets, bad things happen otherwise.
torch.multiprocessing.set_start_method("spawn", force=True)


# noinspection PyUnusedLocal
@ex.config
def config():
    # Dataset
    dataset = 'fashion-mnist'

def run(seed):
    assert torch.cuda.is_available()
    device = torch.device('cuda')
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

    np.random.seed(seed)
    torch.manual_seed(seed)

    # Create training data.
    data_transform = tvtransforms.Compose(
        [tvtransforms.ToTensor(),
         tvtransforms.Lambda(torch.bernoulli)])

    if args.dataset_name == 'mnist':
        dataset = datasets.MNIST(
            root=os.path.join(utils.get_data_root(), 'mnist'),
            train=True,
            download=True,
            transform=data_transform)
        test_dataset = datasets.MNIST(
            root=os.path.join(utils.get_data_root(), 'mnist'),
            train=False,
            download=True,
            transform=data_transform)
    elif args.dataset_name == 'fashion-mnist':
        dataset = datasets.FashionMNIST(
            root=os.path.join(utils.get_data_root(), 'fashion-mnist'),
            train=True,
            download=True,
            transform=data_transform)
        test_dataset = datasets.FashionMNIST(
            root=os.path.join(utils.get_data_root(), 'fashion-mnist'),
            train=False,
            download=True,
            transform=data_transform)
    elif args.dataset_name == 'omniglot':
        dataset = data_.OmniglotDataset(split='train',
                                        transform=data_transform)
        test_dataset = data_.OmniglotDataset(split='test',
                                             transform=data_transform)
    elif args.dataset_name == 'emnist':
        rotate = partial(tvF.rotate, angle=-90)
        hflip = tvF.hflip
        data_transform = tvtransforms.Compose([
            tvtransforms.Lambda(rotate),
            tvtransforms.Lambda(hflip),
            tvtransforms.ToTensor(),
            tvtransforms.Lambda(torch.bernoulli)
        ])
        dataset = datasets.EMNIST(
            root=os.path.join(utils.get_data_root(), 'emnist'),
            split='letters',
            train=True,
            transform=data_transform,
            download=True)
        test_dataset = datasets.EMNIST(
            root=os.path.join(utils.get_data_root(), 'emnist'),
            split='letters',
            train=False,
            transform=data_transform,
            download=True)
    else:
        raise ValueError

    if args.dataset_name == 'omniglot':
        split = -1345
    elif args.dataset_name == 'emnist':
        split = -20000
    else:
        split = -10000

    indices = np.arange(len(dataset))
    np.random.shuffle(indices)
    train_indices, val_indices = indices[:split], indices[split:]

    train_sampler = SubsetRandomSampler(train_indices)
    val_sampler = SubsetRandomSampler(val_indices)

    train_loader = data.DataLoader(
        dataset=dataset,
        batch_size=args.batch_size,
        sampler=train_sampler,
        num_workers=4 if args.dataset_name == 'emnist' else 0)
    train_generator = data_.batch_generator(train_loader)

    val_loader = data.DataLoader(dataset=dataset,
                                 batch_size=1024,
                                 sampler=val_sampler,
                                 shuffle=False,
                                 drop_last=False)
    val_batch = next(iter(val_loader))[0]

    test_loader = data.DataLoader(
        test_dataset,
        batch_size=16,
        shuffle=False,
        drop_last=False,
    )

    # from matplotlib import pyplot as plt
    # from experiments import cutils
    # from torchvision.utils import make_grid
    # fig, ax = plt.subplots(1, 1, figsize=(5, 5))
    # cutils.gridimshow(make_grid(val_batch[:64], nrow=8), ax)
    # plt.show()
    # quit()

    def create_linear_transform():
        if args.linear_type == 'lu':
            return transforms.CompositeTransform([
                transforms.RandomPermutation(args.latent_features),
                transforms.LULinear(args.latent_features, identity_init=True)
            ])
        elif args.linear_type == 'svd':
            return transforms.SVDLinear(args.latent_features,
                                        num_householder=4,
                                        identity_init=True)
        elif args.linear_type == 'perm':
            return transforms.RandomPermutation(args.latent_features)
        else:
            raise ValueError

    def create_base_transform(i, context_features=None):
        if args.prior_type == 'affine-coupling':
            return transforms.AffineCouplingTransform(
                mask=utils.create_alternating_binary_mask(
                    features=args.latent_features, even=(i % 2 == 0)),
                transform_net_create_fn=lambda in_features, out_features:
                nn_.ResidualNet(in_features=in_features,
                                out_features=out_features,
                                hidden_features=args.hidden_features,
                                context_features=context_features,
                                num_blocks=args.num_transform_blocks,
                                activation=F.relu,
                                dropout_probability=args.dropout_probability,
                                use_batch_norm=args.use_batch_norm))
        elif args.prior_type == 'rq-coupling':
            return transforms.PiecewiseRationalQuadraticCouplingTransform(
                mask=utils.create_alternating_binary_mask(
                    features=args.latent_features, even=(i % 2 == 0)),
                transform_net_create_fn=lambda in_features, out_features:
                nn_.ResidualNet(in_features=in_features,
                                out_features=out_features,
                                hidden_features=args.hidden_features,
                                context_features=context_features,
                                num_blocks=args.num_transform_blocks,
                                activation=F.relu,
                                dropout_probability=args.dropout_probability,
                                use_batch_norm=args.use_batch_norm),
                num_bins=args.num_bins,
                tails='linear',
                tail_bound=args.tail_bound,
                apply_unconditional_transform=args.apply_unconditional_transform,
            )
        elif args.prior_type == 'affine-autoregressive':
            return transforms.MaskedAffineAutoregressiveTransform(
                features=args.latent_features,
                hidden_features=args.hidden_features,
                context_features=context_features,
                num_blocks=args.num_transform_blocks,
                use_residual_blocks=True,
                random_mask=False,
                activation=F.relu,
                dropout_probability=args.dropout_probability,
                use_batch_norm=args.use_batch_norm)
        elif args.prior_type == 'rq-autoregressive':
            return transforms.MaskedPiecewiseRationalQuadraticAutoregressiveTransform(
                features=args.latent_features,
                hidden_features=args.hidden_features,
                context_features=context_features,
                num_bins=args.num_bins,
                tails='linear',
                tail_bound=args.tail_bound,
                num_blocks=args.num_transform_blocks,
                use_residual_blocks=True,
                random_mask=False,
                activation=F.relu,
                dropout_probability=args.dropout_probability,
                use_batch_norm=args.use_batch_norm)
        else:
            raise ValueError

    # ---------------
    # prior
    # ---------------
    def create_prior():
        if args.prior_type == 'standard-normal':
            prior = distributions_.StandardNormal((args.latent_features, ))
        else:
            distribution = distributions_.StandardNormal(
                (args.latent_features, ))
            transform = transforms.CompositeTransform([
                transforms.CompositeTransform(
                    [create_linear_transform(),
                     create_base_transform(i)])
                for i in range(args.num_flow_steps)
            ])
            transform = transforms.CompositeTransform(
                [transform, create_linear_transform()])
            prior = flows.Flow(transform, distribution)
        return prior

    # ---------------
    # inputs encoder
    # ---------------
    def create_inputs_encoder():
        if args.approximate_posterior_type == 'diagonal-normal':
            inputs_encoder = None
        else:
            inputs_encoder = nn_.ConvEncoder(
                context_features=args.context_features,
                channels_multiplier=16,
                dropout_probability=args.dropout_probability_encoder_decoder)
        return inputs_encoder

    # ---------------
    # approximate posterior
    # ---------------
    def create_approximate_posterior():
        if args.approximate_posterior_type == 'diagonal-normal':
            context_encoder = nn_.ConvEncoder(
                context_features=args.context_features,
                channels_multiplier=16,
                dropout_probability=args.dropout_probability_encoder_decoder)
            approximate_posterior = distributions_.ConditionalDiagonalNormal(
                shape=[args.latent_features], context_encoder=context_encoder)
        else:
            context_encoder = nn.Linear(args.context_features,
                                        2 * args.latent_features)
            distribution = distributions_.ConditionalDiagonalNormal(
                shape=[args.latent_features], context_encoder=context_encoder)
            transform = transforms.CompositeTransform([
                transforms.CompositeTransform([
                    create_linear_transform(),
                    create_base_transform(
                        i, context_features=args.context_features)
                ]) for i in range(args.num_flow_steps)
            ])
            transform = transforms.CompositeTransform(
                [transform, create_linear_transform()])
            approximate_posterior = flows.Flow(
                transforms.InverseTransform(transform), distribution)
        return approximate_posterior

    # ---------------
    # likelihood
    # ---------------
    def create_likelihood():
        latent_decoder = nn_.ConvDecoder(
            latent_features=args.latent_features,
            channels_multiplier=16,
            dropout_probability=args.dropout_probability_encoder_decoder)
        likelihood = distributions_.ConditionalIndependentBernoulli(
            shape=[1, 28, 28], context_encoder=latent_decoder)
        return likelihood

    prior = create_prior()
    approximate_posterior = create_approximate_posterior()
    likelihood = create_likelihood()
    inputs_encoder = create_inputs_encoder()

    model = vae.VariationalAutoencoder(
        prior=prior,
        approximate_posterior=approximate_posterior,
        likelihood=likelihood,
        inputs_encoder=inputs_encoder)

    # with torch.no_grad():
    #     # elbo = model.stochastic_elbo(val_batch[:16].to(device)).mean()
    #     # print(elbo)
    #     elbo = model.stochastic_elbo(val_batch[:16].to(device),
    #                                  num_samples=100).mean()
    #     print(elbo)
    #     log_prob = model.log_prob_lower_bound(val_batch[:16].to(device),
    #                                           num_samples=1200).mean()
    #     print(log_prob)
    # quit()

    n_params = utils.get_num_parameters(model)
    print('There are {} trainable parameters in this model.'.format(n_params))

    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer=optimizer, T_max=args.num_training_steps, eta_min=0)

    def get_kl_multiplier(step):
        if args.kl_multiplier_schedule == 'constant':
            return args.kl_multiplier_initial
        elif args.kl_multiplier_schedule == 'linear':
            multiplier = min(
                step / (args.num_training_steps * args.kl_warmup_fraction),
                1.)
            return args.kl_multiplier_initial * (1. + multiplier)

    # create summary writer and write to log directory
    timestamp = cutils.get_timestamp()
    if cutils.on_cluster():
        timestamp += '||{}'.format(os.environ['SLURM_JOB_ID'])
    log_dir = os.path.join(cutils.get_log_root(), args.dataset_name, timestamp)
    while True:
        try:
            writer = SummaryWriter(log_dir=log_dir, max_queue=20)
            break
        except FileExistsError:
            sleep(5)
    filename = os.path.join(log_dir, 'config.json')
    with open(filename, 'w') as file:
        json.dump(vars(args), file)

    best_val_elbo = -np.inf
    tbar = tqdm(range(args.num_training_steps))
    for step in tbar:
        model.train()
        optimizer.zero_grad()
        scheduler.step(step)

        batch = next(train_generator)[0].to(device)
        elbo = model.stochastic_elbo(batch,
                                     kl_multiplier=get_kl_multiplier(step))
        loss = -torch.mean(elbo)
        loss.backward()
        optimizer.step()

        if (step + 1) % args.monitor_interval == 0:
            model.eval()
            with torch.no_grad():
                elbo = model.stochastic_elbo(val_batch.to(device))
                mean_val_elbo = elbo.mean()

            if mean_val_elbo > best_val_elbo:
                best_val_elbo = mean_val_elbo
                path = os.path.join(
                    cutils.get_checkpoint_root(),
                    '{}-best-val-{}.t'.format(args.dataset_name, timestamp))
                torch.save(model.state_dict(), path)

            writer.add_scalar(tag='val-elbo',
                              scalar_value=mean_val_elbo,
                              global_step=step)
            writer.add_scalar(tag='best-val-elbo',
                              scalar_value=best_val_elbo,
                              global_step=step)

            with torch.no_grad():
                samples = model.sample(64)
            fig, ax = plt.subplots(figsize=(10, 10))
            cutils.gridimshow(make_grid(samples.view(64, 1, 28, 28), nrow=8),
                              ax)
            writer.add_figure(tag='vae-samples', figure=fig, global_step=step)
            plt.close()

    # load best val model
    path = os.path.join(
        cutils.get_checkpoint_root(),
        '{}-best-val-{}.t'.format(args.dataset_name, timestamp))
    model.load_state_dict(torch.load(path))
    model.eval()

    np.random.seed(5)
    torch.manual_seed(5)

    # compute elbo on test set
    with torch.no_grad():
        elbo = torch.Tensor([])
        log_prob_lower_bound = torch.Tensor([])
        for batch in tqdm(test_loader):
            elbo_ = model.stochastic_elbo(batch[0].to(device))
            elbo = torch.cat([elbo, elbo_])

            log_prob_lower_bound_ = model.log_prob_lower_bound(
                batch[0].to(device), num_samples=1000)
            log_prob_lower_bound = torch.cat(
                [log_prob_lower_bound, log_prob_lower_bound_])

    path = os.path.join(
        log_dir, '{}-prior-{}-posterior-{}-elbo.npy'.format(
            args.dataset_name, args.prior_type,
            args.approximate_posterior_type))
    np.save(path, utils.tensor2numpy(elbo))

    path = os.path.join(
        log_dir, '{}-prior-{}-posterior-{}-log-prob-lower-bound.npy'.format(
            args.dataset_name, args.prior_type,
            args.approximate_posterior_type))
    np.save(path, utils.tensor2numpy(log_prob_lower_bound))

    # save elbo and log prob lower bound
    mean_elbo = elbo.mean()
    std_elbo = elbo.std()
    mean_log_prob_lower_bound = log_prob_lower_bound.mean()
    std_log_prob_lower_bound = log_prob_lower_bound.std()
    s = 'ELBO: {:.2f} +- {:.2f}, LOG PROB LOWER BOUND: {:.2f} +- {:.2f}'.format(
        mean_elbo.item(),
        2 * std_elbo.item() / np.sqrt(len(test_dataset)),
        mean_log_prob_lower_bound.item(),
        2 * std_log_prob_lower_bound.item() / np.sqrt(len(test_dataset)))
    filename = os.path.join(log_dir, 'test-results.txt')
    with open(filename, 'w') as file:
        file.write(s)

def load_hepmass():
    def load_data(path):
        data_train = pd.read_csv(
            filepath_or_buffer=os.path.join(path, '1000_train.csv'),
            index_col=False)
        data_test = pd.read_csv(
            filepath_or_buffer=os.path.join(path, '1000_test.csv'),
            index_col=False)
        return data_train, data_test

    def load_data_no_discrete(path):
        """Loads the positive class examples from the first 10% of the dataset."""
        data_train, data_test = load_data(path)

        # Gets rid of any background noise examples i.e. class label 0.
        data_train = data_train[data_train[data_train.columns[0]] == 1]
        data_train = data_train.drop(data_train.columns[0], axis=1)
        data_test = data_test[data_test[data_test.columns[0]] == 1]
        data_test = data_test.drop(data_test.columns[0], axis=1)
        # Because the data set is messed up!
        data_test = data_test.drop(data_test.columns[-1], axis=1)
        return data_train, data_test

    def load_data_no_discrete_normalised(path):
        data_train, data_test = load_data_no_discrete(path)
        mu = data_train.mean()
        s = data_train.std()
        data_train = (data_train - mu) / s
        data_test = (data_test - mu) / s
        return data_train, data_test

    def load_data_no_discrete_normalised_as_array(path):
        data_train, data_test = load_data_no_discrete_normalised(path)
        data_train, data_test = data_train.values, data_test.values

        i = 0
        # Remove any features that have too many re-occurring real values.
        features_to_remove = []
        for feature in data_train.T:
            c = Counter(feature)
            max_count = np.array([v for k, v in sorted(c.items())])[0]
            if max_count > 5:
                features_to_remove.append(i)
            i += 1
        data_train = data_train[:, np.array([
            i for i in range(data_train.shape[1])
            if i not in features_to_remove
        ])]
        data_test = data_test[:, np.array([
            i for i in range(data_test.shape[1])
            if i not in features_to_remove
        ])]

        N = data_train.shape[0]
        N_validate = int(N * 0.1)
        data_validate = data_train[-N_validate:]
        data_train = data_train[0:-N_validate]

        return data_train, data_validate, data_test

    return load_data_no_discrete_normalised_as_array(
        path=os.path.join(utils.get_data_root(), 'hepmass'))

def load_bsds300():
    path = os.path.join(utils.get_data_root(), 'bsds300', 'bsds300.hdf5')
    file = h5py.File(path, 'r')
    return file['train'], file['validation'], file['test']

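# Illustrative usage note, not part of the original file: load_bsds300 returns
# h5py Dataset objects backed by an open HDF5 file handle rather than
# in-memory arrays. A caller that wants plain numpy arrays can slice them:
#
#     train, val, test = load_bsds300()
#     train_np = train[:]  # materialises the HDF5 dataset as a numpy array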