def test_critic(self, lstm_hidden_size):
    """Build a critic net on image observations and vector actions, run a
    forward pass, and verify output/state shapes for both the plain
    network and its ``make_parallel`` version."""
    obs_spec = TensorSpec((3, 20, 20), torch.float32)
    action_spec = TensorSpec((5, ), torch.float32)
    input_spec = (obs_spec, action_spec)

    conv_params = ((8, 3, 1), (16, 3, 2, 1))
    action_fc_params = (10, 8)
    joint_fc_params = (6, 4)

    obs = obs_spec.zeros(outer_dims=(1, ))
    act = action_spec.randn(outer_dims=(1, ))
    net_inputs = (obs, act)

    network_ctor, state = self._init(lstm_hidden_size)

    critic_net = network_ctor(
        input_spec,
        observation_conv_layer_params=conv_params,
        action_fc_layer_params=action_fc_params,
        joint_fc_layer_params=joint_fc_params)

    value, state = critic_net._test_forward()
    self.assertEqual(value.shape, (1, ))
    if lstm_hidden_size is None:
        # A feed-forward critic carries no recurrent state.
        self.assertEqual(state, ())

    value, state = critic_net(net_inputs, state)
    # The critic outputs one scalar per batch element.
    self.assertEqual(critic_net.output_spec, TensorSpec(()))
    self.assertEqual(value.shape, (1, ))  # (batch_size,)

    # test make_parallel
    pnet = critic_net.make_parallel(6)

    if lstm_hidden_size is not None:
        # The parallel net expects state shaped [B, n, ...]; feeding the
        # un-replicated [B, ...] state must be rejected.
        self.assertRaises(AssertionError, pnet, net_inputs, state)
        state = alf.nest.map_structure(
            lambda x: x.unsqueeze(1).expand(x.shape[0], 6, x.shape[1]),
            state)

    if lstm_hidden_size is None:
        self.assertTrue(isinstance(pnet, ParallelCriticNetwork))
    else:
        self.assertTrue(isinstance(pnet, NaiveParallelNetwork))

    value, state = pnet(net_inputs, state)
    self.assertEqual(pnet.output_spec, TensorSpec((6, )))
    self.assertEqual(value.shape, (1, 6))
def test_parallel_network_output_size(self, replicas):
    """Verify that ``make_parallel()`` and ``NaiveParallelNetwork``
    produce outputs of identical shape, both for inputs shared across
    replicas and for per-replica inputs."""
    batch_size = 128
    input_spec = TensorSpec((100, ), torch.float32)

    # A dummy encoding network which outputs the input unchanged.
    network = EncodingNetwork(input_tensor_spec=input_spec)
    pnet = network.make_parallel(replicas)
    nnet = alf.networks.network.NaiveParallelNetwork(network, replicas)

    def _check_output_size(embedding):
        p_output, _ = pnet(embedding)
        n_output, _ = nnet(embedding)
        self.assertTrue(p_output.shape == n_output.shape)
        self.assertTrue(p_output.shape[1:] == pnet._output_spec.shape)

    # The case with shared inputs: one embedding broadcast to all replicas.
    _check_output_size(input_spec.randn(outer_dims=(batch_size, )))
    # The case with non-shared inputs: one embedding per replica.
    _check_output_size(input_spec.randn(outer_dims=(batch_size, replicas)))
def test_bayesian_linear_regression(self,
                                    par_vi='svgd3',
                                    num_particles=256,
                                    train_batch_size=10):
    r"""The hypernetwork is trained to generate the parameter vector for a
    linear regressor. The target linear regressor is :math:`y = X\beta + e`,
    where :math:`e\sim N(0, I)` is random noise, :math:`X` is the input
    data matrix, and :math:`y` is target ouputs. The posterior of
    :math:`\beta` has a closed-form
    :math:`p(\beta|X,y)\sim N((X^TX)^{-1}X^Ty, X^TX)`.
    For a linear generator with weight W and bias b, and takes standard
    Gaussian noise as input, the output follows a Gaussian
    :math:`N(b, WW^T)`, which should match the posterior
    :math:`p(\beta|X,y)` for both svgd and gfsf.

    Args:
        par_vi (str): particle-based VI method passed to ``HyperNetwork``.
        num_particles (int): number of sampled parameter particles per
            training step.
        train_batch_size (int): minibatch size drawn from the synthetic
            data each training step.
    """
    input_size = 3
    input_spec = TensorSpec((input_size, ), torch.float32)
    output_dim = 1
    batch_size = 100
    # Synthetic regression data: y = X beta + noise.
    inputs = input_spec.randn(outer_dims=(batch_size, ))
    beta = torch.rand(input_size, output_dim) + 5.
    print("beta: {}".format(beta))
    noise = torch.randn(batch_size, output_dim)
    targets = inputs @ beta + noise
    # Closed-form Gaussian posterior of beta:
    # cov = (X^T X)^{-1}, mean = cov X^T y.
    true_cov = torch.inverse(
        inputs.t() @ inputs)  # + torch.eye(input_size))
    true_mean = true_cov @ inputs.t() @ targets
    noise_dim = 3
    # Generator is linear (hidden_layers=None), so its output distribution
    # is exactly N(bias, W W^T) and can be compared to the posterior.
    algorithm = HyperNetwork(
        input_tensor_spec=input_spec,
        last_layer_param=(output_dim, False),
        last_activation=math_ops.identity,
        noise_dim=noise_dim,
        # hidden_layers=(16, ),
        hidden_layers=None,
        loss_type='regression',
        par_vi=par_vi,
        optimizer=alf.optimizers.Adam(lr=1e-3))
    print("ground truth mean: {}".format(true_mean))
    print("ground truth cov: {}".format(true_cov))
    print("ground truth cov norm: {}".format(true_cov.norm()))

    def _train(train_batch=None, entropy_regularization=None):
        # One training step; samples a random minibatch when no explicit
        # batch is provided.
        if train_batch is None:
            perm = torch.randperm(batch_size)
            idx = perm[:train_batch_size]
            train_inputs = inputs[idx]
            train_targets = targets[idx]
        else:
            train_inputs, train_targets = train_batch
        if entropy_regularization is None:
            # Scale entropy regularization by the minibatch fraction.
            entropy_regularization = train_batch_size / batch_size
        alg_step = algorithm.train_step(
            inputs=(train_inputs, train_targets),
            entropy_regularization=entropy_regularization,
            num_particles=num_particles)
        loss_info, params = algorithm.update_with_gradient(alg_step.info)

    def _test(i):
        # Compare sampled statistics and the generator's analytic
        # mean/cov (bias, W W^T) against the closed-form posterior.
        params = algorithm.sample_parameters(num_particles=200)
        computed_mean = params.mean(0)
        computed_cov = self.cov(params)
        print("-" * 68)
        weight = algorithm._generator._net._fc_layers[0].weight
        print("norm of generator weight: {}".format(weight.norm()))
        learned_cov = weight @ weight.t()
        learned_mean = algorithm._generator._net._fc_layers[0].bias
        pred_step = algorithm.predict_step(inputs, params=params)
        sampled_preds = pred_step.output.squeeze()  # [batch, n_particles]
        computed_preds = inputs @ computed_mean  # [batch]
        predicts = inputs @ learned_mean  # [batch]
        spred_err = torch.norm((sampled_preds - targets).mean(1))
        pred_err = torch.norm(predicts - targets.squeeze())
        # Relative errors against the analytic posterior.
        smean_err = torch.norm(computed_mean - true_mean.squeeze())
        smean_err = smean_err / torch.norm(true_mean)
        mean_err = torch.norm(learned_mean - true_mean.squeeze())
        mean_err = mean_err / torch.norm(true_mean)
        scov_err = torch.norm(computed_cov - true_cov)
        scov_err = scov_err / torch.norm(true_cov)
        cov_err = torch.norm(learned_cov - true_cov)
        cov_err = cov_err / torch.norm(true_cov)
        print("train_iter {}: pred err {}".format(i, pred_err))
        print("train_iter {}: sampled pred err {}".format(i, spred_err))
        print("train_iter {}: mean err {}".format(i, mean_err))
        print("train_iter {}: sampled mean err {}".format(i, smean_err))
        print("train_iter {}: sampled cov err {}".format(i, scov_err))
        print("train_iter {}: cov err {}".format(i, cov_err))
        print("learned_cov norm: {}".format(learned_cov.norm()))

    train_iter = 5000
    for i in range(train_iter):
        _train()
        if i % 1000 == 0:
            _test(i)

    # Final check: generator bias/weight must approximate the posterior
    # mean/cov within 50% relative error.
    learned_mean = algorithm._generator._net._fc_layers[0].bias
    mean_err = torch.norm(learned_mean - true_mean.squeeze())
    mean_err = mean_err / torch.norm(true_mean)
    weight = algorithm._generator._net._fc_layers[0].weight
    learned_cov = weight @ weight.t()
    cov_err = torch.norm(learned_cov - true_cov)
    cov_err = cov_err / torch.norm(true_cov)
    print("-" * 68)
    print("train_iter {}: mean err {}".format(train_iter, mean_err))
    print("train_iter {}: cov err {}".format(train_iter, cov_err))
    self.assertLess(mean_err, 0.5)
    self.assertLess(cov_err, 0.5)
def test_functional_par_vi_algorithm(self,
                                     par_vi='svgd',
                                     function_vi=False,
                                     num_particles=256,
                                     train_batch_size=10):
    r"""The hypernetwork is trained to generate the parameter vector for a
    linear regressor. The target linear regressor is :math:`y = X\beta + e`,
    where :math:`e\sim N(0, I)` is random noise, :math:`X` is the input
    data matrix, and :math:`y` is target ouputs. The posterior of
    :math:`\beta` has a closed-form
    :math:`p(\beta|X,y)\sim N((X^TX)^{-1}X^Ty, X^TX)`.
    For a linear generator with weight W and bias b, and takes standard
    Gaussian noise as input, the output follows a Gaussian
    :math:`N(b, WW^T)`, which should match the posterior
    :math:`p(\beta|X,y)` for both svgd and gfsf.

    Args:
        par_vi (str): particle-based VI method for ``FuncParVIAlgorithm``.
        function_vi (bool): whether to perform VI in function space.
        num_particles (int): number of parameter particles maintained.
        train_batch_size (int): minibatch size per training step.
    """
    print('par vi: {}\nfunction_vi: {}\nparticles: {}\nbatch size: {}'.
          format(par_vi, function_vi, num_particles, train_batch_size))
    input_size = 3
    input_spec = TensorSpec((input_size, ), torch.float32)
    output_dim = 1
    batch_size = 100
    # Synthetic regression data: y = X beta + noise.
    inputs = input_spec.randn(outer_dims=(batch_size, ))
    # Fix: beta was sampled and printed three times in a row (copy-paste
    # duplication); a single draw is all that is needed.
    beta = torch.rand(input_size, output_dim) + 5.
    print("beta: {}".format(beta))
    noise = torch.randn(batch_size, output_dim)
    targets = inputs @ beta + noise
    # Closed-form Gaussian posterior of beta:
    # cov = (X^T X)^{-1}, mean = cov X^T y.
    true_cov = torch.inverse(
        inputs.t() @ inputs)  # + torch.eye(input_size))
    true_mean = true_cov @ inputs.t() @ targets
    algorithm = FuncParVIAlgorithm(
        input_tensor_spec=input_spec,
        last_layer_param=(output_dim, False),
        last_activation=math_ops.identity,
        num_particles=num_particles,
        loss_type='regression',
        par_vi=par_vi,
        function_vi=function_vi,
        function_bs=train_batch_size,
        critic_hidden_layers=(3, ),
        critic_l2_weight=10,
        critic_use_bn=True,
        optimizer=alf.optimizers.Adam(lr=1e-2),
        critic_optimizer=alf.optimizers.Adam(lr=1e-2))
    print("ground truth mean: {}".format(true_mean))
    print("ground truth cov: {}".format(true_cov))
    print("ground truth cov norm: {}".format(true_cov.norm()))

    def _train(train_batch=None, entropy_regularization=None):
        # One training step; samples a random minibatch when no explicit
        # batch is provided.
        if train_batch is None:
            perm = torch.randperm(batch_size)
            idx = perm[:train_batch_size]
            train_inputs = inputs[idx]
            train_targets = targets[idx]
        else:
            train_inputs, train_targets = train_batch
        if entropy_regularization is None:
            # Scale entropy regularization by the minibatch fraction.
            entropy_regularization = train_batch_size / batch_size
        alg_step = algorithm.train_step(
            inputs=(train_inputs, train_targets),
            entropy_regularization=entropy_regularization)
        loss_info, params = algorithm.update_with_gradient(alg_step.info)

    def _test(i):
        # Compare particle statistics against the analytic posterior.
        params = algorithm.particles
        computed_mean = params.mean(0)
        computed_cov = self.cov(params)
        print("-" * 68)
        pred_step = algorithm.predict_step(inputs)
        preds = pred_step.output.squeeze()  # [batch, n_particles]
        computed_preds = inputs @ computed_mean  # [batch]
        pred_err = torch.norm((preds - targets).mean(1))
        mean_err = torch.norm(computed_mean - true_mean.squeeze())
        mean_err = mean_err / torch.norm(true_mean)
        cov_err = torch.norm(computed_cov - true_cov)
        cov_err = cov_err / torch.norm(true_cov)
        print("train_iter {}: pred err {}".format(i, pred_err))
        print("train_iter {}: mean err {}".format(i, mean_err))
        print("train_iter {}: cov err {}".format(i, cov_err))
        print("computed_cov norm: {}".format(computed_cov.norm()))

    train_iter = 50000
    for i in range(train_iter):
        _train()
        if i % 1000 == 0:
            _test(i)

    # Final check: particle mean/cov must approximate the posterior
    # mean/cov within 50% relative error.
    params = algorithm.particles
    computed_mean = params.mean(0)
    computed_cov = self.cov(params)
    mean_err = torch.norm(computed_mean - true_mean.squeeze())
    mean_err = mean_err / torch.norm(true_mean)
    cov_err = torch.norm(computed_cov - true_cov)
    cov_err = cov_err / torch.norm(true_cov)
    print("-" * 68)
    print("train_iter {}: mean err {}".format(train_iter, mean_err))
    print("train_iter {}: cov err {}".format(train_iter, cov_err))
    self.assertLess(mean_err, 0.5)
    self.assertLess(cov_err, 0.5)
class VaeTest(alf.test.TestCase):
    """Tests for the VAE: an unconditional 1-D case and a case conditioned
    on a binary prior input."""

    def setUp(self):
        super().setUp()
        # Shared fixture: 1-D inputs, squared-error reconstruction loss.
        self._input_spec = TensorSpec((1, ))
        self._epochs = 10
        self._batch_size = 100
        self._latent_dim = 2
        self._loss_f = math_ops.square

    def test_vae(self):
        """Test for one dimensional Gaussian."""
        encoder = vae.VariationalAutoEncoder(
            self._latent_dim, input_tensor_spec=self._input_spec)
        decoding_layers = FC(self._latent_dim, 1)

        # Jointly optimize encoder and decoder parameters.
        optimizer = torch.optim.Adam(
            list(encoder.parameters()) + list(decoding_layers.parameters()),
            lr=0.1)

        x_train = self._input_spec.randn(outer_dims=(10000, ))
        x_test = self._input_spec.randn(outer_dims=(10, ))

        for _ in range(self._epochs):
            # Reshuffle the training data each epoch.
            x_train = x_train[torch.randperm(x_train.shape[0])]
            for i in range(0, x_train.shape[0], self._batch_size):
                optimizer.zero_grad()
                batch = x_train[i:i + self._batch_size]
                alg_step = encoder.train_step(batch)
                outputs = decoding_layers(alg_step.output)
                # Reconstruction loss (weighted by 100) plus the
                # encoder's own (KL) loss.
                loss = torch.mean(100 * self._loss_f(batch - outputs) +
                                  alg_step.info.loss)
                loss.backward()
                optimizer.step()

        y_test = decoding_layers(encoder.train_step(x_test).output)
        reconstruction_loss = float(torch.mean(self._loss_f(x_test - y_test)))
        print("reconstruction_loss:", reconstruction_loss)
        self.assertLess(reconstruction_loss, 0.05)

    def test_conditional_vae(self):
        """Test for one dimensional Gaussian, conditioned on a Bernoulli
        variable.
        """
        prior_input_spec = BoundedTensorSpec((), 'int64')

        # Prior network: maps the one-hot conditioning input to the
        # (mean, log-var) of the latent prior.
        z_prior_network = EncodingNetwork(
            TensorSpec(
                (prior_input_spec.maximum - prior_input_spec.minimum + 1, )),
            fc_layer_params=(10, ) * 2,
            last_layer_size=2 * self._latent_dim,
            last_activation=math_ops.identity)
        # Preprocess network: combines (conditioning input, observation,
        # prior output) before encoding.
        preprocess_network = EncodingNetwork(
            input_tensor_spec=(
                z_prior_network.input_tensor_spec,
                self._input_spec,
                z_prior_network.output_spec,
            ),
            preprocessing_combiner=NestConcat(),
            fc_layer_params=(10, ) * 2,
            last_layer_size=self._latent_dim,
            last_activation=math_ops.identity)

        encoder = vae.VariationalAutoEncoder(
            self._latent_dim,
            preprocess_network=preprocess_network,
            z_prior_network=z_prior_network)
        decoding_layers = FC(self._latent_dim, 1)

        optimizer = torch.optim.Adam(
            list(encoder.parameters()) + list(decoding_layers.parameters()),
            lr=0.1)

        # Targets equal inputs shifted by +1 for the first half (prior
        # label 0) and unshifted for the second half (prior label 1).
        x_train = self._input_spec.randn(outer_dims=(10000, ))
        y_train = x_train.clone()
        y_train[:5000] = y_train[:5000] + 1.0
        pr_train = torch.cat([
            prior_input_spec.zeros(outer_dims=(5000, )),
            prior_input_spec.ones(outer_dims=(5000, ))
        ],
                             dim=0)

        x_test = self._input_spec.randn(outer_dims=(100, ))
        y_test = x_test.clone()
        y_test[:50] = y_test[:50] + 1.0
        pr_test = torch.cat([
            prior_input_spec.zeros(outer_dims=(50, )),
            prior_input_spec.ones(outer_dims=(50, ))
        ],
                            dim=0)
        pr_test = torch.nn.functional.one_hot(
            pr_test,
            int(z_prior_network.input_tensor_spec.shape[0])).to(torch.float32)

        for _ in range(self._epochs):
            # Reshuffle inputs, targets and labels with the same
            # permutation so they stay aligned.
            idx = torch.randperm(x_train.shape[0])
            x_train = x_train[idx]
            y_train = y_train[idx]
            pr_train = pr_train[idx]
            for i in range(0, x_train.shape[0], self._batch_size):
                optimizer.zero_grad()
                batch = x_train[i:i + self._batch_size]
                y_batch = y_train[i:i + self._batch_size]
                pr_batch = torch.nn.functional.one_hot(
                    pr_train[i:i + self._batch_size],
                    int(z_prior_network.input_tensor_spec.shape[0])).to(
                        torch.float32)
                alg_step = encoder.train_step([pr_batch, batch])
                outputs = decoding_layers(alg_step.output)
                # Reconstruction loss (weighted by 100) plus the
                # encoder's own (KL) loss.
                loss = torch.mean(100 * self._loss_f(y_batch - outputs) +
                                  alg_step.info.loss)
                loss.backward()
                optimizer.step()

        y_hat_test = decoding_layers(
            encoder.train_step([pr_test, x_test]).output)
        reconstruction_loss = float(
            torch.mean(self._loss_f(y_test - y_hat_test)))
        print("reconstruction_loss:", reconstruction_loss)
        self.assertLess(reconstruction_loss, 0.05)