def test_regex_matches_are_initialized_correctly(self):
    """Check that InitializerApplicator applies regex-matched initializers,
    including patterns that contain a dot (which pyhocon treats specially
    inside keys)."""
    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.linear_1_with_funky_name = torch.nn.Linear(5, 10)
            self.linear_2 = torch.nn.Linear(10, 5)
            self.conv = torch.nn.Conv1d(5, 5, 5)

        def forward(self, inputs):  # pylint: disable=arguments-differ
            pass

    # pyhocon does funny things if there's a . in a key. This test makes sure that we
    # handle these kinds of regexes correctly.
    json_params = """{"initializer": [ ["conv", {"type": "constant", "val": 5}], ["funky_na.*bi", {"type": "constant", "val": 7}] ]} """
    params = Params(pyhocon.ConfigFactory.parse_string(json_params))
    initializers = InitializerApplicator.from_params(params['initializer'])
    model = Net()
    initializers(model)
    # every conv parameter name matches the "conv" regex -> constant 5
    for parameter in model.conv.parameters():
        assert torch.equal(parameter.data, torch.ones(parameter.size()) * 5)
    # "funky_na.*bi" matches linear_1_with_funky_name.bias -> constant 7
    parameter = model.linear_1_with_funky_name.bias
    assert torch.equal(parameter.data, torch.ones(parameter.size()) * 7)
def test_remote_tensor_multi_var_methods(self):
    """Multi-output torch functions (max, qr, kthvalue, eig, svd) should work
    on tensors sent to a remote VirtualWorker, with results retrievable via
    ``.get()``."""
    hook = TorchHook(verbose=False)
    local = hook.local_worker
    remote = VirtualWorker(hook, 1)
    local.add_worker(remote)
    # max along dim 1 returns (values, argmax indices)
    x = torch.FloatTensor([[1, 2], [4, 3], [5, 6]])
    x.send(remote)
    y, z = torch.max(x, 1)
    assert torch.equal(y.get(), torch.FloatTensor([2, 4, 6]))
    assert torch.equal(z.get(), torch.LongTensor([1, 0, 1]))
    # QR decomposition
    x = torch.FloatTensor([[0, 0], [1, 0]]).send(remote)
    y, z = torch.qr(x)
    assert (y.get() == torch.FloatTensor([[0, -1], [-1, 0]])).all()
    assert (z.get() == torch.FloatTensor([[-1, 0], [0, 0]])).all()
    # k-th smallest value and its index
    x = torch.arange(1, 6).send(remote)
    y, z = torch.kthvalue(x, 4)
    assert (y.get() == torch.FloatTensor([4])).all()
    assert (z.get() == torch.LongTensor([3])).all()
    # eigendecomposition (eigenvectors requested)
    x = torch.FloatTensor([[0, 0], [1, 1]]).send(remote)
    y, z = torch.eig(x, True)
    assert (y.get() == torch.FloatTensor([[1, 0], [0, 0]])).all()
    # NOTE(review): compares an equality mask against a fixed ByteTensor, so
    # only selected entries of the eigenvector matrix are actually pinned down
    assert ((z.get() == torch.FloatTensor([[0, 0], [1, 0]]))
            == torch.ByteTensor([[1, 0], [1, 0]])).all()
    # SVD of the zero matrix: identity bases, zero singular values
    x = torch.zeros(3, 3).send(remote)
    w, y, z = torch.svd(x)
    assert (w.get() == torch.FloatTensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]])).all()
    assert (y.get() == torch.FloatTensor([0, 0, 0])).all()
    assert (z.get() == torch.FloatTensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]])).all()
def test_add_output_dim(self, cuda=False):
    """add_output_dim should reject under-dimensioned inputs and otherwise
    insert a new output dimension at the correct index."""
    device = torch.device("cuda") if cuda else torch.device("cpu")
    batch_shape = torch.Size([2])
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": device, "dtype": dtype}
        # an input with too few dimensions raises
        with self.assertRaises(ValueError):
            add_output_dim(
                X=torch.rand(2, 1, **tkwargs), original_batch_shape=batch_shape
            )
        # no extra batch dims: output dim goes in front (index 0)
        X = torch.rand(2, 2, 1, **tkwargs)
        X_out, out_idx = add_output_dim(X=X, original_batch_shape=batch_shape)
        self.assertTrue(torch.equal(X_out, X.unsqueeze(0)))
        self.assertEqual(out_idx, 0)
        # one new batch dim: output dim goes after it (index 1)
        X = torch.rand(3, 2, 2, 1, **tkwargs)
        X_out, out_idx = add_output_dim(X=X, original_batch_shape=batch_shape)
        self.assertTrue(torch.equal(X_out, X.unsqueeze(1)))
        self.assertEqual(out_idx, 1)
def test_degenerate_GPyTorchPosterior(self, cuda=False):
    """A GPyTorchPosterior built on a singular covariance should still expose
    mean/variance and support (r)sampling, emitting a single "not p.d."
    RuntimeWarning per posterior."""
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        # singular covariance matrix
        degenerate_covar = torch.tensor(
            [[1, 1, 0], [1, 1, 0], [0, 0, 2]], dtype=dtype, device=device
        )
        mean = torch.rand(3, dtype=dtype, device=device)
        mvn = MultivariateNormal(mean, lazify(degenerate_covar))
        posterior = GPyTorchPosterior(mvn=mvn)
        # basics
        self.assertEqual(posterior.device.type, device.type)
        self.assertTrue(posterior.dtype == dtype)
        self.assertEqual(posterior.event_shape, torch.Size([3, 1]))
        self.assertTrue(torch.equal(posterior.mean, mean.unsqueeze(-1)))
        variance_exp = degenerate_covar.diag().unsqueeze(-1)
        self.assertTrue(torch.equal(posterior.variance, variance_exp))
        # rsample
        with warnings.catch_warnings(record=True) as w:
            # we check that the p.d. warning is emitted - this only
            # happens once per posterior, so we need to check only once
            samples = posterior.rsample(sample_shape=torch.Size([4]))
            self.assertEqual(len(w), 1)
            self.assertTrue(issubclass(w[-1].category, RuntimeWarning))
            self.assertTrue("not p.d." in str(w[-1].message))
        self.assertEqual(samples.shape, torch.Size([4, 3, 1]))
        samples2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
        self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 1]))
        # rsample w/ base samples: fixed base samples => deterministic draws
        base_samples = torch.randn(4, 3, 1, device=device, dtype=dtype)
        samples_b1 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        samples_b2 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        self.assertTrue(torch.allclose(samples_b1, samples_b2))
        base_samples2 = torch.randn(4, 2, 3, 1, device=device, dtype=dtype)
        samples2_b1 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        samples2_b2 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        self.assertTrue(torch.allclose(samples2_b1, samples2_b2))
        # collapse_batch_dims: batched posterior from an expanded singular covar
        b_mean = torch.rand(2, 3, dtype=dtype, device=device)
        b_degenerate_covar = degenerate_covar.expand(2, *degenerate_covar.shape)
        b_mvn = MultivariateNormal(b_mean, lazify(b_degenerate_covar))
        b_posterior = GPyTorchPosterior(mvn=b_mvn)
        b_base_samples = torch.randn(4, 2, 3, 1, device=device, dtype=dtype)
        with warnings.catch_warnings(record=True) as w:
            b_samples = b_posterior.rsample(
                sample_shape=torch.Size([4]), base_samples=b_base_samples
            )
            self.assertEqual(len(w), 1)
            self.assertTrue(issubclass(w[-1].category, RuntimeWarning))
            self.assertTrue("not p.d." in str(w[-1].message))
        self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 1]))
def test_q_noisy_expected_improvement(self, cuda=False):
    """qNoisyExpectedImprovement on a mocked model: the value is correct, and
    the sampler's base samples are cached across calls unless the sampler was
    configured to resample."""
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        # the event shape is `b x q x t` = 1 x 2 x 1
        samples_noisy = torch.tensor([1.0, 0.0], device=device, dtype=dtype)
        samples_noisy = samples_noisy.view(1, 2, 1)
        # X_baseline is `q' x d` = 1 x 1
        X_baseline = torch.zeros(1, 1, device=device, dtype=dtype)
        mm_noisy = MockModel(MockPosterior(samples=samples_noisy))
        # X is `q x d` = 1 x 1
        X = torch.zeros(1, 1, device=device, dtype=dtype)
        # basic test
        sampler = IIDNormalSampler(num_samples=2)
        acqf = qNoisyExpectedImprovement(
            model=mm_noisy, X_baseline=X_baseline, sampler=sampler
        )
        res = acqf(X)
        self.assertEqual(res.item(), 1.0)
        # basic test, no resample: base samples are frozen after first call
        sampler = IIDNormalSampler(num_samples=2, seed=12345)
        acqf = qNoisyExpectedImprovement(
            model=mm_noisy, X_baseline=X_baseline, sampler=sampler
        )
        res = acqf(X)
        self.assertEqual(res.item(), 1.0)
        self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1]))
        bs = acqf.sampler.base_samples.clone()
        acqf(X)
        self.assertTrue(torch.equal(acqf.sampler.base_samples, bs))
        # basic test, qmc, no resample
        sampler = SobolQMCNormalSampler(num_samples=2)
        acqf = qNoisyExpectedImprovement(
            model=mm_noisy, X_baseline=X_baseline, sampler=sampler
        )
        res = acqf(X)
        self.assertEqual(res.item(), 1.0)
        self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1]))
        bs = acqf.sampler.base_samples.clone()
        acqf(X)
        self.assertTrue(torch.equal(acqf.sampler.base_samples, bs))
        # basic test, qmc, resample: base samples must change between calls
        sampler = SobolQMCNormalSampler(num_samples=2, resample=True, seed=12345)
        acqf = qNoisyExpectedImprovement(
            model=mm_noisy, X_baseline=X_baseline, sampler=sampler
        )
        res = acqf(X)
        self.assertEqual(res.item(), 1.0)
        self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1]))
        bs = acqf.sampler.base_samples.clone()
        acqf(X)
        self.assertFalse(torch.equal(acqf.sampler.base_samples, bs))
def test_MockPosterior(self):
    """MockPosterior should echo back its configured mean/variance and tile
    its stored samples across the requested sample shape."""
    mu = torch.rand(2)
    var = torch.eye(2)
    draws = torch.rand(1, 2)
    posterior = MockPosterior(mean=mu, variance=var, samples=draws)
    self.assertTrue(torch.equal(posterior.mean, mu))
    self.assertTrue(torch.equal(posterior.variance, var))
    # default sample() prepends a single sample dimension
    self.assertTrue(torch.all(posterior.sample() == draws.unsqueeze(0)))
    # an explicit sample shape repeats the stored samples accordingly
    self.assertTrue(
        torch.all(posterior.sample(torch.Size([2])) == draws.repeat(2, 1, 1))
    )
def test_GPyTorchPosterior(self, cuda=False):
    """GPyTorchPosterior over a diagonal-covariance MVN: device/dtype/shape
    basics, rsample with and without base samples, and batch-dim collapsing."""
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        mean = torch.rand(3, dtype=dtype, device=device)
        variance = 1 + torch.rand(3, dtype=dtype, device=device)
        covar = variance.diag()
        mvn = MultivariateNormal(mean, lazify(covar))
        posterior = GPyTorchPosterior(mvn=mvn)
        # basics
        self.assertEqual(posterior.device.type, device.type)
        self.assertTrue(posterior.dtype == dtype)
        self.assertEqual(posterior.event_shape, torch.Size([3, 1]))
        self.assertTrue(torch.equal(posterior.mean, mean.unsqueeze(-1)))
        self.assertTrue(torch.equal(posterior.variance, variance.unsqueeze(-1)))
        # rsample
        samples = posterior.rsample()
        self.assertEqual(samples.shape, torch.Size([1, 3, 1]))
        samples = posterior.rsample(sample_shape=torch.Size([4]))
        self.assertEqual(samples.shape, torch.Size([4, 3, 1]))
        samples2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
        self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 1]))
        # rsample w/ base samples
        base_samples = torch.randn(4, 3, 1, device=device, dtype=dtype)
        # incompatible shapes
        with self.assertRaises(RuntimeError):
            posterior.rsample(
                sample_shape=torch.Size([3]), base_samples=base_samples
            )
        # fixed base samples make rsample deterministic
        samples_b1 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        samples_b2 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        self.assertTrue(torch.allclose(samples_b1, samples_b2))
        base_samples2 = torch.randn(4, 2, 3, 1, device=device, dtype=dtype)
        samples2_b1 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        samples2_b2 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        self.assertTrue(torch.allclose(samples2_b1, samples2_b2))
        # collapse_batch_dims: base samples with a singleton batch dim are
        # expanded over the posterior's batch shape
        b_mean = torch.rand(2, 3, dtype=dtype, device=device)
        b_variance = 1 + torch.rand(2, 3, dtype=dtype, device=device)
        b_covar = b_variance.unsqueeze(-1) * torch.eye(3).type_as(b_variance)
        b_mvn = MultivariateNormal(b_mean, lazify(b_covar))
        b_posterior = GPyTorchPosterior(mvn=b_mvn)
        b_base_samples = torch.randn(4, 1, 3, 1, device=device, dtype=dtype)
        b_samples = b_posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=b_base_samples
        )
        self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 1]))
def test_local_tensor_iterable_methods(self): x = torch.FloatTensor([1, 2, 3]) y = torch.FloatTensor([2, 3, 4]) z = torch.FloatTensor([5, 6, 7]) assert(torch.equal(torch.stack([x, y, z]), torch.FloatTensor([[1, 2, 3], [2, 3, 4], [5, 6, 7]]))) x = torch.FloatTensor([1, 2, 3]) y = torch.FloatTensor([2, 3, 4]) z = torch.FloatTensor([5, 6, 7]) assert (torch.equal(torch.cat([x, y, z]), torch.FloatTensor([1, 2, 3, 2, 3, 4, 5, 6, 7])))
def test_generic_mc_objective(self, cuda=False):
    """GenericMCObjective should simply delegate to the wrapped callable for
    inputs of varying shapes and dtypes."""
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        obj = GenericMCObjective(generic_obj)
        for shape in ((1,), (2,), (3, 1), (3, 2)):
            samples = torch.randn(*shape, device=device, dtype=dtype)
            # the objective output must match a direct call to the callable
            self.assertTrue(torch.equal(obj(samples), generic_obj(samples)))
def test_make_grid_not_inplace(self):
    """make_grid must never mutate its input, whichever normalization flags are set."""
    t = torch.rand(5, 3, 10, 10)
    snapshot = t.clone()
    # exercise all normalize/scale_each combinations used by the original test
    for kwargs in (
        {"normalize": False},
        {"normalize": True, "scale_each": False},
        {"normalize": True, "scale_each": True},
    ):
        utils.make_grid(t, **kwargs)
        assert torch.equal(t, snapshot), 'make_grid modified tensor in-place'
def test_q_expected_improvement(self, cuda=False):
    """qExpectedImprovement on a mocked model: correct values (including a
    shifted best_f), and base samples frozen across calls unless the sampler
    resamples."""
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        # the event shape is `b x q x t` = 1 x 1 x 1
        samples = torch.zeros(1, 1, 1, device=device, dtype=dtype)
        mm = MockModel(MockPosterior(samples=samples))
        # X is `q x d` = 1 x 1. X is a dummy and unused b/c of mocking
        X = torch.zeros(1, 1, device=device, dtype=dtype)
        # basic test
        sampler = IIDNormalSampler(num_samples=2)
        acqf = qExpectedImprovement(model=mm, best_f=0, sampler=sampler)
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)
        # test shifting best_f value
        acqf = qExpectedImprovement(model=mm, best_f=-1, sampler=sampler)
        res = acqf(X)
        self.assertEqual(res.item(), 1.0)
        # basic test, no resample: base samples cached after the first call
        sampler = IIDNormalSampler(num_samples=2, seed=12345)
        acqf = qExpectedImprovement(model=mm, best_f=0, sampler=sampler)
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)
        self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 1, 1]))
        bs = acqf.sampler.base_samples.clone()
        res = acqf(X)
        self.assertTrue(torch.equal(acqf.sampler.base_samples, bs))
        # basic test, qmc, no resample
        sampler = SobolQMCNormalSampler(num_samples=2)
        acqf = qExpectedImprovement(model=mm, best_f=0, sampler=sampler)
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)
        self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 1, 1]))
        bs = acqf.sampler.base_samples.clone()
        acqf(X)
        self.assertTrue(torch.equal(acqf.sampler.base_samples, bs))
        # basic test, qmc, resample: base samples must differ between calls
        sampler = SobolQMCNormalSampler(num_samples=2, resample=True)
        acqf = qExpectedImprovement(model=mm, best_f=0, sampler=sampler)
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)
        self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 1, 1]))
        bs = acqf.sampler.base_samples.clone()
        acqf(X)
        self.assertFalse(torch.equal(acqf.sampler.base_samples, bs))
def do_test_per_param_optim(self, fixed_param, free_param):
    """Per-parameter optimizer configuration: `fixed_param` gets lr=0 (stays
    at its initial value) while `free_param` gets lr=0.01 (moves). Also checks
    that Adam optimizer state save/load preserves step counts."""
    pyro.clear_param_store()

    def model():
        prior_dist = Normal(self.mu0, torch.pow(self.lam0, -0.5))
        mu_latent = pyro.sample("mu_latent", prior_dist)
        x_dist = Normal(mu_latent, torch.pow(self.lam, -0.5))
        pyro.observe("obs", x_dist, self.data)
        return mu_latent

    def guide():
        mu_q = pyro.param(
            "mu_q",
            Variable(
                torch.zeros(1),
                requires_grad=True))
        log_sig_q = pyro.param(
            "log_sig_q",
            Variable(
                torch.zeros(1),
                requires_grad=True))
        sig_q = torch.exp(log_sig_q)
        pyro.sample("mu_latent", Normal(mu_q, sig_q))

    def optim_params(module_name, param_name, tags):
        # lr=0 freezes the fixed param.  NOTE(review): any other param falls
        # through and returns None -- presumably the optimizer then applies
        # its defaults; confirm against pyro.optim behavior.
        if param_name == fixed_param:
            return {'lr': 0.00}
        elif param_name == free_param:
            return {'lr': 0.01}

    adam = optim.Adam(optim_params)
    adam2 = optim.Adam(optim_params)
    svi = SVI(model, guide, adam, loss="ELBO", trace_graph=True)
    svi2 = SVI(model, guide, adam2, loss="ELBO", trace_graph=True)
    svi.step()
    adam_initial_step_count = list(adam.get_state()['mu_q']['state'].items())[0][1]['step']
    adam.save('adam.unittest.save')
    svi.step()
    adam_final_step_count = list(adam.get_state()['mu_q']['state'].items())[0][1]['step']
    adam2.load('adam.unittest.save')
    svi2.step()
    adam2_step_count_after_load_and_step = list(adam2.get_state()['mu_q']['state'].items())[0][1]['step']
    # the reloaded optimizer must resume from the saved step count
    assert adam_initial_step_count == 1
    assert adam_final_step_count == 2
    assert adam2_step_count_after_load_and_step == 2
    # lr=0 leaves the fixed param at its init; the free param must have moved
    free_param_unchanged = torch.equal(pyro.param(free_param).data, torch.zeros(1))
    fixed_param_unchanged = torch.equal(pyro.param(fixed_param).data, torch.zeros(1))
    assert fixed_param_unchanged and not free_param_unchanged
def test_match_batch_shape(self):
    """match_batch_shape should broadcast X's batch dims up to Y's and raise
    when that is impossible."""
    # a batch-less X gains Y's single batch dimension
    X = torch.rand(3, 2)
    Y = torch.rand(1, 3, 2)
    self.assertTrue(torch.equal(match_batch_shape(X, Y), X.unsqueeze(0)))
    # a size-1 batch dim is repeated up to Y's batch size
    X = torch.rand(1, 3, 2)
    Y = torch.rand(2, 3, 2)
    self.assertTrue(torch.equal(match_batch_shape(X, Y), X.repeat(2, 1, 1)))
    # a non-singleton batch dim cannot be shrunk to match a smaller one
    X = torch.rand(2, 3, 2)
    Y = torch.rand(1, 3, 2)
    with self.assertRaises(RuntimeError):
        match_batch_shape(X, Y)
def test_standardize(self, cuda=False):
    """standardize should produce zero-mean/unit-sample-std columns, leaving
    constant (zero-spread) data unchanged."""
    device = torch.device("cuda" if cuda else "cpu")
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": device, "dtype": dtype}
        # constant input is returned unchanged
        X = torch.tensor([0.0, 0.0], **tkwargs)
        self.assertTrue(torch.equal(X, standardize(X)))
        # mean 0.75, sample std 0.5 -> [-1.5, 0.5, 0.5, 0.5]
        X2 = torch.tensor([0.0, 1.0, 1.0, 1.0], **tkwargs)
        expected = torch.tensor([-1.5, 0.5, 0.5, 0.5], **tkwargs)
        self.assertTrue(torch.equal(expected, standardize(X2)))
        # column-wise: varying column matches X2's result, constant column -> zeros
        X3 = torch.tensor(
            [[0.0, 1.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0]], **tkwargs
        ).transpose(1, 0)
        stdized = standardize(X3)
        self.assertTrue(torch.equal(stdized[:, 0], expected))
        self.assertTrue(torch.equal(stdized[:, 1], torch.zeros(4, **tkwargs)))
def test_identity_mc_objective(self, cuda=False):
    """IdentityMCObjective should pass samples through, squeezing only a
    trailing singleton dimension."""
    device = torch.device("cuda") if cuda else torch.device("cpu")
    # (shape, expected-transform) pairs covering all squeeze behaviors
    cases = [
        ((1,), lambda s: s[0]),              # single element -> scalar
        ((2,), lambda s: s),                 # non-squeezable 1-d
        ((3, 1), lambda s: s.squeeze(-1)),   # trailing singleton squeezed
        ((3, 2), lambda s: s),               # non-squeezable 2-d
    ]
    for dtype in (torch.float, torch.double):
        obj = IdentityMCObjective()
        for shape, expect in cases:
            samples = torch.randn(*shape, device=device, dtype=dtype)
            self.assertTrue(torch.equal(obj(samples), expect(samples)))
def test_read_embedding_file_inside_archive(self):
    """Embeddings can be read from a file inside a .zip or .tar.gz archive via
    a path-inside-archive URI; pointing at a bare multi-file archive must
    raise ValueError."""
    token2vec = {
        "think": torch.Tensor([0.143, 0.189, 0.555, 0.361, 0.472]),
        "make": torch.Tensor([0.878, 0.651, 0.044, 0.264, 0.872]),
        "difference": torch.Tensor([0.053, 0.162, 0.671, 0.110, 0.259]),
        "àèìòù": torch.Tensor([1.0, 2.0, 3.0, 4.0, 5.0])
    }
    vocab = Vocabulary()
    for token in token2vec:
        vocab.add_token_to_namespace(token)
    # the archive itself (no inner path) is ambiguous -> must error
    params = Params({
        'pretrained_file': str(self.FIXTURES_ROOT / 'embeddings/multi-file-archive.zip'),
        'embedding_dim': 5
    })
    with pytest.raises(ValueError, message="No ValueError when pretrained_file is a multi-file archive"):
        Embedding.from_params(vocab, params)
    # both archive formats should resolve a URI naming the inner file
    for ext in ['.zip', '.tar.gz']:
        archive_path = str(self.FIXTURES_ROOT / 'embeddings/multi-file-archive') + ext
        file_uri = format_embeddings_file_uri(archive_path, 'folder/fake_embeddings.5d.txt')
        params = Params({
            'pretrained_file': file_uri,
            'embedding_dim': 5
        })
        embeddings = Embedding.from_params(vocab, params).weight.data
        # each token's row must exactly match the vectors in the archive file
        for tok, vec in token2vec.items():
            i = vocab.get_token_index(tok)
            assert torch.equal(embeddings[i], vec), 'Problem with format ' + archive_path
def test_archiving(self):
    """train_model should produce a model.tar.gz archive from which an
    identical model (weights, vocabulary, params) can be reloaded."""
    # copy params, since they'll get consumed during training
    params_copy = copy.deepcopy(self.params.as_dict())
    # `train_model` should create an archive
    serialization_dir = self.TEST_DIR / 'archive_test'
    model = train_model(self.params, serialization_dir=serialization_dir)
    archive_path = serialization_dir / "model.tar.gz"
    # load from the archive
    archive = load_archive(archive_path)
    model2 = archive.model
    # check that model weights are the same
    keys = set(model.state_dict().keys())
    keys2 = set(model2.state_dict().keys())
    assert keys == keys2
    for key in keys:
        assert torch.equal(model.state_dict()[key], model2.state_dict()[key])
    # check that vocabularies are the same
    vocab = model.vocab
    vocab2 = model2.vocab
    assert vocab._token_to_index == vocab2._token_to_index  # pylint: disable=protected-access
    assert vocab._index_to_token == vocab2._index_to_token  # pylint: disable=protected-access
    # check that params are the same
    params2 = archive.config
    assert params2.as_dict() == params_copy
def compare_state_dict(sa, sb):
    """Return True iff the two state dicts have identical key sets and every
    corresponding tensor pair is exactly equal (same shape and values)."""
    if sa.keys() != sb.keys():
        return False
    return all(torch.equal(tensor, sb[name]) for name, tensor in sa.items())
def test_local_tensor_multi_var_methods(self):
    """Multi-output torch functions (max, eig, qr, kthvalue, svd) applied to
    plain local tensors.

    NOTE(review): relies on legacy APIs (``torch.eig``, the old ``torch.qr``
    signature) that were deprecated/removed in later PyTorch releases --
    this test only runs against the pinned version it was written for.
    """
    x = torch.FloatTensor([[1, 2], [2, 3], [5, 6]])
    t, s = torch.max(x, 1)
    # counts matches instead of .all(); equivalent for these 3-element results
    assert (t == torch.FloatTensor([2, 3, 6])).float().sum() == 3
    assert (s == torch.LongTensor([1, 1, 1])).float().sum() == 3
    x = torch.FloatTensor([[0, 0], [1, 1]])
    y, z = torch.eig(x, True)
    assert (y == torch.FloatTensor([[1, 0], [0, 0]])).all()
    # compares the equality mask against a fixed ByteTensor, so only selected
    # entries of the eigenvector matrix are pinned down
    assert (torch.equal(z == torch.FloatTensor([[0, 0], [1, 0]]),
                        torch.ByteTensor([[1, 0], [1, 0]])))
    x = torch.FloatTensor([[0, 0], [1, 0]])
    y, z = torch.qr(x)
    assert (y == torch.FloatTensor([[0, -1], [-1, 0]])).all()
    assert (z == torch.FloatTensor([[-1, 0], [0, 0]])).all()
    # 4th-smallest value of [1..5] and its index
    x = torch.arange(1, 6)
    y, z = torch.kthvalue(x, 4)
    assert (y == torch.FloatTensor([4])).all()
    assert (z == torch.LongTensor([3])).all()
    # SVD of the zero matrix: identity bases and zero singular values
    x = torch.zeros(3, 3)
    w, y, z = torch.svd(x)
    assert (w == torch.FloatTensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]])).all()
    assert (y == torch.FloatTensor([0, 0, 0])).all()
    assert (z == torch.FloatTensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]])).all()
def test_joint_optimize(
    self,
    mock_get_best_candidates,
    mock_gen_candidates,
    mock_gen_batch_initial_conditions,
    cuda=False,
):
    """joint_optimize should return whatever get_best_candidates selects from
    the generated candidates (all three pipeline stages are mocked out)."""
    q = 3
    num_restarts = 2
    raw_samples = 10
    options = {}
    mock_acq_function = MockAcquisitionFunction()
    tkwargs = {"device": torch.device("cuda") if cuda else torch.device("cpu")}
    for dtype in (torch.float, torch.double):
        tkwargs["dtype"] = dtype
        mock_gen_batch_initial_conditions.return_value = torch.zeros(
            num_restarts, q, 3, **tkwargs
        )
        # one candidate batch per restart (0s, 1s, ...)
        mock_gen_candidates.return_value = torch.cat(
            [i * torch.ones(1, q, 3, **tkwargs) for i in range(num_restarts)],
            dim=0,
        )
        mock_get_best_candidates.return_value = torch.ones(1, q, 3, **tkwargs)
        expected_candidates = mock_get_best_candidates.return_value
        bounds = torch.stack(
            [torch.zeros(3, **tkwargs), 4 * torch.ones(3, **tkwargs)]
        )
        candidates = joint_optimize(
            acq_function=mock_acq_function,
            bounds=bounds,
            q=q,
            num_restarts=num_restarts,
            raw_samples=raw_samples,
            options=options,
        )
        # the selected best candidates must be passed straight through
        self.assertTrue(torch.equal(candidates, expected_candidates))
def assertNotEqual(self, x, y, prec=None, message=''):
    """Assert that x and y are NOT equal: tensors must differ by at least
    `prec` somewhere (NaN-aware), strings/iterables by standard inequality,
    scalars by at least `prec`."""
    if prec is None:
        prec = self.precision
    x, y = self.unwrapVariables(x, y)
    if torch.is_tensor(x) and torch.is_tensor(y):
        if x.size() != y.size():
            # different shapes are trivially unequal
            super(TestCase, self).assertNotEqual(x.size(), y.size())
        self.assertGreater(x.numel(), 0)
        # bring y onto x's dtype and device before elementwise comparison
        y = y.type_as(x)
        y = y.cuda(device=x.get_device()) if x.is_cuda else y.cpu()
        nan_mask = x != x
        if torch.equal(nan_mask, y != y):
            # NaN patterns agree: compare the non-NaN entries numerically
            diff = x - y
            if diff.is_signed():
                diff = diff.abs()
            # zero out NaN positions so they don't dominate the max
            diff[nan_mask] = 0
            max_err = diff.max()
            self.assertGreaterEqual(max_err, prec, message)
        # if the NaN masks differ, the tensors are unequal -- nothing to assert
    elif type(x) == str and type(y) == str:
        super(TestCase, self).assertNotEqual(x, y)
    elif is_iterable(x) and is_iterable(y):
        super(TestCase, self).assertNotEqual(x, y)
    else:
        try:
            # numeric scalars: unequal if they differ by at least prec
            self.assertGreaterEqual(abs(x - y), prec, message)
            return
        except (TypeError, AssertionError):
            # fall back to plain inequality for non-numeric values
            pass
        super(TestCase, self).assertNotEqual(x, y, message)
def test_gen_batch_initial_conditions_simple_warning(self, cuda=False):
    """When every raw sample is degenerate (all-zero here), the function should
    still return initial conditions and emit a BadInitialCandidatesWarning."""
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        bounds = torch.tensor([[0, 0], [1, 1]], device=device, dtype=dtype)
        with warnings.catch_warnings(record=True) as ws:
            # force all sobol draws to the same (zero) point
            with mock.patch(
                "botorch.optim.optimize.draw_sobol_samples",
                return_value=torch.zeros(10, 1, 2, device=device, dtype=dtype),
            ):
                batch_initial_conditions = gen_batch_initial_conditions(
                    acq_function=MockAcquisitionFunction(),
                    bounds=bounds,
                    q=1,
                    num_restarts=2,
                    raw_samples=10,
                )
            # exactly one warning, of the expected category
            self.assertEqual(len(ws), 1)
            self.assertTrue(
                issubclass(ws[-1].category, BadInitialCandidatesWarning)
            )
            self.assertTrue(
                torch.equal(
                    batch_initial_conditions,
                    torch.zeros(2, 1, 2, device=device, dtype=dtype),
                )
            )
def geometric(p, t=None):
    """Recursively sample Bernoulli draws at sites "x_t", "x_{t+1}", ... until
    a zero is drawn; return the sum of all draws (the count of ones before
    the terminating zero, as a tensor)."""
    if t is None:
        t = 0
    x = pyro.sample("x_{}".format(t), dist.bernoulli, p)
    # a zero draw terminates the recursion; otherwise keep sampling
    if not torch.equal(x.data, torch.zeros(1)):
        return x + geometric(p, t + 1)
    return x
def test_python_ir(self):
    """Trace a small function, clone its JIT graph node-by-node into a fresh
    graph, attach a tensor attribute to a new node, and check the printout."""
    x = Variable(torch.Tensor([0.4]), requires_grad=True)
    y = Variable(torch.Tensor([0.7]), requires_grad=True)

    def doit(x, y):
        return torch.sigmoid(torch.tanh(x * (x + y)))

    traced, _ = torch.jit.trace(doit, (x, y))
    g = torch._C._jit_get_graph(traced)
    g2 = torch._C.Graph()
    # map values of g to their clones in g2 so edges can be rewired
    g_to_g2 = {}
    for node in g.inputs():
        g_to_g2[node] = g2.addInput()
    for node in g.nodes():
        n_ = g2.createClone(node, lambda x: g_to_g2[x])
        g2.appendNode(n_)
        for o, no in zip(node.outputs(), n_.outputs()):
            g_to_g2[o] = no
    for node in g.outputs():
        g2.registerOutput(g_to_g2[node])
    # a tensor-valued attribute must round-trip through the IR node
    t_node = g2.create("TensorTest").t_("a", torch.ones([2, 2]))
    assert(t_node.attributeNames() == ["a"])
    g2.appendNode(t_node)
    assert(torch.equal(torch.ones([2, 2]), t_node.t("a")))
    # compare the cloned graph's text form against the expected-output fixture
    self.assertExpected(str(g2))
def test_forward_pass_runs_correctly(self):
    """
    Check to make sure a forward pass on an ensemble of two identical copies of a model yields
    the same results as the model itself.
    """
    bidaf_ensemble = BidafEnsemble([self.model, self.model])
    batch = Batch(self.instances)
    batch.index_instances(self.vocab)
    training_tensors = batch.as_tensor_dict()
    bidaf_output_dict = self.model(**training_tensors)
    ensemble_output_dict = bidaf_ensemble(**training_tensors)
    metrics = self.model.get_metrics(reset=True)
    # We've set up the data such that there's a fake answer that consists of the whole
    # paragraph.  _Any_ valid prediction for that question should produce an F1 of greater than
    # zero, while if we somehow haven't been able to load the evaluation data, or there was an
    # error with using the evaluation script, this will fail.  This makes sure that we've
    # loaded the evaluation data correctly and have hooked things up to the official evaluation
    # script.
    assert metrics['f1'] > 0
    # an ensemble of identical members must agree with the single model
    assert torch.equal(ensemble_output_dict['best_span'], bidaf_output_dict['best_span'])
    assert ensemble_output_dict['best_span_str'] == bidaf_output_dict['best_span_str']
def test_torch_function_with_multiple_output_on_remote_var(self):
    """Multi-output torch functions applied to autograd Variables living on a
    remote VirtualWorker; results come back via ``.get()``."""
    hook = TorchHook(verbose=False)
    me = hook.local_worker
    remote = VirtualWorker(id=2, hook=hook)
    me.add_worker(remote)
    # max -> (values, indices); only the values are fetched and checked here
    x = Var(torch.FloatTensor([[1, 2], [4, 3], [5, 6]]))
    x.send(remote)
    y, z = torch.max(x, 1)
    y.get()
    assert torch.equal(y, Var(torch.FloatTensor([2, 4, 6])))
    # QR decomposition
    x = Var(torch.FloatTensor([[0, 0], [1, 0]])).send(remote)
    y, z = torch.qr(x)
    assert (y.get() == Var(torch.FloatTensor([[0, -1], [-1, 0]]))).all()
    assert (z.get() == Var(torch.FloatTensor([[-1, 0], [0, 0]]))).all()
    # k-th smallest value and its index
    x = Var(torch.arange(1, 6)).send(remote)
    y, z = torch.kthvalue(x, 4)
    assert (y.get() == Var(torch.FloatTensor([4]))).all()
    assert (z.get() == Var(torch.LongTensor([3]))).all()
    # eigendecomposition of the zero matrix
    x = Var(torch.FloatTensor([[0, 0], [0, 0]]))
    x.send(remote)
    y, z = torch.eig(x, True)
    assert (y.get() == Var(torch.FloatTensor([[0, 0], [0, 0]]))).all()
    assert (z.get() == Var(torch.FloatTensor([[1, 0.], [0, 1]]))).all()
    # SVD of the zero matrix: identity bases, zero singular values
    x = Var(torch.zeros(3, 3)).send(remote)
    w, y, z = torch.svd(x)
    assert (w.get() == Var(torch.FloatTensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]]))).all()
    assert (y.get() == Var(torch.FloatTensor([0, 0, 0]))).all()
    assert (z.get() == Var(torch.FloatTensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]]))).all()
def test_GPyTorchPosterior_Multitask(self, cuda=False):
    """Multitask (t=2) GPyTorchPosterior: the event shape keeps the task dim
    and sampling behaves like the single-task case."""
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        mean = torch.rand(3, 2, dtype=dtype, device=device)
        variance = 1 + torch.rand(3, 2, dtype=dtype, device=device)
        covar = variance.view(-1).diag()
        mvn = MultitaskMultivariateNormal(mean, lazify(covar))
        posterior = GPyTorchPosterior(mvn=mvn)
        # basics
        self.assertEqual(posterior.device.type, device.type)
        self.assertTrue(posterior.dtype == dtype)
        self.assertEqual(posterior.event_shape, torch.Size([3, 2]))
        self.assertTrue(torch.equal(posterior.mean, mean))
        self.assertTrue(torch.equal(posterior.variance, variance))
        # rsample
        samples = posterior.rsample(sample_shape=torch.Size([4]))
        self.assertEqual(samples.shape, torch.Size([4, 3, 2]))
        samples2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
        self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 2]))
        # rsample w/ base samples: fixed base samples => deterministic draws
        base_samples = torch.randn(4, 3, 2, device=device, dtype=dtype)
        samples_b1 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        samples_b2 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        self.assertTrue(torch.allclose(samples_b1, samples_b2))
        base_samples2 = torch.randn(4, 2, 3, 2, device=device, dtype=dtype)
        samples2_b1 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        samples2_b2 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        self.assertTrue(torch.allclose(samples2_b1, samples2_b2))
        # collapse_batch_dims: a singleton batch dim in the base samples is
        # expanded over the posterior's batch shape
        b_mean = torch.rand(2, 3, 2, dtype=dtype, device=device)
        b_variance = 1 + torch.rand(2, 3, 2, dtype=dtype, device=device)
        b_covar = b_variance.view(2, 6, 1) * torch.eye(6).type_as(b_variance)
        b_mvn = MultitaskMultivariateNormal(b_mean, lazify(b_covar))
        b_posterior = GPyTorchPosterior(mvn=b_mvn)
        b_base_samples = torch.randn(4, 1, 3, 2, device=device, dtype=dtype)
        b_samples = b_posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=b_base_samples
        )
        self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 2]))
def test_sequential_optimize(self, mock_joint_optimize, cuda=False):
    """sequential_optimize should call joint_optimize q times with q=1,
    concatenate the per-step candidates, and apply the post-processing func;
    acq functions without X_baseline must be rejected."""
    q = 3
    num_restarts = 2
    raw_samples = 10
    options = {}
    tkwargs = {"device": torch.device("cuda") if cuda else torch.device("cpu")}
    for dtype in (torch.float, torch.double):
        mock_acq_function = MockAcquisitionFunction()
        tkwargs["dtype"] = dtype
        joint_optimize_return_values = [
            torch.tensor([[[1.1, 2.1, 3.1]]], **tkwargs) for _ in range(q)
        ]
        mock_joint_optimize.side_effect = joint_optimize_return_values
        # candidates are concatenated along the q dimension, then rounded by
        # the post-processing function
        expected_candidates = torch.cat(
            joint_optimize_return_values, dim=-2
        ).round()
        bounds = torch.stack(
            [torch.zeros(3, **tkwargs), 4 * torch.ones(3, **tkwargs)]
        )
        inequality_constraints = [
            (torch.tensor([3]), torch.tensor([4]), torch.tensor(5))
        ]
        candidates = sequential_optimize(
            acq_function=mock_acq_function,
            bounds=bounds,
            q=q,
            num_restarts=num_restarts,
            raw_samples=raw_samples,
            options=options,
            inequality_constraints=inequality_constraints,
            post_processing_func=rounding_func,
        )
        self.assertTrue(torch.equal(candidates, expected_candidates))
        # each of the last q calls must use q=1 and forward all kwargs
        expected_call_kwargs = {
            "acq_function": mock_acq_function,
            "bounds": bounds,
            "q": 1,
            "num_restarts": num_restarts,
            "raw_samples": raw_samples,
            "options": options,
            "inequality_constraints": inequality_constraints,
            "equality_constraints": None,
            "fixed_features": None,
        }
        call_args_list = mock_joint_optimize.call_args_list[-q:]
        for i in range(q):
            self.assertEqual(call_args_list[i][1], expected_call_kwargs)
    # test that error is raised for acquisition functions without X_baseline
    mock_acq_function = MockAcquisitionFunction(has_X_baseline_attr=False)
    with self.assertRaises(UnsupportedError):
        sequential_optimize(
            acq_function=mock_acq_function,
            bounds=bounds,
            q=q,
            num_restarts=num_restarts,
            raw_samples=raw_samples,
        )
def test_local_tensor_tertiary_methods(self): x = torch.FloatTensor([1, 2, 3]) y = torch.FloatTensor([1, 2, 3]) z = torch.FloatTensor([1, 2, 3]) assert (torch.equal(torch.addcmul(z, 2, x, y), torch.FloatTensor([3., 10., 21.]))) x = torch.FloatTensor([1, 2, 3]) y = torch.FloatTensor([1, 2, 3]) z = torch.FloatTensor([1, 2, 3]) z.addcmul_(2, x, y) assert (torch.equal(z, torch.FloatTensor([3., 10., 21.]))) x = torch.FloatTensor([[1, 2]]) y = torch.FloatTensor([[1, 2, 3], [4, 5, 6]]) z = torch.FloatTensor([1, 2, 3]) assert(torch.equal(torch.addmm(z, x, y), torch.FloatTensor([[10., 14., 18.]])))
def test_remote_var_binary_methods(self):
    ''' Unit tests for methods mentioned on issue 1385
        https://github.com/OpenMined/PySyft/issues/1385

        Fix: the torch.dot check previously used print() instead of assert,
        so a wrong result could never fail the test.'''
    hook = TorchHook(verbose=False)
    local = hook.local_worker
    remote = VirtualWorker(hook, 1)
    local.add_worker(remote)
    # matmul of a row vector with a transposed row vector -> scalar result
    x = Var(torch.FloatTensor([1, 2, 3, 4])).send(remote)
    y = Var(torch.FloatTensor([[1, 2, 3, 4]])).send(remote)
    z = torch.matmul(x, y.t())
    assert (torch.equal(z.get(), Var(torch.FloatTensor([30]))))
    z = torch.add(x, y)
    assert (torch.equal(z.get(), Var(torch.FloatTensor([[2, 4, 6, 8]]))))
    # cross product of identical rows is zero
    x = Var(torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])).send(remote)
    y = Var(torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])).send(remote)
    z = torch.cross(x, y, dim=1)
    assert (torch.equal(z.get(), Var(torch.FloatTensor([[0, 0, 0], [0, 0, 0], [0, 0, 0]]))))
    # distance between identical tensors is zero
    x = Var(torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])).send(remote)
    y = Var(torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])).send(remote)
    z = torch.dist(x, y)
    assert (torch.equal(z.get(), Var(torch.FloatTensor([0.]))))
    x = Var(torch.FloatTensor([1, 2, 3])).send(remote)
    y = Var(torch.FloatTensor([1, 2, 3])).send(remote)
    z = torch.dot(x, y)
    # was: print(torch.equal(...)) -- silently ignored failures
    assert (torch.equal(z.get(), Var(torch.FloatTensor([14]))))
    z = torch.eq(x, y)
    assert (torch.equal(z.get(), Var(torch.ByteTensor([1, 1, 1]))))
    z = torch.ge(x, y)
    assert (torch.equal(z.get(), Var(torch.ByteTensor([1, 1, 1]))))
def forward(self, inputs, is_training, add_noise):
    """Decode a sequence of pointers over the input until every batch row
    has emitted the stop index (0).

    Args:
        inputs: [batch_size x 1 x sourceL]
        is_training: forwarded to the gaussian noise layer.
        add_noise: forwarded to the gaussian noise layer.

    Returns:
        (prev_probs, prev_idxs, stop_dict, loop_idx): per-step pointer
        distributions, sampled indices, the step at which each batch row
        first emitted 0, and the total number of decode steps.
    """
    batch_size = inputs.size(0)
    seq_len = inputs.size(2)
    assert seq_len == self.seq_len + 1
    embedded = self.embedding(inputs)
    embedded = self.gaussian(embedded, is_training, add_noise)
    encoder_outputs, (hidden, context) = self.encoder(embedded)
    prev_probs = []
    prev_idxs = []
    # mask marks already-selected positions so they cannot be re-pointed at.
    mask = torch.zeros(batch_size, seq_len).byte()
    if self.use_cuda:
        mask = mask.cuda()
    idxs = None
    # decoder_input = [batch, embedding_size]
    decoder_input = self.decoder_start_input.unsqueeze(0).repeat(
        batch_size, 1)
    # stop_dict[i] records the step at which row i first sampled index 0.
    stop_dict = {}
    # for CH and DT, this for loop shuld be replaced by while loop
    loop_idx = 0
    while True:
        _, (hidden, context) = self.decoder(decoder_input.unsqueeze(1),
                                            (hidden, context))
        query = hidden.squeeze(0)
        # Glimpse attention refines the query before the final pointer.
        for i in range(self.n_glimpses):
            ref, logits = self.glimpse(query, encoder_outputs)
            logits, mask = self.apply_mask_to_logits(logits, mask, idxs)
            query = torch.bmm(
                ref,
                F.softmax(logits).unsqueeze(2),
            ).squeeze(2)
        _, logits = self.pointer(query, encoder_outputs)
        logits, mask = self.apply_mask_to_logits(logits, mask, idxs)
        probs = F.softmax(logits)
        # torch.multinomial: sampling with multinomial distribution
        # torch.squeeze: eliminate the dimension of size 1 e.g. (4, 1, 3) -> (4, 3)
        idxs = probs.multinomial(1).squeeze(1)
        '''
        for old_idxs in prev_idxs:
            if old_idxs.eq(idxs).data.any():
                print(seq_len)
                print('RESAMPLE!')
                idxs = probs.multinomial(1).squeeze(1)
                break
        '''
        # check and process idxs: once a row has stopped (sampled 0), pin
        # its index to 0 for all remaining steps.
        for i in range(batch_size):
            if idxs[i] == 0 and not (i in stop_dict.keys()):
                stop_dict[i] = loop_idx
            if i in stop_dict.keys():
                idxs[i] = 0
        # decoder_input = [batch_size, indexes, embedding_size]
        decoder_input = embedded[[i for i in range(batch_size)], idxs.data, :]
        prev_probs.append(probs)
        prev_idxs.append(idxs)
        loop_idx += 1
        # Terminate once every row has emitted the stop index.
        if torch.equal(idxs, torch.zeros_like(idxs)):
            break
    return prev_probs, prev_idxs, stop_dict, loop_idx
def run_test_pipe(rank, world_size, filename, filename_rpc, skip_dist_init=False):
    """Compare a model-parallel nn.Sequential against a plain reference model,
    then wrap it in MultiProcessPipe and verify forward/backward/optimizer
    steps stay numerically consistent across pipeline ranks.

    Args:
        rank / world_size: this process's rank and the total process count.
        filename / filename_rpc: rendezvous files for dist_init.
        skip_dist_init: if True, only RPC is initialized (env-var rendezvous).
    """
    pipe_world_size = 2
    if world_size == 1:
        return
    if not skip_dist_init:
        dist_init(rank, world_size, filename, filename_rpc)
    else:
        os.environ["MASTER_ADDR"] = "localhost"
        os.environ["MASTER_PORT"] = "29502"
        rpc.init_rpc(f"Test{rank}", rank=rank, world_size=world_size)
    # NOTE(review): `/` yields a float model-parallel size in Python 3 —
    # presumably mpu coerces it; confirm `//` is not intended here.
    mpu.initialize_model_parallel(world_size / pipe_world_size, pipe_world_size)
    model_parallel_size = mpu.get_model_parallel_world_size()
    if torch.distributed.get_rank() == 0:
        print(
            "> testing Sequential + MultiProcessPipe with model parallel size: {}, pipe: {}"
            .format(model_parallel_size, pipe_world_size))
    chunk_size = 4
    seed = 12345
    set_random_seed(seed)
    input_size_coeff = 3
    input_size = input_size_coeff * model_parallel_size
    output_size_coeff = 7
    output_size = output_size_coeff * model_parallel_size
    batch_size = 3 * chunk_size
    target = torch.rand((batch_size, input_size), requires_grad=True).cuda()
    print(f"target = {target}")
    identity = IdentityLayer2D(batch_size, input_size).cuda()
    pipeline_devices = mpu.get_pipeline_parallel_group()
    # Same seed for both models so their initial weights line up.
    set_random_seed(seed)
    model = nn.Sequential(
        layers.ColumnParallelLinear(input_size,
                                    output_size,
                                    keep_master_weight_for_test=True,
                                    bias=False).cuda(),
        nn.ReLU(),
        layers.RowParallelLinear(output_size,
                                 input_size,
                                 keep_master_weight_for_test=True,
                                 bias=False).cuda(),
    )
    set_random_seed(seed)
    reference = [
        nn.Linear(input_size, output_size, bias=False).cuda(),
        nn.ReLU(),
        nn.Linear(output_size, input_size, bias=False).cuda(),
    ]
    print(
        f"setup {reference[0].weight.size()}, {model[0].weight.size()}, {(input_size, output_size)}"
    )
    print(f"setup {reference[2].weight.size()}, {(output_size, input_size)}")
    # Copy the sharded model's master weights into the dense reference.
    reference[0].weight = Parameter(
        model[0].get_master_weight().clone()).cuda()
    reference[2].weight = Parameter(
        model[2].get_master_weight().clone()).cuda()
    reference = nn.Sequential(*reference)

    def grad_graph(depth, grad):
        # Debug helper: render the autograd graph as an indented tree.
        result = depth * " " + str(grad)
        if grad:
            for x in grad.next_functions:
                result += "\n" + grad_graph(depth + 1, x[0])
        return result

    def check_weights(x, y, key: str, index=None):
        # Assert the sharded model x and dense model y agree on layers 2 and 0
        # (or just `index` when given), printing diagnostics on mismatch.
        for i in [2, 0]:
            if index is not None and i != index:
                continue
            left = x[i].get_master_weight()
            right = y[i].weight.data
            if not torch.allclose(left, right, atol=1.0e-6) or index is not None:
                print(
                    f"check_weights {key}-{i}: left = {left}, \nright = {right}"
                )
            if not torch.equal(left, right):
                print(
                    f"check_weights NOT_EQUAL {key}-{i}: left = {left}, \nright = {right}"
                )
            assert torch.allclose(left, right, atol=1.0e-6)

    def dump_opt_params(opt):
        # Debug helper: print every param and grad held by the optimizer.
        for i, group in enumerate(opt.param_groups):
            for j, p in enumerate(group["params"]):
                print(f"{torch.distributed.get_rank()}:param {(i,j)} = {p}")
                print(
                    f"{torch.distributed.get_rank()}:param.grad {(i,j)} = {p.grad}"
                )

    def forward_model(model_, target, step=False):
        # Run one forward pass; when step=True also backprop MSE loss and
        # take one SGD step, asserting the weights actually changed.
        optimizer = torch.optim.SGD(model_.parameters(), lr=0.01, momentum=0.9)
        optimizer.zero_grad()
        model_.zero_grad()
        output = model_(identity())
        loss = nn.MSELoss()
        model_.zero_grad()
        if step:
            loss(output, target).backward()
            saved_weight_0 = model_[0].weight.data.clone()
            saved_weight_2 = model_[2].weight.data.clone()
            dump_opt_params(optimizer)
            optimizer.step()
            assert not torch.allclose(
                saved_weight_0, model_[0].weight.data, atol=1.0e-6)
            assert not torch.allclose(
                saved_weight_2, model_[2].weight.data, atol=1.0e-6)
        return output

    # Repeated no-step forwards must be deterministic and match the reference.
    output = forward_model(model, target)
    reference_output = forward_model(reference, target)
    error = reference_output.sub(output).max()
    torch.distributed.barrier()
    assert error < 1.0e-6
    output = forward_model(model, target)
    error = reference_output.sub(output).max()
    torch.distributed.barrier()
    assert error < 1.0e-6
    output = forward_model(model, target)
    error = reference_output.sub(output).max()
    torch.distributed.barrier()
    assert error < 1.0e-6
    check_weights(model, reference, "before")
    # Take one optimizer step, then restore the weights so the pipe test
    # below starts from the original state.
    saved_weight_0 = model[0].weight.data.clone()
    saved_weight_2 = model[2].weight.data.clone()
    output = forward_model(model, target, step=True)
    error = reference_output.sub(output).max()
    assert error < 1.0e-6
    model[0].weight.data = saved_weight_0
    model[2].weight.data = saved_weight_2
    worker_map = {
        i: f"Test{i}"
        for i in range(torch.distributed.get_world_size())
    }
    if pipe_world_size == 2:
        print(f"actually doing pipe stuff now")
        assert torch.equal(saved_weight_0, model[0].weight.data)
        assert torch.equal(saved_weight_2, model[2].weight.data)
        # Split the 3-layer model across 2 pipeline stages: [2, 1] layers.
        pipe_model = MultiProcessPipe(
            model,
            [2, 1],
            group=pipeline_devices,
            worker_map=worker_map,
            input_device=torch.cuda.current_device(),
            chunks=chunk_size,
        ).cuda()
        torch.distributed.barrier()
        pipe_rank = torch.distributed.get_rank(
            group=mpu.get_pipeline_parallel_group())
        print(f"pipe rank is {pipe_rank}")
        # Each pipeline rank holds a different slice: rank 0 starts at the
        # original layer 0, rank 1 at the original layer 2.
        if pipe_rank == 0:
            assert torch.equal(saved_weight_0, pipe_model[0].weight.data)
        else:
            if not torch.equal(saved_weight_2, pipe_model[0].weight.data):
                print(
                    f"ne {pipe_rank}: left\n{saved_weight_2}\nright:\n{pipe_model[0].weight.data}"
                )
            assert torch.equal(saved_weight_2, pipe_model[0].weight.data)
        optimizer = torch.optim.SGD(pipe_model.parameters(), lr=0.01, momentum=0.9)
        optimizer.zero_grad()
        if pipe_rank == 0:
            assert torch.equal(saved_weight_0, pipe_model[0].weight.data)
            print(f"runner {rank}:\n{pipe_model[0].weight.data}")
        else:
            assert torch.equal(saved_weight_2, pipe_model[0].weight.data)
            print(f"runner {rank}:\n{pipe_model[0].weight.data}")
        if torch.distributed.get_rank(mpu.get_pipeline_parallel_group()) == 1:
            check_weights(model, reference, "pre-pipe", index=2)
        else:
            check_weights(model, reference, "pre-pipe", index=0)
        pipe_output = pipe_model(identity())
        print(f"exited pipe for {rank}")
        # Step the reference so its weights match what the pipe step produces.
        forward_model(reference, target, step=True)
        print(f"pipe_output {rank} = {pipe_output}")
        print(f"reference_output {rank} = {reference_output}")
        torch.distributed.barrier()
        if torch.distributed.get_rank(mpu.get_pipeline_parallel_group()) == 1:
            # Last pipeline stage: holds the final output, drives backward.
            error = reference_output.sub(pipe_output.cuda()).max()
            if error >= 1.0e-6:
                print(f"error bad {error}")
            assert error < 1.0e-6
            loss = nn.MSELoss()
            failed = False
            pipe_output.retain_grad()
            with torch.autograd.profiler.profile() as prof:
                try:
                    loss(pipe_output, target).backward()
                except Exception as e:
                    failed = True
                    print(f"got {e} while doing backward, deadlock?")
            if failed:
                raise RuntimeError("failed somehow")
            dump_opt_params(optimizer)
            optimizer.step()
            print(f"calling check_weights on master")
            check_weights(model, reference, "pipe", index=2)
            print(f"waiting for barrier on master, pid={os.getpid()}")
        else:
            # First pipeline stage: participates via back_helper.
            print(f"calling backwards on slave, pid={os.getpid()}")
            failed = False
            with torch.autograd.profiler.profile() as prof:
                try:
                    pipe_model.back_helper(pipe_output)
                except Exception as e:
                    failed = True
                    print(f"got {e} while doing backward, deadlock?")
            if failed:
                raise RuntimeError("failed somehow")
            dump_opt_params(optimizer)
            print(f"calling step on slave")
            optimizer.step()
            print(f"calling check_weights on slave")
            check_weights(model, reference, "pipe", index=0)
            print(f"waiting for barrier on slave")
        pipe_model.zero_grad()
        torch.distributed.barrier()
        # Post-step eval pass: the pipe and the stepped reference must agree.
        pipe_model.eval()
        pipe_output = pipe_model(identity())
        updated_ref_output = forward_model(reference, target)
        if torch.distributed.get_rank(mpu.get_pipeline_parallel_group()) == 1:
            error = updated_ref_output.sub(pipe_output.cuda()).max()
            print(
                f"outputs are ref:\n{updated_ref_output}\npipe:\n{pipe_output}"
            )
            assert error < 1.0e-6
        torch.distributed.barrier()
        print(f"finished waiting for barrier on, pid={os.getpid()}")
    print(f"really exited pipe for {rank}")
    rpc.shutdown()
    torch.distributed.destroy_process_group()
def test_conv_module():
    """Exercise ConvModule: config validation, the conv/norm/act combinations,
    custom conv types, spectral norm, padding modes, and every supported
    activation type.
    """
    with pytest.raises(AssertionError):
        # conv_cfg must be a dict or None
        conv_cfg = 'conv'
        ConvModule(3, 8, 2, conv_cfg=conv_cfg)
    with pytest.raises(AssertionError):
        # norm_cfg must be a dict or None
        norm_cfg = 'norm'
        ConvModule(3, 8, 2, norm_cfg=norm_cfg)
    with pytest.raises(KeyError):
        # softmax is not supported
        act_cfg = dict(type='softmax')
        ConvModule(3, 8, 2, act_cfg=act_cfg)
    # conv + norm + act
    conv = ConvModule(3, 8, 2, norm_cfg=dict(type='BN'))
    assert conv.with_activation
    assert hasattr(conv, 'activate')
    assert conv.with_norm
    assert hasattr(conv, 'norm')
    x = torch.rand(1, 3, 256, 256)
    output = conv(x)
    # 2x2 kernel with no padding shrinks each spatial dim by 1.
    assert output.shape == (1, 8, 255, 255)
    # conv + act
    conv = ConvModule(3, 8, 2)
    assert conv.with_activation
    assert hasattr(conv, 'activate')
    assert not conv.with_norm
    assert not hasattr(conv, 'norm')
    x = torch.rand(1, 3, 256, 256)
    output = conv(x)
    assert output.shape == (1, 8, 255, 255)
    # conv only (act_cfg=None disables the activation)
    conv = ConvModule(3, 8, 2, act_cfg=None)
    assert not conv.with_norm
    assert not hasattr(conv, 'norm')
    assert not conv.with_activation
    assert not hasattr(conv, 'activate')
    x = torch.rand(1, 3, 256, 256)
    output = conv(x)
    assert output.shape == (1, 8, 255, 255)
    # conv with its own `init_weights` method (ExampleConv zero-inits conv0)
    conv_module = ConvModule(3, 8, 2,
                             conv_cfg=dict(type='ExampleConv'),
                             act_cfg=None)
    assert torch.equal(conv_module.conv.conv0.weight, torch.zeros(8, 3, 2, 2))
    # with_spectral_norm=True
    conv = ConvModule(3, 8, 3, padding=1, with_spectral_norm=True)
    assert hasattr(conv.conv, 'weight_orig')
    output = conv(x)
    assert output.shape == (1, 8, 256, 256)
    # padding_mode='reflect'
    conv = ConvModule(3, 8, 3, padding=1, padding_mode='reflect')
    assert isinstance(conv.padding_layer, nn.ReflectionPad2d)
    output = conv(x)
    assert output.shape == (1, 8, 256, 256)
    # non-existing padding mode
    with pytest.raises(KeyError):
        conv = ConvModule(3, 8, 3, padding=1, padding_mode='non_exists')
    # leaky relu
    conv = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type='LeakyReLU'))
    assert isinstance(conv.activate, nn.LeakyReLU)
    output = conv(x)
    assert output.shape == (1, 8, 256, 256)
    # tanh
    conv = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type='Tanh'))
    assert isinstance(conv.activate, nn.Tanh)
    output = conv(x)
    assert output.shape == (1, 8, 256, 256)
    # Sigmoid
    conv = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type='Sigmoid'))
    assert isinstance(conv.activate, nn.Sigmoid)
    output = conv(x)
    assert output.shape == (1, 8, 256, 256)
    # PReLU
    conv = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type='PReLU'))
    assert isinstance(conv.activate, nn.PReLU)
    output = conv(x)
    assert output.shape == (1, 8, 256, 256)
    # HSwish
    conv = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type='HSwish'))
    assert isinstance(conv.activate, HSwish)
    output = conv(x)
    assert output.shape == (1, 8, 256, 256)
    # HSigmoid
    conv = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type='HSigmoid'))
    assert isinstance(conv.activate, HSigmoid)
    output = conv(x)
    assert output.shape == (1, 8, 256, 256)
# Compare each custom LSTM implementation in `lstms` against the reference
# nn.LSTM: identical (constant-filled) parameters must produce identical
# outputs over 10 steps, and both losses must backprop without error.
for lstm, name in lstms:
    th.manual_seed(1234)
    x = V(th.rand(1, 1, 256))
    hiddens = (V(th.rand(1, 1, 256)), V(th.rand(1, 1, 256)))
    ref = nn.LSTM(256, 256, bias=False, dropout=0.0)
    cus = lstm(256, 256, bias=False, dropout=0.0)
    # Make sure they have the same parameters:
    val = th.rand(1)[0]
    for c in cus.parameters():
        c.data.fill_(val)
    for r in ref.parameters():
        r.data.fill_(val)
    objective = V(th.zeros(1, 256))
    # (i, j) track the reference model's state, (g, h) the custom model's.
    i, j = x.clone(), [h.clone() for h in hiddens]
    g, h = x.clone(), [h.clone() for h in hiddens]
    for _ in range(10):
        i, j = ref(i, j)
        g, h = cus(g, h)
        # Outputs and both hidden-state tensors must match exactly.
        assert (th.equal(g.data, i.data))
        assert (th.equal(j[0].data, h[0].data))
        assert (th.equal(j[1].data, h[1].data))
    ref_loss = th.sum((i - objective)**2)
    cus_loss = th.sum((g - objective)**2)
    # retain_graph so the shared graph survives both backward calls.
    ref_loss.backward(retain_graph=True)
    cus_loss.backward(retain_graph=True)
    print('Correct: ', name)
print('Test passed')
def test_multi_objective_max_value_entropy(self):
    """Test qMultiObjectiveMaxValueEntropy: rejection of batched models,
    initialization from batched-MO and ModelListGP models, single-output
    conversion, evaluation shapes, and X_pending handling.
    """
    for dtype, m in product((torch.float, torch.double), (2, 3)):
        torch.manual_seed(7)
        # test batched model: construction must raise NotImplementedError
        train_X = torch.rand(1, 1, 2, dtype=dtype, device=self.device)
        train_Y = torch.rand(1, 1, m, dtype=dtype, device=self.device)
        model = SingleTaskGP(train_X, train_Y)
        with self.assertRaises(NotImplementedError):
            qMultiObjectiveMaxValueEntropy(model,
                                           dummy_sample_pareto_frontiers)
        # test initialization
        train_X = torch.rand(4, 2, dtype=dtype, device=self.device)
        train_Y = torch.rand(4, m, dtype=dtype, device=self.device)
        # test batched MO model
        model = SingleTaskGP(train_X, train_Y)
        mesmo = qMultiObjectiveMaxValueEntropy(
            model, dummy_sample_pareto_frontiers)
        self.assertEqual(mesmo.num_fantasies, 16)
        self.assertIsInstance(mesmo.sampler, SobolQMCNormalSampler)
        self.assertEqual(mesmo.sampler.sample_shape, torch.Size([128]))
        self.assertIsInstance(mesmo.fantasies_sampler, SobolQMCNormalSampler)
        self.assertEqual(mesmo.posterior_max_values.shape,
                         torch.Size([3, 1, m]))
        # test conversion to single-output model
        self.assertIs(mesmo.mo_model, model)
        self.assertEqual(mesmo.mo_model.num_outputs, m)
        self.assertIsInstance(mesmo.model, SingleTaskGP)
        self.assertEqual(mesmo.model.num_outputs, 1)
        self.assertEqual(mesmo.model._aug_batch_shape,
                         mesmo.model._input_batch_shape)
        # test ModelListGP (one single-output GP per objective)
        model = ModelListGP(
            *[SingleTaskGP(train_X, train_Y[:, i:i + 1]) for i in range(m)])
        mock_sample_pfs = mock.Mock()
        mock_sample_pfs.return_value = dummy_sample_pareto_frontiers(
            model=model)
        mesmo = qMultiObjectiveMaxValueEntropy(model, mock_sample_pfs)
        self.assertEqual(mesmo.num_fantasies, 16)
        self.assertIsInstance(mesmo.sampler, SobolQMCNormalSampler)
        self.assertEqual(mesmo.sampler.sample_shape, torch.Size([128]))
        self.assertIsInstance(mesmo.fantasies_sampler, SobolQMCNormalSampler)
        self.assertEqual(mesmo.posterior_max_values.shape,
                         torch.Size([3, 1, m]))
        # test conversion to batched MO model
        self.assertIsInstance(mesmo.mo_model, SingleTaskGP)
        self.assertEqual(mesmo.mo_model.num_outputs, m)
        self.assertIs(mesmo.mo_model, mesmo._init_model)
        # test conversion to single-output model
        self.assertIsInstance(mesmo.model, SingleTaskGP)
        self.assertEqual(mesmo.model.num_outputs, 1)
        self.assertEqual(mesmo.model._aug_batch_shape,
                         mesmo.model._input_batch_shape)
        # test that we call sample_pareto_frontiers with the multi-output model
        mock_sample_pfs.assert_called_once_with(mesmo.mo_model)
        # test basic evaluation: MESMO equals the per-objective MES sum
        X = torch.rand(1, 2, device=self.device, dtype=dtype)
        with torch.no_grad():
            vals = mesmo(X)
            igs = qMaxValueEntropy.forward(mesmo, X=X.view(1, 1, 1, 2))
        self.assertEqual(vals.shape, torch.Size([1]))
        self.assertTrue(torch.equal(vals, igs.sum(dim=-1)))
        # test batched evaluation
        X = torch.rand(4, 1, 2, device=self.device, dtype=dtype)
        with torch.no_grad():
            vals = mesmo(X)
            igs = qMaxValueEntropy.forward(mesmo, X=X.view(4, 1, 1, 2))
        self.assertEqual(vals.shape, torch.Size([4]))
        self.assertTrue(torch.equal(vals, igs.sum(dim=-1)))
        # test set X pending to None
        mesmo.set_X_pending(None)
        self.assertIs(mesmo.mo_model, mesmo._init_model)
        fant_X = torch.cat(
            [
                train_X.expand(16, 4, 2),
                torch.rand(16, 1, 2, device=self.device, dtype=dtype),
            ],
            dim=1,
        )
        fant_Y = torch.cat(
            [
                train_Y.expand(16, 4, m),
                torch.rand(16, 1, m, device=self.device, dtype=dtype),
            ],
            dim=1,
        )
        fantasy_model = SingleTaskGP(fant_X, fant_Y)
        # test with X_pending is not None: fantasize must be invoked once
        with mock.patch.object(
                SingleTaskGP, "fantasize",
                return_value=fantasy_model) as mock_fantasize:
            qMultiObjectiveMaxValueEntropy(
                model,
                dummy_sample_pareto_frontiers,
                X_pending=torch.rand(1, 2, device=self.device, dtype=dtype),
            )
            mock_fantasize.assert_called_once()
def test_dataset_rng_states_restart(dataset_class, num_workers, batch_size):
    """Test that the sequence of batches coming from a random number generator
    continues with the correct sequence after reloading the state."""

    def create_dataset_sampler():
        # Fresh dataset + random sampler pair with its own generator.
        dset = CaptureMapDataset(dataset_class(16, 8))
        random_sampler = RandomSampler(dset, generator=torch.Generator())
        return dset, random_sampler

    def create_dataloader_sampler(dset, sampler):
        # Wrap the sampler so it can fast-forward, and attach the
        # state-capturing collate fn to the dataloader.
        sampler = FastForwardSampler(sampler)
        sampler.setup(batch_size)
        dl = DataLoader(dset,
                        num_workers=num_workers,
                        sampler=sampler,
                        batch_size=batch_size)
        _add_capture_metadata_collate(dl)
        return dl, sampler

    def fetch(fetcher, prefetch_iter, num_batches_fetched):
        # Pull one batch and sanity-check the captured iterator state.
        batch, _ = next(prefetch_iter)
        state: List[MergedIteratorState] = fetcher.state
        assert len(state) == 1
        assert isinstance(state[0], MergedIteratorState)
        assert len(fetcher.dataloader_iter.cache_states) == 1
        if num_workers == 0:
            assert state[0].state[0].num_batches_fetched == num_batches_fetched
        return state

    dataset, random_sampler = create_dataset_sampler()
    dataloader, ff_sampler = create_dataloader_sampler(dataset, random_sampler)
    fetcher = DataFetcher()
    fetcher.setup(dataloader)
    prefetch_iter = iter(fetcher)
    # fetch 4 batches
    fetch(fetcher, prefetch_iter, 1)
    fetch(fetcher, prefetch_iter, 2)
    fetch(fetcher, prefetch_iter, 3)
    # (A) capture the state after fetching 4 batches
    state = fetch(fetcher, prefetch_iter, 4)
    state = deepcopy(state[0])
    # (B) simulate 2 additional batches
    batch05, _ = next(prefetch_iter)
    batch06, _ = next(prefetch_iter)
    # start reloading
    dataset, random_sampler = create_dataset_sampler()
    dataloader, ff_sampler = create_dataloader_sampler(dataset, random_sampler)
    # load the state dict saved at (A)
    ff_sampler.load_state_dict(state.sampler_states)
    dataset.load_state_dict(state.dataset_states,
                            latest_worker_id=state.latest_worker_id,
                            num_workers=num_workers)
    prefetcher = DataFetcher()
    prefetcher.setup(dataloader)
    prefetch_iter = iter(prefetcher)
    # fetch 2 random batches, these should match exactly the batches seen at (B)
    batch05_restart, _ = next(prefetch_iter)
    batch06_restart, _ = next(prefetch_iter)
    assert torch.equal(batch05, batch05_restart)
    assert torch.equal(batch06, batch06_restart)
def _test_fast_forward_sampler_with_distributed_sampler_and_iterative_dataset(rank, worldsize):
    """Run the meta-learning iterable dataset for two epochs, then reload its
    captured state mid-epoch and verify the remaining batches match the
    original run exactly — in single-process and DDP (worldsize > 1) modes.
    """
    if worldsize > 1:
        _setup_ddp(rank, worldsize)

    def all_gather(tensor, world_size):
        # Gather `tensor` from every rank into a list (int64 buffers).
        tensor_list = [torch.zeros_like(tensor, dtype=torch.int64) for _ in range(world_size)]
        torch.distributed.all_gather(tensor_list, tensor)
        return tensor_list

    initial_seed = seed_everything(42)
    generator = torch.Generator()
    generator.manual_seed(initial_seed)
    num_workers = 2
    batch_size = 4
    dataset_length = 60
    num_classes = 10
    labels = np.random.randint(0, num_classes, dataset_length)
    dataset = ClassificationDataset(range(dataset_length), labels)
    dataset = MetaLearningDataset(
        dataset,
        batch_size=batch_size,
        drop_last=True,
        num_workers=num_workers,
        global_rank=rank,
        world_size=worldsize,
        initial_seed=initial_seed,
        debugging=True,
        shuffle=True,
    )
    dataset = CaptureIterableDataset(dataset)
    dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=1, generator=generator)
    _add_capture_metadata_collate(dataloader)
    # Drain the dataloader for two epochs, collecting every batch.
    epoch_results = []
    for _ in range(2):
        iter_dataloader = iter(dataloader)
        batches = []
        while True:
            try:
                batches.append(next(iter_dataloader))
            except StopIteration:
                break
        epoch_results.append(batches)
        dataloader.dataset.dataset.current_task_iteration += 1
    assert len(epoch_results) == 2
    # +2 accounts for the per-worker metadata batches emitted up front.
    assert len(epoch_results[0]) == math.ceil((dataset_length / (num_workers * worldsize)) / batch_size) + 2
    if worldsize == 1:
        assert epoch_results[0][0]["data"]["task_length"] == epoch_results[0][1]["data"]["task_length"]
        assert torch.equal(
            epoch_results[0][0]["data"]["selected_indexes"], epoch_results[0][1]["data"]["selected_indexes"]
        )
        assert 0 in epoch_results[0][2][AutoRestartBatchKeys.PL_RESTART_META]["iter_sampler"]  # worker id 0
        assert 1 in epoch_results[0][3][AutoRestartBatchKeys.PL_RESTART_META]["iter_sampler"]  # worker id 1
        assert not torch.equal(epoch_results[0][2]["data"][0], epoch_results[0][3]["data"][0])
    else:
        # Task metadata must agree across ranks; actual batches must differ.
        first_task_metadata = all_gather(epoch_results[0][0]["data"]["task_length"], worldsize)
        second_task_metadata = all_gather(epoch_results[0][1]["data"]["task_length"], worldsize)
        assert torch.equal(first_task_metadata[0], first_task_metadata[1])
        assert torch.equal(second_task_metadata[0], second_task_metadata[1])
        assert torch.equal(first_task_metadata[0], second_task_metadata[1])
        first_batch_list = all_gather(epoch_results[0][2]["data"][0], worldsize)
        assert not torch.equal(first_batch_list[0], first_batch_list[1])
        second_batch_list = all_gather(epoch_results[0][3]["data"][0], worldsize)
        assert not torch.equal(second_batch_list[0], second_batch_list[1])
    # restarting on epoch 0 / real batch 2
    state_dict = {"iter_sampler": {}}
    for batch in epoch_results[0][2:4]:
        batch, _state_dict = batch["data"], batch[AutoRestartBatchKeys.PL_RESTART_META]
        for k, v in _state_dict.items():
            state_dict[k].update(v)
    # Rebuild an identical pipeline and load the captured state into it.
    dataset = ClassificationDataset(range(dataset_length), labels)
    dataset = MetaLearningDataset(
        dataset,
        batch_size=batch_size,
        drop_last=True,
        num_workers=num_workers,
        global_rank=rank,
        world_size=worldsize,
        initial_seed=initial_seed,
        debugging=True,
        shuffle=True,
    )
    dataset = CaptureIterableDataset(dataset)
    dataset.load_state_dict(state_dict)
    dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=1, generator=generator)
    _add_capture_metadata_collate(dataloader)
    epoch_results_restart = []
    for _ in range(2):
        iter_dataloader = iter(dataloader)
        batches = []
        while True:
            try:
                batches.append(next(iter_dataloader))
            except StopIteration:
                break
        epoch_results_restart.append(batches)
        dataloader.dataset.dataset.increment_iteration()
        dataloader.dataset.reset_on_epoch()
    # Restarted run skipped 2 already-consumed batches of epoch 0.
    assert len(epoch_results_restart[0]) + 2 == len(epoch_results[0])
    epoch_tensors = [e["data"][0] for e in epoch_results[0][4:]]
    epoch_tensors_restart = [e["data"][0] for e in epoch_results_restart[0][2:]]
    for t, tr in zip(epoch_tensors, epoch_tensors_restart):
        assert torch.equal(t, tr)
    # Epoch 1 must be identical between the two runs.
    epoch_tensors = [e["data"][0] for e in epoch_results[1][2:]]
    epoch_tensors_restart = [e["data"][0] for e in epoch_results_restart[1][2:]]
    for t, tr in zip(epoch_tensors, epoch_tensors_restart):
        assert torch.equal(t, tr)
def test_init_copy(self):
    """init_copy_ must make the target tensor equal to the source tensor."""
    target = torch.randn(2, 3)
    source = torch.tensor([[1, 2, 3], [4, 5, 6]]).to(dtype=target.dtype)
    init_copy_(target, source)
    assert torch.equal(target, source)
def test_add_diag():
    """Adding a diagonal to the sum lazy variable must match dense math."""
    diag_term = Variable(torch.Tensor([4]))
    lazy_var = make_sum_lazy_var().add_diag(diag_term)
    expected = t1_eval + t2_eval + torch.eye(4) * 4
    assert torch.equal(lazy_var.evaluate().data, expected)
def forward(self, input):
    """Quantization-aware conv+BN forward pass.

    In training mode, BN statistics are updated from a plain convolution and
    then fused into the weight/bias; in eval mode the running statistics are
    fused. The fused weight and bias are quantized, the convolution is run,
    the activation is applied, and the activation output is quantized.
    Optionally dumps all quantization scales/values to txt files.
    """
    # training mode
    if self.training:
        self.step += 1
        if self.bn:
            # Plain convolution first, to obtain the pre-BN activations
            # from which the BN statistics are computed.
            output = F.conv2d(input=input,
                              weight=self.weight,
                              bias=self.bias,
                              stride=self.stride,
                              padding=self.padding,
                              dilation=self.dilation,
                              groups=self.groups)
            # Update BN statistics (per-batch and running).
            dims = [dim for dim in range(4) if dim != 1]
            self.batch_mean = torch.mean(output, dim=dims)
            self.batch_var = torch.var(output, dim=dims)
            with torch.no_grad():
                # On the very first BN update (running stats still all
                # zero), seed the running stats directly from the batch.
                if self.first_bn == 0 and torch.equal(
                        self.running_mean,
                        torch.zeros_like(
                            self.running_mean)) and torch.equal(
                                self.running_var,
                                torch.zeros_like(self.running_var)):
                    self.first_bn.add_(1)
                    self.running_mean.add_(self.batch_mean)
                    self.running_var.add_(self.batch_var)
                else:
                    self.running_mean.mul_(1 - self.momentum).add_(
                        self.momentum * self.batch_mean)
                    self.running_var.mul_(1 - self.momentum).add_(
                        self.momentum * self.batch_var)
            # BN fusion: before freeze_step use batch statistics,
            # afterwards use the (frozen) running statistics.
            if self.step < self.freeze_step:
                if self.bias is not None:
                    bias = reshape_to_bias(
                        self.beta + (self.bias - self.batch_mean) *
                        (self.gamma /
                         torch.sqrt(self.batch_var + self.eps)))
                else:
                    # bias fused with batch statistics
                    bias = reshape_to_bias(
                        self.beta - self.batch_mean *
                        (self.gamma /
                         torch.sqrt(self.batch_var + self.eps)))
                # weight fusion (original comment said "running", but
                # batch_var is used in this branch)
                weight = self.weight * reshape_to_weight(
                    self.gamma / torch.sqrt(self.batch_var + self.eps))
            else:
                if self.bias is not None:
                    bias = reshape_to_bias(
                        self.beta + (self.bias - self.running_mean) *
                        (self.gamma /
                         torch.sqrt(self.running_var + self.eps)))
                else:
                    # bias fused with running statistics
                    bias = reshape_to_bias(
                        self.beta - self.running_mean *
                        (self.gamma /
                         torch.sqrt(self.running_var + self.eps)))
                # weight fused with running statistics
                weight = self.weight * reshape_to_weight(
                    self.gamma / torch.sqrt(self.running_var + self.eps))
        else:
            bias = self.bias
            weight = self.weight
    # eval mode
    else:
        # print(self.running_mean, self.running_var)
        # BN fusion using the running statistics only.
        if self.bn:
            if self.bias is not None:
                bias = reshape_to_bias(
                    self.beta + (self.bias - self.running_mean) *
                    (self.gamma /
                     torch.sqrt(self.running_var + self.eps)))
            else:
                # bias fused with running statistics
                bias = reshape_to_bias(
                    self.beta - self.running_mean *
                    (self.gamma /
                     torch.sqrt(self.running_var + self.eps)))
            # weight fused with running statistics
            weight = self.weight * reshape_to_weight(
                self.gamma / torch.sqrt(self.running_var + self.eps))
        else:
            bias = self.bias
            weight = self.weight
    # Quantize the BN-fused weight and bias.
    q_weight = self.weight_quantizer(weight)
    q_bias = self.bias_quantizer(bias)
    if self.quantizer_output == True:  # dump quantization params to txt files
        # create the quantizer_output directories if missing
        if not os.path.isdir('./quantizer_output'):
            os.makedirs('./quantizer_output')
        if not os.path.isdir('./quantizer_output/q_weight_out'):
            os.makedirs('./quantizer_output/q_weight_out')
        if not os.path.isdir('./quantizer_output/w_scale_out'):
            os.makedirs('./quantizer_output/w_scale_out')
        if not os.path.isdir('./quantizer_output/q_weight_max'):
            os.makedirs('./quantizer_output/q_weight_max')
        if not os.path.isdir('./quantizer_output/max_weight_count'):
            os.makedirs('./quantizer_output/max_weight_count')
        # dump this layer's weight quantization scale
        weight_scale = self.weight_quantizer.get_scale()
        np.savetxt(
            ('./quantizer_output/w_scale_out/scale %f.txt' % time.time()),
            weight_scale,
            delimiter='\n')
        # dump this layer's quantized weights
        q_weight_txt = self.weight_quantizer.get_quantize_value(weight)
        q_weight_txt = np.array(q_weight_txt.cpu()).reshape(1, -1)
        q_weight_max = [np.max(q_weight_txt)]
        # q_weight_max = np.argmax(q_weight_txt)
        # count values in this layer that overflow the 8-bit range
        max_weight_count = [np.sum(abs(q_weight_txt) >= 255)]
        np.savetxt(
            ('./quantizer_output/max_weight_count/max_weight_count %f.txt' %
             time.time()), max_weight_count)
        np.savetxt(('./quantizer_output/q_weight_max/max_weight %f.txt' %
                    time.time()), q_weight_max)
        np.savetxt(('./quantizer_output/q_weight_out/weight %f.txt' %
                    time.time()),
                   q_weight_txt,
                   delimiter='\n')
        # io.savemat('save.mat',{'q_weight_txt':q_weight_txt})
        # create the directories for the bias dumps
        if not os.path.isdir('./quantizer_output/q_bias_out'):
            os.makedirs('./quantizer_output/q_bias_out')
        if not os.path.isdir('./quantizer_output/b_scale_out'):
            os.makedirs('./quantizer_output/b_scale_out')
        # dump this layer's bias quantization scale
        bias_scale = self.bias_quantizer.get_scale()
        np.savetxt(
            ('./quantizer_output/b_scale_out/scale %f.txt' % time.time()),
            bias_scale,
            delimiter='\n')
        # dump this layer's quantized bias
        q_bias_txt = self.bias_quantizer.get_quantize_value(bias)
        q_bias_txt = np.array(q_bias_txt.cpu()).reshape(1, -1)
        np.savetxt(
            ('./quantizer_output/q_bias_out/bias %f.txt' % time.time()),
            q_bias_txt,
            delimiter='\n')
    # quantized convolution
    if self.training:  # training mode
        output = F.conv2d(
            input=input,
            weight=q_weight,
            # bias=self.bias,  # note: original bias not added here (self.bias is None)
            bias=q_bias,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            groups=self.groups)
    else:  # eval mode
        output = F.conv2d(
            input=input,
            weight=q_weight,
            bias=q_bias,  # note: bias added here for the full conv+bn
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            groups=self.groups)
    if self.activate == 'leaky':
        output = F.leaky_relu(output,
                              0.125 if not self.maxabsscaler else 0.25,
                              inplace=True)
    elif self.activate == 'relu6':
        output = F.relu6(output, inplace=True)
    elif self.activate == 'h_swish':
        output = output * (F.relu6(output + 3.0, inplace=True) / 6.0)
    elif self.activate == 'relu':
        output = F.relu(output, inplace=True)
    elif self.activate == 'mish':
        output = output * F.softplus(output).tanh()
    elif self.activate == 'linear':
        # NOTE(review): 'linear' returns early, skipping the activation
        # dump and activation quantization below — confirm this is intended.
        return output
        # pass
    else:
        print(self.activate + " is not supported !")
    if self.quantizer_output == True:
        if not os.path.isdir('./quantizer_output/q_activation_out'):
            os.makedirs('./quantizer_output/q_activation_out')
        if not os.path.isdir('./quantizer_output/a_scale_out'):
            os.makedirs('./quantizer_output/a_scale_out')
        if not os.path.isdir('./quantizer_output/q_activation_max'):
            os.makedirs('./quantizer_output/q_activation_max')
        if not os.path.isdir('./quantizer_output/max_activation_count'):
            os.makedirs('./quantizer_output/max_activation_count')
        # dump the activation quantization scale
        activation_scale = self.activation_quantizer.get_scale()
        np.savetxt(
            ('./quantizer_output/a_scale_out/scale %f.txt' % time.time()),
            activation_scale,
            delimiter='\n')
        # dump this layer's quantized activations
        q_activation_txt = self.activation_quantizer.get_quantize_value(
            output)
        q_activation_txt = np.array(q_activation_txt.cpu()).reshape(1, -1)
        # track the layer max (overflow check)
        q_activation_max = [np.max(q_activation_txt)]
        # count values in this layer that overflow the 8-bit range
        max_activation_count = [np.sum(abs(q_activation_txt) >= 255)
                                ]
        # q_weight_max = np.argmax(q_weight_txt)
        np.savetxt((
            './quantizer_output/max_activation_count/max_activation_count %f.txt'
            % time.time()), max_activation_count)
        np.savetxt(
            ('./quantizer_output/q_activation_max/max_activation %f.txt' %
             time.time()), q_activation_max)
        np.savetxt(
            ('./quantizer_output/q_activation_out/activation %f.txt' %
             time.time()),
            q_activation_txt,
            delimiter='\n')
    output = self.activation_quantizer(output)
    return output
def train_per_batch_transform_on_device(self, batch: Any) -> Any:
    """Hook check: verify mode/stage bookkeeping, record that the hook ran,
    and validate the incoming batch contents."""
    assert self.training
    assert self.current_fn == "per_batch_transform_on_device"
    self.train_per_batch_transform_on_device_called = True
    expected = tensor([[0, 1, 2, 3, 5], [0, 1, 2, 3, 5]])
    assert torch.equal(batch, expected)