def test_add_output_dim(self, cuda=False):
    """Exercise ``add_output_dim`` for float and double inputs.

    Against an original batch shape of ``[2]`` this checks that a 2-d input
    raises ``ValueError``, and that 3-d / 4-d inputs get the output dimension
    inserted at index 0 / 1 respectively.

    Args:
        cuda: Run on the CUDA device when True, otherwise on CPU.
    """
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": device, "dtype": dtype}
        original_batch_shape = torch.Size([2])

        # an input with too few dimensions must be rejected
        X = torch.rand(2, 1, **tkwargs)
        with self.assertRaises(ValueError):
            add_output_dim(X=X, original_batch_shape=original_batch_shape)

        # first case: no new batch dims; second case: one new batch dim
        for shape, expected_idx in (((2, 2, 1), 0), ((3, 2, 2, 1), 1)):
            X = torch.rand(*shape, **tkwargs)
            X_out, output_dim_idx = add_output_dim(
                X=X, original_batch_shape=original_batch_shape
            )
            self.assertTrue(torch.equal(X_out, X.unsqueeze(expected_idx)))
            self.assertEqual(output_dim_idx, expected_idx)
def grad2():
    """Print the input gradients of two passes through ``x @ W @ W2``.

    Random weights ``W`` (2x2) and ``W2`` (2x1) and two random 1x2 inputs are
    created. Each input is pushed through the product and back-propagated;
    the gradient on each input is printed. Gradients on the weights are not
    zeroed between the two passes and are never printed.
    """
    W = Variable(torch.rand(2, 2), requires_grad=True)
    W2 = Variable(torch.rand(2, 1), requires_grad=True)
    x1 = Variable(torch.rand(1, 2), requires_grad=True)
    x2 = Variable(torch.rand(1, 2), requires_grad=True)

    for label, value in (("w: ", W), ("x1: ", x1), ("x2: ", x2)):
        print(label)
        print(value)
    print("--------------------")

    # first pass: the printed product W @ W2 is what x1.grad should match
    y1 = x1.matmul(W).matmul(W2)
    print(torch.matmul(W, W2))
    y1.backward()
    print(x1.grad)

    # second pass with the other input
    y2 = x2.matmul(W).matmul(W2)
    y2.backward()
    print(x2.grad)
def test_FixedNoiseMultiTaskGP_single_output(self, cuda=False):
    """Check structure, fitting and posteriors of the single-output model.

    Args:
        cuda: Run on the CUDA device when True, otherwise on CPU.
    """
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": device, "dtype": dtype}
        model = _get_fixed_noise_model_single_output(**tkwargs)

        # module types
        self.assertIsInstance(model, FixedNoiseMultiTaskGP)
        self.assertIsInstance(model.likelihood, FixedNoiseGaussianLikelihood)
        self.assertIsInstance(model.mean_module, ConstantMean)
        self.assertIsInstance(model.covar_module, ScaleKernel)
        base_kernel = model.covar_module.base_kernel
        self.assertIsInstance(base_kernel, MaternKernel)
        self.assertIsInstance(base_kernel.lengthscale_prior, GammaPrior)
        self.assertIsInstance(model.task_covar_module, IndexKernel)

        # the task kernel's factor must have rank-many columns
        self.assertEqual(model._rank, 2)
        self.assertEqual(
            model.task_covar_module.covar_factor.shape[-1], model._rank
        )

        # test model fitting
        mll = ExactMarginalLogLikelihood(model.likelihood, model)
        mll = fit_gpytorch_model(mll, options={"maxiter": 1})

        # test posterior: plain evaluation, then batch evaluation
        for shape in ((2, 1), (3, 2, 1)):
            test_x = torch.rand(*shape, **tkwargs)
            posterior_f = model.posterior(test_x)
            self.assertIsInstance(posterior_f, GPyTorchPosterior)
            self.assertIsInstance(posterior_f.mvn, MultivariateNormal)
def get_loss(self, image_a_pred, image_b_pred, mask_a, mask_b):
    """Hinge loss on squared descriptor distances between random mask pixels.

    ``num_samples`` positions are drawn (with replacement) from the nonzero
    entries of each mask, the descriptors at those positions are gathered
    from the two predictions, and ``max(0, ||d_a - d_b||^2 - 2 * margin)`` is
    summed over all sampled pairs.

    Args:
        image_a_pred: Descriptor tensor indexed along dim 1 and reduced along
            dim 2, i.e. at least 3-d (presumably ``(1, pixels, dim)`` —
            confirm against the caller).
        image_b_pred: Same layout as ``image_a_pred``.
        mask_a: 1-d mask; nonzero entries mark usable positions for image a.
        mask_b: 1-d mask; nonzero entries mark usable positions for image b.

    Returns:
        A scalar loss tensor. If either mask has no nonzero entry, a
        one-element floating-point zero that requires grad is returned on the
        device of ``image_a_pred`` (an integer tensor cannot require grad).
    """
    mask_a_indices_flat = torch.nonzero(mask_a)
    mask_b_indices_flat = torch.nonzero(mask_b)

    if len(mask_a_indices_flat) == 0 or len(mask_b_indices_flat) == 0:
        return torch.zeros(1, device=image_a_pred.device, requires_grad=True)

    num_samples = 10000  # random pixel samples taken per mask
    M_margin = 0.5  # margin parameter

    def _sample_positions(indices_flat):
        # Noise is drawn on the CPU (as before) so the RNG stream is the
        # same no matter which device the inputs live on.
        count = len(indices_flat)
        scaled = (torch.rand(num_samples) * count).to(indices_flat.device)
        # floor(rand * count) can round up to count in float32; clamp so the
        # pick always stays a valid row index.
        picks = torch.floor(scaled).long().clamp(max=count - 1)
        return torch.index_select(indices_flat, 0, picks).squeeze(1)

    positions_a = _sample_positions(mask_a_indices_flat)
    positions_b = _sample_positions(mask_b_indices_flat)

    # index into the images and get descriptors
    descriptors_a = torch.index_select(image_a_pred, 1, positions_a)
    descriptors_b = torch.index_select(image_b_pred, 1, positions_b)

    pixel_wise_loss = (descriptors_a - descriptors_b).pow(2).sum(dim=2)
    pixel_wise_loss = torch.add(pixel_wise_loss, -2 * M_margin)
    zeros_vec = torch.zeros_like(pixel_wise_loss)
    return torch.max(zeros_vec, pixel_wise_loss).sum()
def setUp(self, size=(2, 5), batch=3, dtype=torch.float64, device=None,
          seed=None, mu=None, cov=None, A=None, b=None):
    '''Prepare the affine map and the batched Gaussian moments for the tests.

    The batch is built by stacking ``[1 * mu, 2 * mu, ..., batch * mu]`` (and
    likewise for the covariance and its diagonal), so each batch entry can be
    checked against a known multiple of the base moments.

    Args:
        size: Tuple size of matrix A.
        batch: The batch size > 0.
        dtype: data type.
        device: In which device.
        seed: Seed for the random number generator.
        mu: To test a specific mean mu.
        cov: To test a specific covariance matrix.
        A: To test a specific A matrix.
        b: To test a specific bias b.
    '''
    if seed is not None:
        torch.manual_seed(seed)

    tensor_kwargs = {"dtype": dtype, "device": device}
    # Defaults are drawn in the order A, b, mu, cov so a seeded run is
    # reproducible.
    if A is None:
        A = torch.rand(size, **tensor_kwargs)
    if b is None:
        b = torch.rand(size[0], **tensor_kwargs)
    if mu is None:
        mu = torch.rand(size[1], **tensor_kwargs)
    if cov is None:
        cov = rand.definite(size[1], dtype=dtype, device=device,
                            positive=True, semi=False, norm=10**2)

    self.A, self.b = A, b

    var = torch.diag(cov)
    multiples = range(1, batch + 1)
    self.batch_mean = torch.stack([k * mu for k in multiples])
    self.batch_cov = torch.stack([k * cov for k in multiples])
    self.batch_var = torch.stack([k * var for k in multiples])
def unit_test(args):
    '''Smoke-test basic torch tensor creation, addition and slicing.

    Prints an uninitialized 5x3 tensor, two random 5x3 tensors (only when
    ``args.verbose`` is truthy), their sum, column 1 of the sum, and a final
    summary line. No check can fail, so the summary always reports PASS.
    '''
    print("Torch uninitialized 5x3 matrix:")
    print(torch.Tensor(5, 3))

    print("Torch randomly initialized 5x3 matrix X:")
    x_t = torch.rand(5, 3)
    if args.verbose:
        print(x_t)
        print("size:", x_t.size())

    print("Torch randomly initialized 5x3 matrix Y:")
    y_t = torch.rand(5, 3)
    if args.verbose:
        print(y_t)

    print("X + Y:")
    z_t = x_t + y_t
    print(z_t)

    print("slice (X + Y)[:, 1]:")
    print(z_t[:, 1])

    num_wrong = 0
    verdict = "FAIL" if num_wrong else "PASS"
    print("unit_test: num_tests:", 1, " num_wrong:", num_wrong, " -- ", verdict)
def test_FixedNoiseGP(self, cuda=False):
    """Check structure, fitting, forward pass and posteriors of FixedNoiseGP.

    Runs over batch shapes ``[]`` and ``[2]``, one and two outputs, and
    float / double precision.

    Args:
        cuda: Run on the CUDA device when True, otherwise on CPU.
    """
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for batch_shape in (torch.Size([]), torch.Size([2])):
        for num_outputs in (1, 2):
            for dtype in (torch.float, torch.double):
                tkwargs = {"device": device, "dtype": dtype}
                model = self._get_model(
                    batch_shape=batch_shape,
                    num_outputs=num_outputs,
                    n=10,
                    **tkwargs
                )

                # module types
                self.assertIsInstance(model, FixedNoiseGP)
                self.assertIsInstance(
                    model.likelihood, FixedNoiseGaussianLikelihood
                )
                self.assertIsInstance(model.mean_module, ConstantMean)
                self.assertIsInstance(model.covar_module, ScaleKernel)
                base_kernel = model.covar_module.base_kernel
                self.assertIsInstance(base_kernel, MaternKernel)
                self.assertIsInstance(base_kernel.lengthscale_prior, GammaPrior)

                # test model fitting
                mll = ExactMarginalLogLikelihood(model.likelihood, model)
                mll = fit_gpytorch_model(mll, options={"maxiter": 1})

                # test forward
                test_x = torch.rand(batch_shape + torch.Size([3, 1]), **tkwargs)
                forward_out = model(test_x)
                self.assertIsInstance(forward_out, MultivariateNormal)

                # TODO: Pass observation noise into posterior
                # posterior_obs = model.posterior(test_x, observation_noise=True)
                # self.assertTrue(
                #     torch.allclose(
                #         posterior_f.variance + 0.01,
                #         posterior_obs.variance
                #     )
                # )

                # test posterior: non-batch evaluation
                X = torch.rand(batch_shape + torch.Size([3, 1]), **tkwargs)
                posterior = model.posterior(X)
                self.assertIsInstance(posterior, GPyTorchPosterior)
                self.assertEqual(
                    posterior.mean.shape,
                    batch_shape + torch.Size([3, num_outputs]),
                )

                # test posterior: batch evaluation (extra leading dim of 2)
                X = torch.rand(
                    torch.Size([2]) + batch_shape + torch.Size([3, 1]),
                    **tkwargs
                )
                posterior = model.posterior(X)
                self.assertIsInstance(posterior, GPyTorchPosterior)
                self.assertEqual(
                    posterior.mean.shape,
                    torch.Size([2]) + batch_shape + torch.Size([3, num_outputs]),
                )
def visualize_results(self, epoch, fix=True):
    """Generate conditional samples and save them as one image grid.

    Args:
        epoch: Epoch number, used in the output file name.
        fix: When True, use the stored ``self.sample_z_`` / ``self.sample_y_``
            pair; otherwise draw fresh noise and random one-hot labels over
            10 classes.
    """
    self.G.eval()

    out_dir = self.result_dir + '/' + self.dataset + '/' + self.model_name
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    image_frame_dim = int(np.floor(np.sqrt(self.sample_num)))

    if fix:
        # fixed noise
        samples = self.G(self.sample_z_, self.sample_y_)
    else:
        # random noise: labels are drawn first, then the latent vectors
        labels = torch.LongTensor(self.batch_size, 1).random_() % 10
        one_hot = torch.FloatTensor(self.batch_size, 10)
        one_hot.zero_()
        one_hot.scatter_(1, labels, 1)

        noise = torch.rand((self.batch_size, self.z_dim))
        if self.gpu_mode:
            noise = noise.cuda()
            one_hot = one_hot.cuda()
        sample_z_ = Variable(noise, volatile=True)
        sample_y_ = Variable(one_hot, volatile=True)

        samples = self.G(sample_z_, sample_y_)

    if self.gpu_mode:
        samples = samples.cpu()
    # move the axis at position 1 to the end before saving
    samples = samples.data.numpy().transpose(0, 2, 3, 1)

    grid_count = image_frame_dim * image_frame_dim
    utils.save_images(
        samples[:grid_count, :, :, :],
        [image_frame_dim, image_frame_dim],
        out_dir + '/' + self.model_name + '_epoch%03d' % epoch + '.png')
def test_forward_works_on_higher_order_input(self):
    """Embed inputs with two wrapping dims and check the output size.

    The word embedder contributes 2 features and the character CNN 10, so
    the last dimension of the result is expected to be 12.
    """
    word_config = {
        "type": "embedding",
        "num_embeddings": 20,
        "embedding_dim": 2,
    }
    character_config = {
        "type": "character_encoding",
        "embedding": {
            "embedding_dim": 4,
            "num_embeddings": 15,
        },
        "encoder": {
            "type": "cnn",
            "embedding_dim": 4,
            "num_filters": 10,
            "ngram_filter_sizes": [3],
        },
    }
    params = Params({"words": word_config, "characters": character_config})
    token_embedder = BasicTextFieldEmbedder.from_params(self.vocab, params)

    # random indices below each embedder's vocabulary size
    word_ids = Variable(torch.rand(3, 4, 5, 6) * 20).long()
    character_ids = Variable(torch.rand(3, 4, 5, 6, 7) * 15).long()
    inputs = {'words': word_ids, 'characters': character_ids}

    embedded = token_embedder(inputs, num_wrapping_dims=2)
    assert embedded.size() == (3, 4, 5, 6, 12)
def test_fit_valid_sets_args(self, gtvs):
    """``Model.fit`` must forward its validation arguments unchanged.

    Args:
        gtvs: Mock standing in for the validation-set helper (presumably
            injected by a ``patch`` decorator outside this block — confirm).
    """
    x = torch.rand(1, 5)
    y = torch.rand(1, 5)
    val_data = (1, 2)
    val_split = 0.2
    shuffle = False

    torchmodel = MagicMock()
    torchmodel.forward = Mock(return_value=1)
    optimizer = MagicMock()
    metric = Metric('test')

    # Only floating-point tensors can require gradients, so the dummy loss
    # must be a float rather than the integer literal 2.
    loss = torch.tensor([2.0], requires_grad=True)
    criterion = Mock(return_value=loss)

    gtvs.return_value = (1, 2)

    torchbearermodel = Model(torchmodel, optimizer, criterion, [metric])
    torchbearermodel.fit_generator = Mock()
    torchbearermodel.fit(x, y, 1, validation_data=val_data,
                         validation_split=val_split, shuffle=shuffle)

    gtvs.assert_called_once()
    positional, keyword = gtvs.call_args[0], gtvs.call_args[1]
    # assertEqual reports both operands on failure, unlike assertTrue(a == b)
    self.assertEqual(list(positional[0].numpy()[0]), list(x.numpy()[0]))
    self.assertEqual(list(positional[1].numpy()[0]), list(y.numpy()[0]))
    self.assertEqual(positional[2], val_data)
    self.assertEqual(positional[3], val_split)
    self.assertEqual(keyword['shuffle'], shuffle)
def test_upper_confidence_bound(self, cuda=False):
    """Check UCB values for both directions and the multi-output error.

    With mean 0, variance 1 and ``beta=1`` the expected value is 1.0 when
    maximizing and -1.0 with ``maximize=False``.

    Args:
        cuda: Run on the CUDA device when True, otherwise on CPU.
    """
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": device, "dtype": dtype}
        mean = torch.tensor([[0.0]], **tkwargs)
        variance = torch.tensor([[1.0]], **tkwargs)
        mm = MockModel(MockPosterior(mean=mean, variance=variance))

        for extra_kwargs, expected in (({}, 1.0), ({"maximize": False}, -1.0)):
            module = UpperConfidenceBound(model=mm, beta=1.0, **extra_kwargs)
            X = torch.zeros(1, 1, **tkwargs)
            ucb = module(X)
            ucb_expected = torch.tensor([expected], **tkwargs)
            self.assertTrue(torch.allclose(ucb, ucb_expected, atol=1e-4))

        # check for proper error if multi-output model
        mean2 = torch.rand(1, 2, **tkwargs)
        variance2 = torch.rand(1, 2, **tkwargs)
        mm2 = MockModel(MockPosterior(mean=mean2, variance=variance2))
        module2 = UpperConfidenceBound(model=mm2, beta=1.0)
        with self.assertRaises(UnsupportedError):
            module2(X)
def visualize_results(self, epoch, fix=True):
    """Generate samples from the generator and save them as one image grid.

    Args:
        epoch: Epoch number, used in the output file name.
        fix: When True, use the stored ``self.sample_z_``; otherwise draw a
            fresh uniform noise batch.
    """
    self.G.eval()

    out_dir = self.result_dir + '/' + self.dataset + '/' + self.model_name
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # the grid cannot hold more images than one batch produces
    tot_num_samples = min(self.sample_num, self.batch_size)
    image_frame_dim = int(np.floor(np.sqrt(tot_num_samples)))

    if fix:
        # fixed noise
        samples = self.G(self.sample_z_)
    else:
        # random noise
        noise = torch.rand((self.batch_size, self.z_dim))
        if self.gpu_mode:
            noise = noise.cuda()
        sample_z_ = Variable(noise, volatile=True)
        samples = self.G(sample_z_)

    if self.gpu_mode:
        samples = samples.cpu()
    # move the axis at position 1 to the end before saving
    samples = samples.data.numpy().transpose(0, 2, 3, 1)

    grid_count = image_frame_dim * image_frame_dim
    utils.save_images(
        samples[:grid_count, :, :, :],
        [image_frame_dim, image_frame_dim],
        out_dir + '/' + self.model_name + '_epoch%03d' % epoch + '.png')
def sample_relax(logits):
    """Draw a Gumbel-max sample plus its conditional relaxation.

    Relies on the module-level sizes ``B`` and ``C``. The softmax of
    ``logits`` is tiled ``B`` times along dim 0, so ``logits`` is presumably
    ``(1, C)`` — confirm against the caller.

    Returns:
        Tuple ``(z, b, logprob, z_tilde)``: the perturbed logits, their
        arg-max along dim 1, the log-probability of ``b`` viewed as
        ``(B, 1)``, and the relaxation conditioned on ``b``.
    """
    def _uniform(rows, cols):
        # keep samples away from 0 and 1 so the double log stays finite
        return torch.rand(rows, cols).clamp(1e-12, 1. - 1e-12)

    # unconditional sample
    u = _uniform(B, C)
    gumbels = -torch.log(-torch.log(u))
    z = logits + gumbels
    b = torch.argmax(z, dim=1)
    picked = b.view(B, 1)
    logprob = Categorical(logits=logits).log_prob(b).view(B, 1)

    # Fresh noise for the chosen class. The original author noted that
    # reusing u / z at index b (via gather) "seems biased", so independent
    # noise is used instead.
    v_k = _uniform(B, 1)
    z_tilde_b = -torch.log(-torch.log(v_k))

    # conditional noise for all classes; the chosen one is overwritten below
    v = _uniform(B, C)
    probs = torch.softmax(logits, dim=1).repeat(B, 1)
    z_tilde = -torch.log((-torch.log(v) / probs) - torch.log(v_k))
    z_tilde.scatter_(dim=1, index=picked, src=z_tilde_b)

    return z, b, logprob, z_tilde
def test_degenerate_GPyTorchPosterior(self, cuda=False):
    """Check GPyTorchPosterior on a singular covariance matrix.

    Covers basic properties, sampling (including the single "not p.d."
    RuntimeWarning), sampling with base samples, and a batched posterior.

    Args:
        cuda: Run on the CUDA device when True, otherwise on CPU.
    """
    device = torch.device("cuda") if cuda else torch.device("cpu")

    def _assert_single_pd_warning(caught):
        # exactly one RuntimeWarning mentioning "not p.d." must be recorded
        self.assertEqual(len(caught), 1)
        self.assertTrue(issubclass(caught[-1].category, RuntimeWarning))
        self.assertTrue("not p.d." in str(caught[-1].message))

    for dtype in (torch.float, torch.double):
        tkwargs = {"device": device, "dtype": dtype}

        # singular covariance matrix
        degenerate_covar = torch.tensor(
            [[1, 1, 0], [1, 1, 0], [0, 0, 2]], dtype=dtype, device=device
        )
        mean = torch.rand(3, dtype=dtype, device=device)
        mvn = MultivariateNormal(mean, lazify(degenerate_covar))
        posterior = GPyTorchPosterior(mvn=mvn)

        # basics
        self.assertEqual(posterior.device.type, device.type)
        self.assertTrue(posterior.dtype == dtype)
        self.assertEqual(posterior.event_shape, torch.Size([3, 1]))
        self.assertTrue(torch.equal(posterior.mean, mean.unsqueeze(-1)))
        variance_exp = degenerate_covar.diag().unsqueeze(-1)
        self.assertTrue(torch.equal(posterior.variance, variance_exp))

        # rsample
        with warnings.catch_warnings(record=True) as w:
            # we check that the p.d. warning is emitted - this only
            # happens once per posterior, so we need to check only once
            samples = posterior.rsample(sample_shape=torch.Size([4]))
            _assert_single_pd_warning(w)
        self.assertEqual(samples.shape, torch.Size([4, 3, 1]))
        samples2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
        self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 1]))

        # rsample w/ base samples: identical base samples must reproduce
        for sample_shape in (torch.Size([4]), torch.Size([4, 2])):
            base = torch.randn(*sample_shape, 3, 1, **tkwargs)
            first = posterior.rsample(
                sample_shape=sample_shape, base_samples=base
            )
            second = posterior.rsample(
                sample_shape=sample_shape, base_samples=base
            )
            self.assertTrue(torch.allclose(first, second))

        # collapse_batch_dims
        b_mean = torch.rand(2, 3, dtype=dtype, device=device)
        b_degenerate_covar = degenerate_covar.expand(2, *degenerate_covar.shape)
        b_mvn = MultivariateNormal(b_mean, lazify(b_degenerate_covar))
        b_posterior = GPyTorchPosterior(mvn=b_mvn)
        b_base_samples = torch.randn(4, 2, 3, 1, **tkwargs)
        with warnings.catch_warnings(record=True) as w:
            b_samples = b_posterior.rsample(
                sample_shape=torch.Size([4]), base_samples=b_base_samples
            )
            _assert_single_pd_warning(w)
        self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 1]))
def run_test_argmax():
    """Run ``TestArgMax`` once on random length-4 inputs and back-propagate."""
    module = TestArgMax()
    # drawn one after another in the order k, v, y
    k, v, y = (torch.rand(4) for _ in range(3))
    module(k, v, y).backward()
def setUp(self):
    """Create the input, gradient and duty-cycle fixtures.

    Inputs are all-ones tensors with 3 filters and a 2 x 2 image, with a few
    entries raised above 1 so they stand out; one fixture has batch size 1
    and the other batch size 2.
    """
    def _ones_with(shape, overrides):
        tensor = torch.ones(shape)
        for index, value in overrides:
            tensor[index] = value
        return tensor

    first_sample = [
        ((0, 0, 1, 0), 1.1),
        ((0, 0, 1, 1), 1.2),
        ((0, 1, 0, 1), 1.2),
        ((0, 2, 1, 0), 1.3),
    ]
    second_sample = [
        ((1, 0, 0, 0), 1.4),
        ((1, 1, 0, 0), 1.5),
        ((1, 1, 0, 1), 1.6),
        ((1, 2, 1, 1), 1.7),
    ]

    # Batch size 1
    self.x = _ones_with((1, 3, 2, 2), first_sample)
    self.gradient = torch.rand(self.x.shape)

    # Batch size 2: the first sample repeats the batch-size-1 fixture
    self.x2 = _ones_with((2, 3, 2, 2), first_sample + second_sample)
    self.gradient2 = torch.rand(self.x2.shape)

    # All equal
    self.dutyCycle = torch.zeros((1, 3, 1, 1)).fill_(1.0 / 3.0)
def sample_relax(probs):
    """Draw a Gumbel-max sample plus its conditional relaxation.

    Relies on the module-level sizes ``B`` and ``C`` and on a module-level
    distribution object ``cat`` (none of them defined in this block).

    Two fixes over the previous version:

    * the chosen-class entry of ``z_tilde`` is written per row with
      ``scatter_``; ``z_tilde[:, b] = ...`` overwrote every sampled column in
      every row;
    * uniform samples are clamped away from 0 so ``log`` cannot return
      ``-inf``.

    Args:
        probs: Class probabilities that divide a ``(B, C)`` tensor, so they
            must broadcast to ``(B, C)``.

    Returns:
        Tuple ``(z, b, logprob, z_tilde)``: the perturbed log-probabilities,
        their arg-max along dim 1, ``cat.log_prob(b)``, and the relaxation
        conditioned on ``b``.
    """
    eps = 1e-12

    # Sample z
    u = torch.rand(B, C).clamp(min=eps)
    gumbels = -torch.log(-torch.log(u))
    z = torch.log(probs) + gumbels
    b = torch.argmax(z, dim=1)
    logprob = cat.log_prob(b)

    # Sample z_tilde
    u_b = torch.rand(B, 1).clamp(min=eps)
    z_tilde_b = -torch.log(-torch.log(u_b))
    u = torch.rand(B, C).clamp(min=eps)
    z_tilde = -torch.log((-torch.log(u) / probs) - torch.log(u_b))
    # row i receives z_tilde_b[i] only at its own chosen column b[i]
    z_tilde.scatter_(dim=1, index=b.view(B, 1), src=z_tilde_b)

    return z, b, logprob, z_tilde
def sample_relax_given_class(logits, samp):
    """Build two relaxations conditioned on an already-chosen class.

    Relies on the module-level sizes ``B`` and ``C``. Both returned
    relaxations share the chosen-class noise ``u_b`` (gathered from the first
    uniform draw); ``z`` reuses that first draw for the other classes, while
    ``z_tilde`` uses a second, independent draw.

    The previous version also computed an unconditional ``logits + gumbels``
    that was immediately overwritten, and gathered ``u_b`` / ``z_tilde_b``
    twice from the same tensor. Those dead and duplicated computations are
    removed; the returned values are unchanged.

    Args:
        logits: Unnormalized class scores, soft-maxed along dim 1.
        samp: Chosen class indices, viewable as ``(B, 1)``.

    Returns:
        Tuple ``(z, z_tilde, logprob)`` with ``logprob`` viewed as ``(B, 1)``.
    """
    b = samp
    picked = b.view(B, 1)
    logprob = Categorical(logits=logits).log_prob(b).view(B, 1)
    probs = torch.softmax(logits, dim=1)

    u = torch.rand(B, C).clamp(1e-8, 1. - 1e-8)
    u_b = torch.gather(input=u, dim=1, index=picked)
    z_tilde_b = -torch.log(-torch.log(u_b))

    def _conditioned(noise):
        # relaxation given class b; the chosen entry is overwritten per row
        relaxed = -torch.log((-torch.log(noise) / probs) - torch.log(u_b))
        relaxed.scatter_(dim=1, index=picked, src=z_tilde_b)
        return relaxed

    z = _conditioned(u)
    z_tilde = _conditioned(torch.rand(B, C).clamp(1e-8, 1. - 1e-8))
    return z, z_tilde, logprob
def test_forward_runs_with_non_bijective_mapping(self):
    """Forward pass must run when one embedder consumes two indexers.

    The ELMo embedder is mapped to both the ``elmo`` and ``words`` inputs,
    while the word embedder uses ``words`` only.
    """
    fixtures = self.FIXTURES_ROOT / 'elmo'
    options_file = str(fixtures / 'options.json')
    weight_file = str(fixtures / 'lm_weights.hdf5')

    token_embedders = {
        "words": {
            "type": "embedding",
            "num_embeddings": 20,
            "embedding_dim": 2,
        },
        "elmo": {
            "type": "elmo_token_embedder",
            "options_file": options_file,
            "weight_file": weight_file
        },
    }
    indexer_map = {"words": ["words"], "elmo": ["elmo", "words"]}
    params = Params({
        "token_embedders": token_embedders,
        "embedder_to_indexer_map": indexer_map,
    })
    token_embedder = BasicTextFieldEmbedder.from_params(self.vocab, params)

    word_ids = (torch.rand(3, 6) * 20).long()
    elmo_ids = (torch.rand(3, 6, 50) * 15).long()
    token_embedder({'words': word_ids, 'elmo': elmo_ids})
def test_sequential_scorer_d4_3():
    """Score one seeded embedding pair and compare with a pinned value.

    The seed is set before the scorer is built, so the scorer's parameters
    and both embeddings come from the same seeded RNG stream.
    """
    torch.manual_seed(1)
    scorer = SequentialScorer(TEST_EMBEDDING_DIM, min_features, 2, COREF_FF_HIDDEN)
    emb5 = ag.Variable(torch.rand(1, TEST_EMBEDDING_DIM))
    emb0 = ag.Variable(torch.rand(1, TEST_EMBEDDING_DIM))
    active_features = ['exact-match', 'last-token-match']
    pred = float(scorer(emb5, emb0, active_features))
    assert_almost_equals(pred, -0.359851, places=4)
def _get_random_data(n, **tkwargs): train_x1 = torch.linspace(0, 0.95, n + 1, **tkwargs) + 0.05 * torch.rand( n + 1, **tkwargs ) train_x2 = torch.linspace(0, 0.95, n, **tkwargs) + 0.05 * torch.rand(n, **tkwargs) train_y1 = torch.sin(train_x1 * (2 * math.pi)) + 0.2 * torch.randn_like(train_x1) train_y2 = torch.cos(train_x2 * (2 * math.pi)) + 0.2 * torch.randn_like(train_x2) return train_x1.unsqueeze(-1), train_x2.unsqueeze(-1), train_y1, train_y2
def __init__(self, args):
    """Set up BEGAN hyper-parameters, networks, optimizers, data and noise.

    Args:
        args: Namespace providing ``epoch``, ``batch_size``, ``save_dir``,
            ``result_dir``, ``dataset``, ``log_dir``, ``gpu_mode``,
            ``gan_type``, ``lrG``, ``lrD``, ``beta1`` and ``beta2``.
    """
    # parameters
    self.epoch = args.epoch
    self.sample_num = 64
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.dataset = args.dataset
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.model_name = args.gan_type

    # BEGAN parameters
    self.gamma = 0.75
    self.lambda_ = 0.001
    self.k = 0.

    # networks init
    self.G = generator(self.dataset)
    self.D = discriminator(self.dataset)
    adam_betas = (args.beta1, args.beta2)
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=adam_betas)
    self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=adam_betas)

    if self.gpu_mode:
        self.G.cuda()
        self.D.cuda()
    # No L1 loss module is created: per the original note, BEGAN does not
    # work well when using L1loss().

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    print('-----------------------------------------------')

    # load dataset; any other dataset name leaves self.data_loader unset
    if self.dataset == 'mnist':
        mnist = datasets.MNIST(
            'data/mnist', train=True, download=True,
            transform=transforms.Compose([transforms.ToTensor()]))
        self.data_loader = DataLoader(mnist, batch_size=self.batch_size, shuffle=True)
    elif self.dataset == 'fashion-mnist':
        fashion = datasets.FashionMNIST(
            'data/fashion-mnist', train=True, download=True,
            transform=transforms.Compose([transforms.ToTensor()]))
        self.data_loader = DataLoader(fashion, batch_size=self.batch_size, shuffle=True)
    elif self.dataset == 'celebA':
        celeba_transform = transforms.Compose(
            [transforms.CenterCrop(160), transforms.Scale(64), transforms.ToTensor()])
        self.data_loader = utils.load_celebA(
            'data/celebA', transform=celeba_transform,
            batch_size=self.batch_size, shuffle=True)
    self.z_dim = 62

    # fixed noise, reused for every visualisation
    fixed_noise = torch.rand((self.batch_size, self.z_dim))
    if self.gpu_mode:
        fixed_noise = fixed_noise.cuda()
    self.sample_z_ = Variable(fixed_noise, volatile=True)
def __init__(self, args):
    """Set up EBGAN hyper-parameters, networks, optimizers, data and noise.

    Args:
        args: Namespace providing ``epoch``, ``batch_size``, ``save_dir``,
            ``result_dir``, ``dataset``, ``log_dir``, ``gpu_mode``,
            ``gan_type``, ``lrG``, ``lrD``, ``beta1`` and ``beta2``.
    """
    # parameters
    self.epoch = args.epoch
    self.sample_num = 64
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.dataset = args.dataset
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.model_name = args.gan_type

    # EBGAN parameters
    self.pt_loss_weight = 0.1
    # Margin for the loss function: usually 1 is enough, but for large
    # batch sizes it must be larger than 1.
    self.margin = max(1, self.batch_size / 64.)

    # networks init
    self.G = generator(self.dataset)
    self.D = discriminator(self.dataset)
    adam_betas = (args.beta1, args.beta2)
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=adam_betas)
    self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=adam_betas)

    if self.gpu_mode:
        self.G.cuda()
        self.D.cuda()
        self.MSE_loss = nn.MSELoss().cuda()
    else:
        self.MSE_loss = nn.MSELoss()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    print('-----------------------------------------------')

    # load dataset; any other dataset name leaves self.data_loader unset
    if self.dataset == 'mnist':
        mnist = datasets.MNIST(
            'data/mnist', train=True, download=True,
            transform=transforms.Compose([transforms.ToTensor()]))
        self.data_loader = DataLoader(mnist, batch_size=self.batch_size, shuffle=True)
    elif self.dataset == 'fashion-mnist':
        fashion = datasets.FashionMNIST(
            'data/fashion-mnist', train=True, download=True,
            transform=transforms.Compose([transforms.ToTensor()]))
        self.data_loader = DataLoader(fashion, batch_size=self.batch_size, shuffle=True)
    elif self.dataset == 'celebA':
        celeba_transform = transforms.Compose(
            [transforms.CenterCrop(160), transforms.Scale(64), transforms.ToTensor()])
        self.data_loader = utils.load_celebA(
            'data/celebA', transform=celeba_transform,
            batch_size=self.batch_size, shuffle=True)
    self.z_dim = 62

    # fixed noise, reused for every visualisation
    fixed_noise = torch.rand((self.batch_size, self.z_dim))
    if self.gpu_mode:
        fixed_noise = fixed_noise.cuda()
    self.sample_z_ = Variable(fixed_noise, volatile=True)
def sample_relax_given_b(logits, b):
    """Sample the relaxation conditioned on the chosen classes ``b``.

    Relies on the module-level sizes ``B`` and ``C``; the noise is moved to
    CUDA, so ``logits`` and ``b`` must be CUDA tensors as well.

    Args:
        logits: Unnormalized class scores, soft-maxed along dim 1.
        b: Chosen class indices, viewable as ``(B, 1)``.

    Returns:
        The conditional relaxation, with row ``i`` holding the chosen-class
        noise at column ``b[i]``.
    """
    def _uniform(cols):
        # clamp away from 0 and 1 so the logs stay finite
        return torch.rand(B, cols).clamp(1e-10, 1. - 1e-10).cuda()

    u_b = _uniform(1)
    z_tilde_b = -torch.log(-torch.log(u_b))

    u = _uniform(C)
    probs = torch.softmax(logits, dim=1)
    z_tilde = -torch.log((-torch.log(u) / probs) - torch.log(u_b))
    z_tilde.scatter_(dim=1, index=b.view(B, 1), src=z_tilde_b)
    return z_tilde
def test_GPyTorchPosterior(self, cuda=False):
    """Check GPyTorchPosterior properties and sampling on a diagonal MVN.

    Covers basic properties, sample shapes, reproducibility with explicit
    base samples (including a shape-mismatch error), and base samples that
    broadcast over the batch dimension.

    Args:
        cuda: Run on the CUDA device when True, otherwise on CPU.
    """
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": device, "dtype": dtype}

        mean = torch.rand(3, dtype=dtype, device=device)
        variance = 1 + torch.rand(3, dtype=dtype, device=device)
        covar = variance.diag()
        mvn = MultivariateNormal(mean, lazify(covar))
        posterior = GPyTorchPosterior(mvn=mvn)

        def _assert_reproducible(sample_shape, base):
            # the same base samples must give the same draws
            first = posterior.rsample(
                sample_shape=sample_shape, base_samples=base
            )
            second = posterior.rsample(
                sample_shape=sample_shape, base_samples=base
            )
            self.assertTrue(torch.allclose(first, second))

        # basics
        self.assertEqual(posterior.device.type, device.type)
        self.assertTrue(posterior.dtype == dtype)
        self.assertEqual(posterior.event_shape, torch.Size([3, 1]))
        self.assertTrue(torch.equal(posterior.mean, mean.unsqueeze(-1)))
        self.assertTrue(torch.equal(posterior.variance, variance.unsqueeze(-1)))

        # rsample
        samples = posterior.rsample()
        self.assertEqual(samples.shape, torch.Size([1, 3, 1]))
        samples = posterior.rsample(sample_shape=torch.Size([4]))
        self.assertEqual(samples.shape, torch.Size([4, 3, 1]))
        samples2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
        self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 1]))

        # rsample w/ base samples
        base_samples = torch.randn(4, 3, 1, **tkwargs)
        # incompatible shapes
        with self.assertRaises(RuntimeError):
            posterior.rsample(
                sample_shape=torch.Size([3]), base_samples=base_samples
            )
        _assert_reproducible(torch.Size([4]), base_samples)
        base_samples2 = torch.randn(4, 2, 3, 1, **tkwargs)
        _assert_reproducible(torch.Size([4, 2]), base_samples2)

        # collapse_batch_dims
        b_mean = torch.rand(2, 3, dtype=dtype, device=device)
        b_variance = 1 + torch.rand(2, 3, dtype=dtype, device=device)
        b_covar = b_variance.unsqueeze(-1) * torch.eye(3).type_as(b_variance)
        b_mvn = MultivariateNormal(b_mean, lazify(b_covar))
        b_posterior = GPyTorchPosterior(mvn=b_mvn)
        b_base_samples = torch.randn(4, 1, 3, 1, **tkwargs)
        b_samples = b_posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=b_base_samples
        )
        self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 1]))
def test_MockPosterior(self):
    """MockPosterior must echo its moments and repeat its stored samples."""
    mean = torch.rand(2)
    variance = torch.eye(2)
    samples = torch.rand(1, 2)
    mp = MockPosterior(mean=mean, variance=variance, samples=samples)

    self.assertTrue(torch.equal(mp.mean, mean))
    self.assertTrue(torch.equal(mp.variance, variance))

    # the default draw adds one leading sample dimension
    default_draw = mp.sample()
    self.assertTrue(torch.all(default_draw == samples.unsqueeze(0)))

    # an explicit sample shape tiles the stored samples
    tiled_draw = mp.sample(torch.Size([2]))
    self.assertTrue(torch.all(tiled_draw == samples.repeat(2, 1, 1)))
def sample_code(num, cat_dim=0, cont_dim=0, bin_dim=0, device=None) -> torch.Tensor:
    """Sample a batch of latent codes with up to three kinds of components.

    The result concatenates, along dim 1 and in this order:

    * a one-hot categorical part of width ``cat_dim`` (class drawn uniformly),
    * a continuous part of width ``cont_dim``, uniform on ``[-1, 1)``,
    * a binary part of width ``bin_dim`` with entries in ``{0., 1.}``.

    A part whose width is not positive is omitted. When every width is
    non-positive an empty ``(num, 0)`` float tensor is returned; previously
    that case crashed inside ``torch.cat`` on an empty list.

    Args:
        num: Number of codes (rows) to sample.
        cat_dim: Number of categories of the one-hot part.
        cont_dim: Width of the continuous part.
        bin_dim: Width of the binary part.
        device: Device on which the tensors are created.

    Returns:
        Float tensor of shape ``(num, cat_dim + cont_dim + bin_dim)``,
        counting only the positive widths.
    """
    parts = []
    if cat_dim > 0:
        labels = torch.randint(cat_dim, size=(num, 1), dtype=torch.long, device=device)
        one_hot = torch.zeros(num, cat_dim, dtype=torch.float, device=device)
        one_hot.scatter_(1, labels, 1)
        parts.append(one_hot)
    if cont_dim > 0:
        parts.append(2. * torch.rand(num, cont_dim, device=device) - 1.)
    if bin_dim > 0:
        # named `bits` rather than `bin` to avoid shadowing the builtin
        bits = (torch.rand(num, bin_dim, device=device) > .5).float()
        parts.append(bits)

    if not parts:
        return torch.zeros(num, 0, dtype=torch.float, device=device)
    return torch.cat(parts, 1)
def run_test():
    """Run ``Test`` once on random length-4 inputs and back-propagate."""
    module = Test()
    # These two 4x5 draws are never passed to the module; they are kept so
    # the random stream seen by c and d is unchanged.
    _unused = [Variable(torch.rand(4, 5)) for _ in range(2)]
    c = torch.rand(4)
    d = torch.rand(4)  # ground-truth
    module(c, d).backward()
def test_HeterskedasticSingleTaskGP(self, cuda=False):
    """Check structure, forward pass, parameter sizes and posteriors.

    Runs over batch shapes ``[]`` and ``[2]``, one and two outputs, and
    float / double precision.

    Args:
        cuda: Run on the CUDA device when True, otherwise on CPU.
    """
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for batch_shape in (torch.Size([]), torch.Size([2])):
        for num_outputs in (1, 2):
            for dtype in (torch.float, torch.double):
                tkwargs = {"device": device, "dtype": dtype}
                model = self._get_model(
                    batch_shape=batch_shape, num_outputs=num_outputs, **tkwargs
                )

                # test init
                self.assertIsInstance(model.mean_module, ConstantMean)
                self.assertIsInstance(model.covar_module, ScaleKernel)
                base_kernel = model.covar_module.base_kernel
                self.assertIsInstance(base_kernel, MaternKernel)
                self.assertIsInstance(base_kernel.lengthscale_prior, GammaPrior)
                likelihood = model.likelihood
                self.assertIsInstance(likelihood, _GaussianLikelihoodBase)
                self.assertFalse(isinstance(likelihood, GaussianLikelihood))
                self.assertIsInstance(likelihood.noise_covar, HeteroskedasticNoise)

                # test forward
                test_x = torch.rand(batch_shape + torch.Size([3, 1]), **tkwargs)
                forward_out = model(test_x)
                self.assertIsInstance(forward_out, MultivariateNormal)

                # check param sizes: one value per output and batch element
                for param in dict(model.named_parameters()).values():
                    self.assertEqual(
                        param.numel(),
                        num_outputs * torch.tensor(batch_shape).prod().item(),
                    )

                # test posterior: non-batch evaluation
                X = torch.rand(batch_shape + torch.Size([3, 1]), **tkwargs)
                posterior = model.posterior(X)
                self.assertIsInstance(posterior, GPyTorchPosterior)
                self.assertEqual(
                    posterior.mean.shape,
                    batch_shape + torch.Size([3, num_outputs]),
                )

                # test posterior: batch evaluation (extra leading dim of 2)
                X = torch.rand(
                    torch.Size([2]) + batch_shape + torch.Size([3, 1]),
                    **tkwargs
                )
                posterior = model.posterior(X)
                self.assertIsInstance(posterior, GPyTorchPosterior)
                self.assertEqual(
                    posterior.mean.shape,
                    torch.Size([2]) + batch_shape + torch.Size([3, num_outputs]),
                )
def test_gpytorch_model(self):
    """Posterior of the simple model has the right type and mean shape.

    Checked both without and with observation noise.
    """
    train_X = torch.rand(5, 1)
    train_Y = torch.sin(train_X.squeeze())
    model = SimpleGPyTorchModel(train_X, train_Y)
    test_X = torch.rand(2, 1)

    # basic test first, then the same checks with observation noise
    for extra_kwargs in ({}, {"observation_noise": True}):
        posterior = model.posterior(test_X, **extra_kwargs)
        self.assertIsInstance(posterior, GPyTorchPosterior)
        self.assertEqual(posterior.mean.shape, torch.Size([2, 1]))
def summary(model, input_size, batch_size=-1, device="cuda"):
    """Print a per-layer table of output shapes and parameter counts.

    A forward hook is attached to every sub-module of ``model`` (except
    ``nn.Sequential`` / ``nn.ModuleList`` containers and ``model`` itself),
    one forward pass is run on random input, and the recorded shapes and
    parameter counts are printed together with size estimates in MB.

    Args:
        model: the module to summarise; it is called as ``model(*x)``.
        input_size: a tuple with the per-sample input shape, or a list of
            such tuples when the model takes several inputs.
        batch_size: value written into the first dimension of the reported
            shapes and used for the input-size estimate.
        device: ``"cuda"`` or ``"cpu"`` (case-insensitive); selects the
            tensor type of the random input. CUDA is only used when
            ``torch.cuda.is_available()``.

    Returns:
        None. The table is written to standard output.

    Raises:
        AssertionError: if ``device`` is neither ``"cuda"`` nor ``"cpu"``.
    """
    layer_info = OrderedDict()
    hooks = []

    def count_elements(module, attr):
        """Return the element count of ``module.<attr>`` or None if absent."""
        value = getattr(module, attr, None)
        if value is None or not hasattr(value, "size"):
            return None
        return value.numel()

    def register_hook(module):
        def hook(module, input, output):
            class_name = str(module.__class__).split(".")[-1].split("'")[0]
            m_key = "%s-%i" % (class_name, len(layer_info) + 1)
            entry = OrderedDict()
            layer_info[m_key] = entry
            entry["input_shape"] = list(input[0].size())
            entry["input_shape"][0] = batch_size
            if isinstance(output, (list, tuple)):
                entry["output_shape"] = [
                    [-1] + list(o.size())[1:] for o in output
                ]
            else:
                entry["output_shape"] = list(output.size())
                entry["output_shape"][0] = batch_size

            # Counts are kept as plain Python ints so that the totals are
            # well defined even when no layer owns any parameter.
            params = 0
            params_bits = 0

            # TODO: handle batchnorm params
            weight_params = count_elements(module, "weight")
            if weight_params is not None:
                params += weight_params
                params_bits += weight_params * 32
                entry["trainable"] = module.weight.requires_grad

            shift_params = count_elements(module, "shift")
            if shift_params is not None:
                assert (hasattr(module, "sign"))
                assert (hasattr(module.sign, "size"))
                assert (module.shift.size() == module.sign.size())
                params += shift_params
                # shift parameters are accounted at 5 bits each
                params_bits += shift_params * 5
                entry["trainable"] = module.shift.requires_grad

            bias_params = count_elements(module, "bias")
            if bias_params is not None:
                params += bias_params
                params_bits += bias_params * 32

            # Running statistics are only counted when the module tracks them.
            if getattr(module, "track_running_stats", False):
                for stat_name in ("running_mean", "running_var"):
                    stat_params = count_elements(module, stat_name)
                    if stat_params is not None:
                        params += stat_params
                        params_bits += stat_params * 32

            entry["nb_params"] = params
            entry["bits_params"] = params_bits

        if (not isinstance(module, nn.Sequential)
                and not isinstance(module, nn.ModuleList)
                and not (module == model)):
            hooks.append(module.register_forward_hook(hook))

    device = device.lower()
    assert device in [
        "cuda",
        "cpu",
    ], "Input device is not valid, please specify 'cuda' or 'cpu'"

    if device == "cuda" and torch.cuda.is_available():
        dtype = torch.cuda.FloatTensor
    else:
        dtype = torch.FloatTensor

    # multiple inputs to the network
    if isinstance(input_size, tuple):
        input_size = [input_size]

    # batch_size of at least 2 for each GPU for batchnorm
    n_samples = (torch.cuda.device_count() + 1) * 2
    x = [torch.rand(n_samples, *in_size).type(dtype) for in_size in input_size]

    # Register the hooks, run one forward pass, and always detach the hooks
    # again so a failing forward pass does not leave them on the model.
    try:
        model.apply(register_hook)
        model(*x)
    finally:
        for h in hooks:
            h.remove()

    print("----------------------------------------------------------------")
    line_new = "{:>20}  {:>25} {:>15}".format("Layer (type)", "Output Shape",
                                              "Param #")
    print(line_new)
    print("================================================================")
    total_params = 0
    total_params_bits = 0
    total_output = 0
    trainable_params = 0
    for layer, entry in layer_info.items():
        # input_shape, output_shape, trainable, nb_params
        line_new = "{:>20}  {:>25} {:>15}".format(
            layer,
            str(entry["output_shape"]),
            "{0:,}".format(entry["nb_params"]),
        )
        total_params += entry["nb_params"]
        total_params_bits += entry["bits_params"]
        total_output += np.prod(entry["output_shape"])
        if entry.get("trainable"):
            trainable_params += entry["nb_params"]
        print(line_new)

    # assume 4 bytes/number (float on cuda).
    total_input_size = abs(np.prod(input_size) * batch_size * 4. / (1024**2.))
    total_output_size = abs(2. * total_output * 4. /
                            (1024**2.))  # x2 for gradients
    total_params_size = abs(total_params_bits / (8. * (1024**2.)))
    total_size = total_params_size + total_output_size + total_input_size

    print("================================================================")
    print("Total params: {0:,}".format(total_params))
    print("Trainable params: {0:,}".format(trainable_params))
    print("Non-trainable params: {0:,}".format(total_params -
                                               trainable_params))
    print("----------------------------------------------------------------")
    print("Input size (MB): %0.2f" % total_input_size)
    print("Forward/backward pass size (MB): %0.2f" % total_output_size)
    print("Params size (MB): %0.2f" % total_params_size)
    print("Estimated Total Size (MB): %0.2f" % total_size)
    print("----------------------------------------------------------------")
def main():
    """Train and validate the pose network selected by the configuration.

    Parses the command line, updates the global ``config``, builds the model,
    loss, optimizer, scheduler and data loaders, then alternates one training
    epoch with one validation pass from ``config.TRAIN.BEGIN_EPOCH`` up to
    ``config.TRAIN.END_EPOCH``. A checkpoint is saved after every epoch and
    the final weights are written to ``final_state.pth.tar`` in the output
    directory. The TensorBoard writer is closed even if training fails.
    """
    # Local import: resolves (possibly dotted) attribute paths taken from the
    # configuration without evaluating configuration strings as Python code.
    from operator import attrgetter

    args = parse_args()
    reset_config(config, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        config, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    # Look up models.<MODEL.NAME>.get_pose_net by attribute access; this
    # replaces the former eval() of a string built from the configuration.
    build_pose_net = attrgetter(config.MODEL.NAME + '.get_pose_net')(models)
    model = build_pose_net(config, is_train=True)

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', config.MODEL.NAME + '.py'),
        final_output_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    try:
        # dummy batch (batch, 3, height, width) used only to trace the graph
        dump_input = torch.rand(
            (config.TRAIN.BATCH_SIZE, 3, config.MODEL.IMAGE_SIZE[1],
             config.MODEL.IMAGE_SIZE[0]))
        writer_dict['writer'].add_graph(model, (dump_input, ), verbose=False)

        gpus = [int(i) for i in config.GPUS.split(',')]
        model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

        # define loss function (criterion) and optimizer
        criterion = JointsMSELoss(
            use_target_weight=config.LOSS.USE_TARGET_WEIGHT).cuda()

        optimizer = get_optimizer(config, model)

        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR)

        # Data loading code
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        # dataset.<DATASET.DATASET>, resolved without eval() as above
        dataset_cls = attrgetter(config.DATASET.DATASET)(dataset)
        train_dataset = dataset_cls(
            config, config.DATASET.ROOT, config.DATASET.TRAIN_SET, True,
            transforms.Compose([
                transforms.ToTensor(),
                normalize,
            ]))
        valid_dataset = dataset_cls(
            config, config.DATASET.ROOT, config.DATASET.TEST_SET, False,
            transforms.Compose([
                transforms.ToTensor(),
                normalize,
            ]))

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=config.TRAIN.BATCH_SIZE * len(gpus),
            shuffle=config.TRAIN.SHUFFLE,
            num_workers=config.WORKERS,
            pin_memory=True)
        valid_loader = torch.utils.data.DataLoader(
            valid_dataset,
            batch_size=config.TEST.BATCH_SIZE * len(gpus),
            shuffle=False,
            num_workers=config.WORKERS,
            pin_memory=True)

        best_perf = 0.0
        best_model = False
        for epoch in range(config.TRAIN.BEGIN_EPOCH, config.TRAIN.END_EPOCH):
            # NOTE(review): the scheduler is stepped before the epoch's
            # optimizer updates; kept as-is so the learning-rate schedule
            # does not shift — confirm against the PyTorch version in use.
            lr_scheduler.step()

            # train for one epoch
            # Probe kept from the original ("test train_loader"); the
            # fetched sample itself is not used.
            dataiter = train_dataset[0]
            train(config, train_loader, model, criterion, optimizer, epoch,
                  final_output_dir, tb_log_dir, writer_dict)

            # evaluate on validation set
            perf_indicator = validate(config, valid_loader, valid_dataset,
                                      model, criterion, final_output_dir,
                                      tb_log_dir, writer_dict)

            best_model = perf_indicator > best_perf
            if best_model:
                best_perf = perf_indicator

            logger.info('=> saving checkpoint to {}'.format(final_output_dir))
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'model': get_model_name(config),
                    'state_dict': model.state_dict(),
                    'perf': perf_indicator,
                    'optimizer': optimizer.state_dict(),
                }, best_model, final_output_dir)

        final_model_state_file = os.path.join(final_output_dir,
                                              'final_state.pth.tar')
        logger.info(
            'saving final model state to {}'.format(final_model_state_file))
        torch.save(model.module.state_dict(), final_model_state_file)
    finally:
        # Flush and release the TensorBoard writer even when training raised.
        writer_dict['writer'].close()
def train(self):
    """Run the adversarial training loop until ``self.epoch`` exceeds ``max_epoch``.

    For every full-size batch from ``self.data_loader`` the discriminator
    ``self.D`` is updated from three backward passes (real batch, generated
    batch, gradient-penalty term) followed by one ``optimizer_D`` step, and
    then the generator ``self.G`` is updated once. Every ``verbose_T``
    iterations the losses are printed and a real and a generated image grid
    are written under ``tmp_path``. A checkpoint is written to
    ``model_dump_path`` at the end of every epoch.

    Relies on names defined outside this method: ``batch_size``, ``device``,
    ``max_epoch``, ``lambda_adv``, ``lambda_gp``, ``noise_size``,
    ``verbose_T``, ``tmp_path``, ``model_dump_path``,
    ``adjust_learning_rate``, ``utils``, ``vutils`` and ``grad``.
    """
    iteration = -1
    # Reusable target buffer: filled with 1.0 for "real" and 0.0 for "fake".
    label = Variable(torch.FloatTensor(batch_size, 1)).to(device)
    while self.epoch <= max_epoch:
        adjust_learning_rate(self.optimizer_G, iteration)
        adjust_learning_rate(self.optimizer_D, iteration)
        for i, (anime_tag, anime_img) in enumerate(self.data_loader):
            iteration += 1
            # Skip incomplete batches: ``label`` has a fixed first dimension.
            if anime_img.shape[0] != batch_size:
                continue
            anime_img = Variable(anime_img).to(device)
            anime_tag = Variable(torch.FloatTensor(anime_tag)).to(device)
            # D : G = 2 : 1
            # 1. Training D
            # 1.1. use real image for discriminating
            self.D.zero_grad()
            label_p, tag_p = self.D(anime_img)
            label.data.fill_(1.0)
            # 1.2. real image's loss
            real_label_loss = self.label_criterion(label_p, label)
            real_tag_loss = self.tag_criterion(tag_p, anime_tag)
            real_loss_sum = real_label_loss * lambda_adv / 2.0 + real_tag_loss * lambda_adv / 2.0
            real_loss_sum.backward()
            # 1.3. use fake image for discriminating
            g_noise, fake_tag = utils.fake_generator(
                batch_size, noise_size, device)
            fake_feat = torch.cat([g_noise, fake_tag], dim=1)
            # detach(): this backward pass must not reach the generator
            fake_img = self.G(fake_feat).detach()
            fake_label_p, fake_tag_p = self.D(fake_img)
            label.data.fill_(.0)
            # 1.4. fake image's loss
            fake_label_loss = self.label_criterion(fake_label_p, label)
            fake_tag_loss = self.tag_criterion(fake_tag_p, fake_tag)
            fake_loss_sum = fake_label_loss * lambda_adv / 2.0 + fake_tag_loss * lambda_adv / 2.0
            fake_loss_sum.backward()
            # 1.5. gradient penalty
            # https://github.com/jfsantos/dragan-pytorch/blob/master/dragan.py
            # alpha has one value per sample and broadcasts over the
            # remaining dimensions of the image batch.
            alpha_size = [1] * anime_img.dim()
            alpha_size[0] = anime_img.size(0)
            alpha = torch.rand(alpha_size).to(device)
            # x_hat mixes each real image with a noise-perturbed copy of
            # itself (noise scaled by half the batch standard deviation).
            x_hat = Variable(alpha * anime_img.data + (1 - alpha) * \
                             (anime_img.data + 0.5 * anime_img.data.std() * Variable(torch.rand(anime_img.size())).to(device)),
                             requires_grad=True).to(device)
            pred_hat, pred_tag = self.D(x_hat)
            # Gradient of the discriminator output w.r.t. x_hat, flattened
            # to one row per sample.
            gradients = grad(outputs=pred_hat, inputs=x_hat, grad_outputs=torch.ones(
                pred_hat.size()).to(device), create_graph=True, retain_graph=True, only_inputs=True)[0].view(x_hat.size(0), -1)
            # Penalise deviation of the per-sample gradient L2 norm from 1.
            gradient_penalty = lambda_gp * (
                (gradients.norm(2, dim=1) - 1)**2).mean()
            # gradient_penalty.requires_grad = True
            # NOTE(review): re-wrapping the penalty in a new Variable may
            # create a fresh leaf that is disconnected from the graph built
            # above, in which case the backward() below would not contribute
            # gradients to D's parameters — confirm before relying on it.
            gradient_penalty = Variable(gradient_penalty, requires_grad=True)
            gradient_penalty.backward()
            # 1.6. update optimizer
            self.optimizer_D.step()
            # 2. Training G
            # 2.1. generate fake image
            self.G.zero_grad()
            g_noise, fake_tag = utils.fake_generator(
                batch_size, noise_size, device)
            fake_feat = torch.cat([g_noise, fake_tag], dim=1)
            fake_img = self.G(fake_feat)
            fake_label_p, fake_tag_p = self.D(fake_img)
            # The generator is trained against the "real" target (1.0).
            label.data.fill_(1.0)
            # 2.2. calc loss
            label_loss_g = self.label_criterion(fake_label_p, label)
            tag_loss_g = self.tag_criterion(fake_tag_p, fake_tag)
            loss_g = label_loss_g * lambda_adv / 2.0 + tag_loss_g * lambda_adv / 2.0
            loss_g.backward()
            # 2.3. update optimizer
            self.optimizer_G.step()
            # Periodic progress report and sample dumps.
            if iteration % verbose_T == 0:
                print('The iteration is now %d' % iteration)
                print('The loss is %.4f, %.4f, %.4f, %.4f' %
                      (real_loss_sum, fake_loss_sum, gradient_penalty, loss_g))
                vutils.save_image(
                    anime_img.data.view(batch_size, 3, anime_img.size(2),
                                        anime_img.size(3)),
                    os.path.join(
                        tmp_path, 'real_image_{}.png'.format(
                            str(iteration).zfill(8))))
                # Fresh noise so the dumped fakes differ from the training step.
                g_noise, fake_tag = utils.fake_generator(
                    batch_size, noise_size, device)
                fake_feat = torch.cat([g_noise, fake_tag], dim=1)
                fake_img = self.G(fake_feat)
                vutils.save_image(
                    fake_img.data.view(batch_size, 3, anime_img.size(2),
                                       anime_img.size(3)),
                    os.path.join(
                        tmp_path, 'fake_image_{}.png'.format(
                            str(iteration).zfill(8))))
        # dump checkpoint
        torch.save(
            {
                'epoch': self.epoch,
                'D': self.D.state_dict(),
                'G': self.G.state_dict(),
                'optimizer_D': self.optimizer_D.state_dict(),
                'optimizer_G': self.optimizer_G.state_dict(),
            }, '{}/checkpoint_{}.tar'.format(model_dump_path,
                                             str(self.epoch).zfill(4)))
        self.epoch += 1
# NOTE(review): the indented statements directly below are the tail of a
# method whose header is not part of this chunk; they are reproduced
# unchanged. ``output`` is defined in the part of the method not shown here.
        x = self.conv_last(x)
        x = self.bn_last(x)
        x = self.activation(x)
        #
        # # average pooling layer
        # x = self.avgpool(x)
        # print(x.shape)
        # # flatten for input to fully-connected layer
        # x = x.view(x.size(0), -1)
        # x = self.fc(x)
        # Returns three tensors: two entries of ``output`` plus the result of
        # the last conv / bn / activation stage.
        return output[0], output[1], x
        # return x#F.log_softmax(x, dim=1)


# Speed test: build the network and time three forward passes on a random
# 1 x 3 x 352 x 352 input, printing the three output sizes and the elapsed
# seconds for each pass.
if __name__ == "__main__":
    model = ShuffleNetV2(scale=1, in_channels=3, c_tag=0.5, num_classes=2, activation=nn.ReLU, SE=False, residual=False)
    for i in range(3):
        t1 = time.time()
        x = torch.rand(1,3, 352, 352)
        out3, out4, out5 = model(x)
        # print(out3)
        print(out3.size())
        print(out4.size())
        print(out5.size())
        cnt = time.time() - t1
        print(cnt)
def summary(self, input_size):
    """Print a table of leaf layers with output shape and parameter count.

    Forward hooks are attached to every sub-module (except ``Sequential`` /
    ``ModuleList`` containers and ``self``), one forward pass is run on
    random input, and one row is printed per leaf layer, followed by total,
    trainable and non-trainable parameter counts.

    Args:
        input_size: shape of one input sample without the batch dimension,
            or a list/tuple of such shapes. In the latter case the random
            inputs are handed to the network as a single list argument.

    Returns:
        None. The table is written to standard output.
    """
    layer_info = OrderedDict()
    hooks = []

    def register_hook(module):
        def hook(module, input, output):
            if module._modules:  # only want base layers
                return
            class_name = str(module.__class__).split('.')[-1].split("'")[0]
            m_key = '%s-%i' % (class_name, len(layer_info) + 1)
            entry = OrderedDict()
            layer_info[m_key] = entry
            entry['input_shape'] = list(input[0].size())
            entry['input_shape'][0] = None
            # For tuple outputs only the first element's shape is reported.
            first_output = output[0] if isinstance(output, tuple) else output
            entry['output_shape'] = list(first_output.size())
            entry['output_shape'][0] = None

            # Unset parameters (e.g. a disabled bias) are stored as None in
            # ``_parameters`` and must be skipped.
            own_params = [
                p for p in module._parameters.values() if p is not None
            ]
            if own_params:
                entry['trainable'] = any(p.requires_grad for p in own_params)
            entry['nb_params'] = sum(torch.numel(p.data) for p in own_params)
            # Counted per parameter so frozen tensors inside an otherwise
            # trainable layer are not reported as trainable.
            entry['nb_trainable'] = sum(
                torch.numel(p.data) for p in own_params if p.requires_grad)

        if not isinstance(module, torch.nn.Sequential) and \
           not isinstance(module, torch.nn.ModuleList) and \
           not (module == self):
            hooks.append(module.register_forward_hook(hook))

    # check if there are multiple inputs to the network
    if isinstance(input_size[0], (list, tuple)):
        x = [(torch.rand(1, *in_size)) for in_size in input_size]
    else:
        x = (torch.randn(1, *input_size))

    # Register the hooks, run one forward pass, and always detach the hooks
    # again so a failing forward pass does not leave them on the network.
    try:
        self.apply(register_hook)
        self(x)
    finally:
        for h in hooks:
            h.remove()

    # print out neatly
    def get_names(module, name, acc):
        """Collect the dotted attribute paths of all leaf modules into acc."""
        if not module._modules:
            acc.append(name)
        else:
            for key in module._modules.keys():
                p_name = key if name == "" else name + "." + key
                get_names(module._modules[key], p_name, acc)

    names = []
    get_names(self, "", names)

    col_width = 25  # should be >= 12
    summary_width = 61

    def crop(s):
        return s[:col_width] if len(s) > col_width else s

    print('_' * summary_width)
    print('{0: <{3}} {1: <{3}} {2: <{3}}'.format(
        'Layer (type)', 'Output Shape', 'Param #', col_width))
    print('=' * summary_width)
    total_params = 0
    trainable_params = 0
    for (i, l_type), l_name in zip(enumerate(layer_info), names):
        d = layer_info[l_type]
        total_params += d['nb_params']
        trainable_params += d['nb_trainable']
        # Strip the "-<index>" suffix whatever the number of digits.
        type_label = l_type.rsplit('-', 1)[0]
        print('{0: <{3}} {1: <{3}} {2: <{3}}'.format(
            crop(l_name + ' (' + type_label + ')'),
            crop(str(d['output_shape'])),
            crop(str(d['nb_params'])),
            col_width))
        if i < len(layer_info) - 1:
            print('_' * summary_width)
    print('=' * summary_width)
    print('Total params: ' + str(total_params))
    print('Trainable params: ' + str(trainable_params))
    print('Non-trainable params: ' + str((total_params - trainable_params)))
    print('_' * summary_width)
def get_summary(net, input_size, batch_size=1, device="cuda", verbose=False):
    """Collect per-module shapes, parameter counts and size estimates for ``net``.

    Forward hooks are attached to every sub-module (except ``Sequential`` /
    ``ModuleList`` containers and ``net`` itself) and one forward pass is run
    on random input with a batch of 2. Each hooked module is recorded under
    its name from ``net.named_modules()``.

    Args:
        net: the module to summarise; it is called as ``net(*x)``.
        input_size: a tuple with the per-sample input shape, or a list of
            such tuples when the network takes several inputs.
        batch_size: value written into the first dimension of the reported
            input/output shapes.
        device: ``"cuda"`` or ``"cpu"`` (case-insensitive); selects the
            tensor type of the random input. CUDA is only used when
            ``torch.cuda.is_available()``.
        verbose: when true, the formatted table is also sent to
            ``logging.info``.

    Returns:
        A dict with the keys ``'dict'`` (per-module ``OrderedDict``),
        ``'prettyprint'`` (formatted table as a string),
        ``'biggest_activation'`` (largest single output size in kB) and
        ``'params_size'`` (summed parameter size in kB).

    Raises:
        AssertionError: if ``device`` is neither ``"cuda"`` nor ``"cpu"``.
    """
    mdict = dict(net.named_modules())
    summary = OrderedDict()
    hooks = []

    def register_hook(module):
        def hook(module, input, output):
            # Entries are keyed by the module's registered name.
            m_key = next(n for n, m in mdict.items() if m == module)
            entry = OrderedDict()
            summary[m_key] = entry
            entry["input_shape"] = list(input[0].size())
            entry["input_shape"][0] = batch_size
            if isinstance(output, (list, tuple)):
                entry["output_shape"] = [
                    [-1] + list(o.size())[1:] for o in output
                ]
            else:
                entry["output_shape"] = list(output.size())
                entry["output_shape"][0] = batch_size

            params = 0
            if hasattr(module, "weight") and hasattr(module.weight, "size"):
                weight_count = torch.prod(
                    torch.LongTensor(list(module.weight.size())))
                # NOTE(review): modules exposing a ``group`` attribute have
                # their weight count divided by it; presumably a
                # project-specific attribute — confirm.
                try:
                    params += weight_count / module.group
                except AttributeError:
                    params += weight_count
                entry["trainable"] = module.weight.requires_grad
            if hasattr(module, "bias") and hasattr(module.bias, "size"):
                params += torch.prod(torch.LongTensor(list(module.bias.size())))
            entry["nb_params"] = params

            # Optional per-module bit widths used for the size estimates.
            if hasattr(module, "W_precision"):
                entry['W_bits'] = module.W_precision.get_bits()
            if hasattr(module, "precision"):
                entry['bits'] = module.precision.get_bits()

        if (
            not isinstance(module, torch.nn.Sequential)
            and not isinstance(module, torch.nn.ModuleList)
            and not (module == net)
        ):
            hooks.append(module.register_forward_hook(hook))

    device = device.lower()
    assert device in [
        "cuda",
        "cpu",
    ], "Input device is not valid, please specify 'cuda' or 'cpu'"

    if device == "cuda" and torch.cuda.is_available():
        dtype = torch.cuda.FloatTensor
    else:
        dtype = torch.FloatTensor

    # multiple inputs to the network
    if isinstance(input_size, tuple):
        input_size = [input_size]

    # batch_size of 2 for batchnorm
    x = [torch.rand(2, *in_size).type(dtype) for in_size in input_size]

    # Register the hooks, run one forward pass, and always detach the hooks
    # again so a failing forward pass does not leave them on the network.
    try:
        net.apply(register_hook)
        net(*x)
    finally:
        for h in hooks:
            h.remove()

    thin_rule = "----------------------------------------------------------------"
    thick_rule = "================================================================"
    lines = [
        thin_rule,
        "{:>20}  {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #"),
        thick_rule,
    ]

    total_params = 0
    trainable_params = 0
    params_size = 0
    output_size = 0
    for layer, entry in summary.items():
        # input_shape, output_shape, trainable, nb_params
        lines.append("{:>20}  {:>25} {:>15}".format(
            layer,
            str(entry["output_shape"]),
            "{0:,}".format(entry["nb_params"]),
        ))
        total_params += entry["nb_params"]
        # Parameters default to 32 bits unless the module reported W_bits.
        params_size += abs(
            entry["nb_params"] * entry.get("W_bits", 32.) / 8. / (1024.))
        # Activations default to 32 bits unless the module reported bits.
        output_size = max(
            output_size,
            np.prod(entry["output_shape"]) * entry.get("bits", 32) / 8 / (1024.))
        if entry.get("trainable"):
            trainable_params += entry["nb_params"]

    lines.extend([
        thick_rule,
        "Total params: {0:,}".format(total_params),
        "Trainable params: {0:,}".format(trainable_params),
        "Non-trainable params: {0:,}".format(total_params - trainable_params),
        thin_rule,
        "Biggest activation tensor size (kB): {0:,.2f}".format(output_size),
        "Params size (kB): {0:,.1f}".format(params_size),
        thin_rule,
    ])
    s = "".join(line + "\n" for line in lines)

    if verbose:
        logging.info(s)

    return {
        'dict': summary,
        'prettyprint': s,
        'biggest_activation': output_size,
        'params_size': params_size
    }
def __init__(self, dev):
    """Build the op / argument tables for the autocast tests on device ``dev``.

    Each table is a list of tuples whose first item is an op name and whose
    second item is a tuple of arguments; some tables carry a third item (an
    expected dtype or a kwargs dict). All random tensors are created here,
    in a fixed order, on ``dev``.
    """
    super().__init__()
    n = 8
    bf16 = torch.bfloat16
    fp32 = torch.float32

    def randn(shape, dtype):
        """Draw a standard-normal tensor of the given shape/dtype on dev."""
        return torch.randn(shape, dtype=dtype, device=dev)

    def single(shape, dtype):
        """Same as randn, wrapped as a one-element argument tuple."""
        return (randn(shape, dtype),)

    def pair(shape, dtype):
        """Two independent draws of the same shape as an argument tuple."""
        return (randn(shape, dtype), randn(shape, dtype))

    # Utility arguments, created as one-element tuples.
    # Names that are not referenced further down are still created so the
    # sequence of random draws stays the same.
    pointwise0_bf16 = single(n, bf16)
    pointwise1_bf16 = single(n, bf16)
    pointwise2_bf16 = single(n, bf16)
    mat0_bf16 = single((n, n), bf16)
    mat1_bf16 = single((n, n), bf16)
    mat2_bf16 = single((n, n), bf16)

    dummy_dimsets = ((n,), (n, n), (n, n, n), (n, n, n, n), (n, n, n, n, n))
    dummy_bf16 = [single(dimset, bf16) for dimset in dummy_dimsets]

    dimsets = ((n, n, n), (n, n, n, n), (n, n, n, n, n))
    conv_args_bf16 = [pair(dimset, bf16) for dimset in dimsets]
    conv_args_fp32 = [pair(dimset, fp32) for dimset in dimsets]

    bias_fp32 = single((n,), fp32)
    element0_fp32 = single(1, fp32)
    pointwise0_fp32 = single(n, fp32)
    pointwise1_fp32 = single(n, fp32)
    mat0_fp32 = single((n, n), fp32)
    mat1_fp32 = single((n, n), fp32)
    mat2_fp32 = single((n, n), fp32)
    mat3_fp32 = single((n, n), fp32)

    dummy_fp32 = [single(dimset, fp32) for dimset in dummy_dimsets]

    # The lists below organize ops that autocast needs to test.
    # self.list_name corresponds to test_autocast_list_name in test/test_cpu.py.
    # Each op is associated with a tuple of valid arguments.

    # Some ops implement built-in type promotion.  These don't need autocasting,
    # but autocasting relies on their promotion, so we include tests to double-check.
    mixed_pointwise = pointwise0_fp32 + pointwise1_bf16
    comparison_ops = ("eq", "ge", "gt", "le", "lt", "ne")
    arithmetic_ops = ("add", "div", "mul")
    self.torch_expect_builtin_promote = (
        [(op, mixed_pointwise, torch.bool) for op in comparison_ops]
        + [(op, mixed_pointwise, torch.float32) for op in arithmetic_ops]
    )
    self.methods_expect_builtin_promote = (
        [("__%s__" % op, mixed_pointwise, torch.bool)
         for op in comparison_ops]
        + [("__%s__" % op, mixed_pointwise, torch.float32)
           for op in arithmetic_ops]
    )

    # The remaining lists organize ops that autocast treats explicitly.
    cube = (n, n, n)
    self.torch_bf16 = [
        ("conv1d", conv_args_fp32[0]),
        ("conv2d", conv_args_fp32[1]),
        ("conv3d", conv_args_fp32[2]),
        ("bmm", pair(cube, fp32)),
        ("mm", mat0_fp32 + mat1_fp32),
        ("matmul", mat0_fp32 + mat1_fp32),
        ("baddbmm", (randn(cube, fp32), randn(cube, fp32), randn(cube, fp32))),
        ("addmm", mat1_fp32 + mat2_fp32 + mat3_fp32),
        ("addbmm", mat0_fp32 + pair(cube, fp32)),
        ("conv_tbc", (randn((10, 7, 3), fp32),
                      randn((5, 3, 5), fp32),
                      randn(5, fp32),
                      0)),
    ]
    self.torch_fp32 = [
        ("conv_transpose%dd" % rank, conv_args_bf16[rank - 1])
        for rank in (1, 2, 3)
    ]
    self.nn_bf16 = [
        ("linear", mat0_fp32 + mat1_fp32, {}),
    ]
    self.nn_fp32 = [
        ("avg_pool3d", dummy_bf16[3], {"kernel_size": (3, 3, 3), "stride": (1, 1, 1)}),
        # uniform draws (torch.rand), unlike the normal draws used elsewhere
        ("binary_cross_entropy",
         (torch.rand((n, n), device=dev, dtype=torch.bfloat16),
          torch.rand((n, n), device=dev, dtype=torch.bfloat16))),
        ("reflection_pad1d", dummy_bf16[2], {"padding": (3, 3)}),
    ]
    self.torch_need_autocast_promote = [
        (op, (pointwise0_bf16 + pointwise1_fp32,)) for op in ("cat", "stack")
    ]
def train(net,
          feature,
          image_id,
          old_embedding,
          target_embedding,
          idx_modified,
          idx_old_neighbors,
          idx_new_neighbors,
          idx_negatives,
          lr=1e-3,
          experiment_id=None,
          socket_id=None,
          scale_func=None,
          categories=None,
          label=None):
    """Fine-tune ``net`` for one interaction cycle and return the new embedding.

    The samples are partitioned into disjoint index groups (modified,
    negatives, new neighbours, previously modified, old neighbours, fix
    points).  Each epoch draws batches from every group, optimises a weighted
    sum of triplet distance loss, L2 loss towards ``target_embedding``, KL
    (t-SNE) loss and an L1 weight penalty, and records a snapshot of the model
    and of the embedding.  Training stops when the L2 tracker, the learning
    rate threshold or the epoch limit (150) triggers.  Afterwards embeddings,
    features, model states and the index configuration of this cycle are
    written below ``runs/mapping/<exp_name>/``.

    Side effects: increments the module-level ``cycle`` counter, appends
    ``idx_modified`` to the module-level ``previously_modified``, and
    overwrites the last column of ``label`` in place (when it is given).

    Args:
        net: Network exposing ``mapping`` and ``embedder`` sub-modules.
        feature: NumPy array of input features, one row per sample.
        image_id: NumPy array of sample ids, stored alongside the outputs.
        old_embedding: Current embedding (NumPy array or ``torch.Tensor``).
        target_embedding: Embedding the L2 loss pulls towards (NumPy array or
            ``torch.Tensor``).
        idx_modified: Indices of samples moved by the user in this cycle.
        idx_old_neighbors: Indices of the old neighbourhood.
        idx_new_neighbors: Indices of the new neighbourhood.
        idx_negatives: Indices of negative samples.
        lr: Learning rate for Adam.
        experiment_id: Optional prefix for the experiment name.
        socket_id: If not ``None``, the embedding is sent after every epoch.
        scale_func: Optional callable applied to the embedding before sending.
        categories: Forwarded to ``send_payload``.
        label: Optional 2-D label array; its last column receives the group
            name of every sample.  Skipped when ``None``.

    Returns:
        The final embedding as a NumPy array.

    Raises:
        RuntimeError: If the index groups overlap.
    """
    global cycle, previously_modified
    cycle += 1

    # log and saving options
    exp_name = 'MapNet'
    if experiment_id is not None:
        exp_name = experiment_id + '_' + exp_name

    log = TBPlotter(os.path.join('runs/mapping', 'tensorboard', exp_name))
    log.print_logdir()

    outpath_config = os.path.join('runs/mapping', exp_name, 'configs')
    outpath_embedding = os.path.join('runs/mapping', exp_name, 'embeddings')
    outpath_feature = os.path.join('runs/mapping', exp_name, 'features')
    outpath_model = os.path.join('runs/mapping', exp_name, 'models')
    for outpath in (outpath_config, outpath_embedding, outpath_feature,
                    outpath_model):
        if not os.path.isdir(outpath):
            os.makedirs(outpath)

    # general
    N = len(feature)
    use_cuda = torch.cuda.is_available()
    if not isinstance(old_embedding, torch.Tensor):
        old_embedding = torch.from_numpy(old_embedding.copy())
    if not isinstance(target_embedding, torch.Tensor):
        target_embedding = torch.from_numpy(target_embedding.copy())

    if use_cuda:
        net = net.cuda()
    net.train()

    # Set up different groups of indices.
    # Each sample belongs to exactly one group; the hierarchy is as follows:
    # 1: samples moved by user in this cycle
    # 2: negatives selected through neighbor method
    # 3: new neighborhood
    # 4: samples moved by user in previous cycles
    # 5: old neighborhood
    # 6: high dimensional neighborhood of moved samples
    # 7: fix points / unrelated (remaining) samples

    # find high dimensional neighbors
    # use the first 100 nn of modified samples
    # TODO: Better rely on distance
    idx_high_dim_neighbors, _ = svm_k_nearest_neighbors(
        feature,
        np.union1d(idx_modified, idx_new_neighbors),
        negative_idcs=idx_negatives,
        k=100)

    # ensure there is no overlap between different index groups
    # (modified vs. negatives: guards against negatives that were moved
    # accidentally -- TODO: better filter before)
    idx_modified = np.setdiff1d(idx_modified, idx_negatives)
    idx_new_neighbors = np.setdiff1d(
        idx_new_neighbors, np.concatenate([idx_modified, idx_negatives]))
    idx_previously_modified = np.setdiff1d(
        previously_modified,
        np.concatenate([idx_modified, idx_new_neighbors, idx_negatives]))
    idx_old_neighbors = np.setdiff1d(
        np.concatenate([idx_old_neighbors, idx_high_dim_neighbors]),
        np.concatenate([
            idx_modified, idx_new_neighbors, idx_previously_modified,
            idx_negatives
        ]))
    idx_fix_points = np.setdiff1d(
        range(N),
        np.concatenate([
            idx_modified, idx_new_neighbors, idx_previously_modified,
            idx_old_neighbors, idx_negatives
        ]))

    groups = [
        idx_modified, idx_new_neighbors, idx_previously_modified,
        idx_old_neighbors, idx_fix_points, idx_negatives
    ]
    for i, g1 in enumerate(groups):
        for j, g2 in enumerate(groups):
            if i == j:
                continue
            overlap = np.intersect1d(g1, g2)
            if len(overlap) != 0:
                print('groups: {}, {}'.format(i, j))
                print(overlap)
                raise RuntimeError('Index groups overlap.')

    print('Group Overview:'
          '\n\tModified samples: {}'
          '\n\tNegative samples: {}'
          '\n\tNew neighbors: {}'
          '\n\tPreviously modified samples: {}'
          '\n\tOld neighbors: {}'
          '\n\tFix points: {}'.format(len(idx_modified), len(idx_negatives),
                                     len(idx_new_neighbors),
                                     len(idx_previously_modified),
                                     len(idx_old_neighbors),
                                     len(idx_fix_points)))

    # modify label (the signature allows label=None, so guard the writes)
    if label is not None:
        label[idx_modified, -1] = 'modified'
        label[idx_negatives, -1] = 'negative'
        label[idx_previously_modified, -1] = 'prev_modified'
        label[idx_new_neighbors, -1] = 'new neighbors'
        label[idx_old_neighbors, -1] = 'old neighbors'
        label[idx_high_dim_neighbors, -1] = 'high dim neighbors'
        label[idx_fix_points, -1] = 'other'

    optimizer = torch.optim.Adam(
        [p for p in net.parameters() if p.requires_grad], lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           'min',
                                                           patience=5,
                                                           threshold=1e-3,
                                                           verbose=True)

    kl_criterion = TSNELoss(N, use_cuda=use_cuda)
    l2_criterion = torch.nn.MSELoss(reduction='none')  # keep the output fixed
    noise_criterion = NormalizedMSE()

    # define the index samplers for data
    batch_size = 500
    max_len = max(
        len(idx_modified) + len(idx_previously_modified), len(idx_negatives),
        len(idx_new_neighbors), len(idx_old_neighbors), len(idx_fix_points))
    # Floor division keeps n_batches an int on Python 3 as well; the fix
    # point sampler uses a doubled batch size, hence the doubled divisor.
    if max_len == len(idx_fix_points):
        n_batches = max_len // (batch_size * 2) + 1
    else:
        n_batches = max_len // batch_size + 1

    def make_sampler(idcs, size=batch_size):
        # Random batches (without dropping the last partial batch) over idcs.
        return torch.utils.data.BatchSampler(
            sampler=torch.utils.data.SubsetRandomSampler(idcs),
            batch_size=size,
            drop_last=False)

    sampler_modified = make_sampler(idx_modified)
    sampler_negatives = make_sampler(idx_negatives)
    sampler_new_neighbors = make_sampler(idx_new_neighbors)
    sampler_prev_modified = make_sampler(idx_previously_modified)
    sampler_old_neighbors = make_sampler(idx_old_neighbors)
    sampler_high_dim_neighbors = make_sampler(idx_high_dim_neighbors)
    sampler_fixed = make_sampler(idx_fix_points, size=2 * batch_size)

    # train network until scheduler reduces learning rate to threshold value
    lr_threshold = 1e-5
    track_l2_loss = ChangeRateLogger(n_track=5,
                                     threshold=5e-2,
                                     order='smaller')
    # only consider order --> negative threshold (currently unused, see the
    # disabled regularization stop criterion below)
    track_noise_reg = ChangeRateLogger(n_track=10,
                                       threshold=-1,
                                       order='smaller')
    stop_criterion = False

    embeddings = {}
    model_states = {}
    # Always snapshot a deep copy: on CPU the plain state_dict() would share
    # its tensors with the live network and change with every update.
    cpu_net = copy.deepcopy(net).cpu()
    model_states[0] = {
        'epoch': 0,
        'loss': float('inf'),
        'state_dict': cpu_net.state_dict().copy(),
        'optimizer': optimizer.state_dict().copy(),
        'scheduler': scheduler.state_dict().copy()
    }
    embeddings[0] = old_embedding.numpy().copy()

    epoch = 1
    new_features = feature.copy()
    new_embedding = old_embedding.numpy().copy()

    t_beta = []
    t_train = []
    t_tensorboard = []
    t_save = []
    t_send = []
    t_iter = []

    tensor_feature = torch.from_numpy(feature)
    norms = torch.norm(tensor_feature, p=2, dim=1)
    feature_norm = torch.mean(norms)
    norm_margin = norms.std()
    # currently unused: the norm loss is disabled in the objective below
    norm_criterion = SoftNormLoss(norm_value=feature_norm, margin=norm_margin)
    # distance_criterion = NormalizedDistanceLoss()
    # distance_criterion = ContrastiveNormalizedDistanceLoss(margin=0.2 * feature_norm)
    triplet_margin = feature_norm
    triplet_selector = SemihardNegativeTripletSelector(
        margin=triplet_margin,
        cpu=False,
        preselect_index_positives=10,
        preselect_index_negatives=1,
        selection='random')
    distance_criterion = TripletLoss(margin=triplet_margin,
                                     triplet_selector=triplet_selector)
    negative_triplet_collector = []

    del norms

    while not stop_criterion:
        # if epoch < 30:           # do not use dropout at first
        #     net.eval()
        # else:
        net.train()

        t_iter_start = time.time()

        # compute beta for kl loss
        t_beta_start = time.time()
        kl_criterion._compute_beta(new_features)
        t_beta_end = time.time()
        t_beta.append(t_beta_end - t_beta_start)

        # set up losses
        l2_losses = AverageMeter()
        kl_losses = AverageMeter()
        distance_losses = AverageMeter()
        noise_regularization = AverageMeter()
        feature_norm_meter = AverageMeter()
        norm_losses = AverageMeter()
        weight_regularization = AverageMeter()
        losses = AverageMeter()

        # iterate over fix points (assume N_fixpoints >> N_modified)
        t_train_start = time.time()

        # Materialise n_batches batches per group: empty groups contribute
        # empty batches, short groups are re-sampled until long enough.
        batch_loaders = []
        for smplr in [
                sampler_modified, sampler_negatives, sampler_new_neighbors,
                sampler_prev_modified, sampler_old_neighbors, sampler_fixed,
                sampler_high_dim_neighbors
        ]:
            batches = list(smplr)
            if len(batches) == 0:
                batches = [[] for _ in range(n_batches)]
            while len(batches) < n_batches:
                to = min(n_batches - len(batches), len(batches))
                batches.extend(list(smplr)[:to])
            batch_loaders.append(batches)

        for batch_idx in range(n_batches):
            moved_indices = batch_loaders[0][batch_idx]
            negatives_indices = batch_loaders[1][batch_idx]
            new_neigh_indices = batch_loaders[2][batch_idx]
            prev_moved_indices = batch_loaders[3][batch_idx]
            old_neigh_indices = batch_loaders[4][batch_idx]
            fixed_indices = batch_loaders[5][batch_idx]
            high_neigh_indices = batch_loaders[6][batch_idx]
            n_moved, n_neg, n_new, n_prev, n_old, n_fixed, n_high = (
                len(moved_indices), len(negatives_indices),
                len(new_neigh_indices), len(prev_moved_indices),
                len(old_neigh_indices), len(fixed_indices),
                len(high_neigh_indices))

            # load data; the order of the groups fixes the row layout that
            # the slicing below relies on
            indices = np.concatenate([
                new_neigh_indices, moved_indices, negatives_indices,
                prev_moved_indices, fixed_indices, old_neigh_indices,
                high_neigh_indices
            ]).astype(np.int64)
            if len(indices) < 3 * kl_criterion.perplexity + 2:
                continue
            data = tensor_feature[indices]
            inputs = torch.autograd.Variable(
                data.cuda()) if use_cuda else torch.autograd.Variable(data)

            # compute forward
            fts_mod = net.mapping(inputs)
            # fts_mod_noise = net.mapping(inputs + 0.1 * torch.rand(inputs.shape).type_as(inputs))
            fts_mod_noise = net.mapping(
                inputs + torch.rand(inputs.shape).type_as(inputs))
            emb_mod = net.embedder(torch.nn.functional.relu(fts_mod))

            # compute losses
            # modified --> KL, L2, Dist
            # new neighborhood --> KL, Dist
            # previously modified --> KL, L2
            # old neighborhood + high dimensional neighborhood --> KL
            # fix point samples --> KL, L2
            noise_reg = noise_criterion(fts_mod, fts_mod_noise)
            noise_regularization.update(noise_reg.data, len(data))

            kl_loss = kl_criterion(fts_mod, emb_mod, indices)
            kl_losses.update(kl_loss.data, len(data))

            idx_l2_fixed = np.concatenate([
                new_neigh_indices, moved_indices, negatives_indices,
                prev_moved_indices, fixed_indices
            ]).astype(np.int64)
            l2_loss = torch.mean(l2_criterion(
                emb_mod[:n_new + n_moved + n_neg + n_prev + n_fixed],
                target_embedding[idx_l2_fixed].type_as(emb_mod)),
                                 dim=1)
            # weigh loss of space samples equally to all modified samples
            l2_loss = 0.5 * torch.mean(l2_loss[:n_new + n_moved + n_neg + n_prev]) + \
                0.5 * torch.mean(l2_loss[n_new + n_moved + n_neg + n_prev:])
            l2_losses.update(l2_loss.data, len(idx_l2_fixed))

            if epoch < 0:
                distance_loss = torch.tensor(0.)
            else:
                # Rows before n_head exclude the trailing old / high-dim
                # neighbours.  An explicit end index (instead of a negative
                # slice) stays correct when both groups are empty.
                n_head = len(indices) - (n_old + n_high)
                # distance_loss = distance_criterion(fts_mod[:n_new + n_moved])
                distance_loss_input = fts_mod[:n_head]
                distance_loss_target = torch.cat([
                    torch.ones(n_new + n_moved),
                    torch.zeros(n_neg + n_prev + n_fixed)
                ])
                distance_loss_weights = torch.cat([
                    torch.ones(n_new + n_moved + n_neg + n_prev),
                    0.5 * torch.ones(n_fixed)
                ])

                if n_neg > 0:
                    selected_negatives = {
                        1: np.arange(n_new + n_moved,
                                     n_new + n_moved + n_neg)
                    }
                else:
                    selected_negatives = None

                distance_loss, negative_triplets = distance_criterion(
                    distance_loss_input,
                    distance_loss_target,
                    concealed_classes=[0],
                    weights=distance_loss_weights,
                    selected_negatives=selected_negatives)
                if negative_triplets is not None:
                    negative_triplets = np.unique(negative_triplets.numpy())
                    negative_triplets = indices[:n_head][negative_triplets]
                    negative_triplet_collector.extend(negative_triplets)

                distance_loss_noise, _ = distance_criterion(
                    distance_loss_input +
                    torch.rand(distance_loss_input.shape).type_as(
                        distance_loss_input),
                    distance_loss_target,
                    concealed_classes=[0])
                distance_loss = 0.5 * distance_loss + 0.5 * distance_loss_noise
            distance_losses.update(distance_loss.data, n_new + n_moved)

            # norm_loss = norm_criterion(torch.mean(fts_mod.norm(p=2, dim=1)))
            # norm_losses.update(norm_loss.data, len(data))

            weight_reg = torch.autograd.Variable(
                torch.tensor(0.)).type_as(l2_loss)
            for param in net.mapping.parameters():
                weight_reg += param.norm(1)
            # detach for bookkeeping, like every other meter
            weight_regularization.update(weight_reg.data, len(data))

            # NOTE: the norm loss and the noise regularization are tracked
            # for logging only; neither is part of the optimised objective.
            loss = 1 * distance_loss.type_as(l2_loss) + 5 * l2_loss + \
                10 * kl_loss.type_as(l2_loss) + \
                1e-5 * weight_reg.type_as(l2_loss)
            losses.update(loss.data, len(data))

            feature_norm_meter.update(
                torch.mean(fts_mod.norm(p=2, dim=1)).data, len(data))

            # backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # update current embedding
            new_embedding[indices] = emb_mod.data.cpu().numpy()

            if epoch > 5 and (batch_idx + 1) * batch_size >= 2000:
                print('\tend epoch after {} random fix point samples'.format(
                    (batch_idx + 1) * batch_size))
                break

        t_train_end = time.time()
        t_train.append(t_train_end - t_train_start)

        t_tensorboard_start = time.time()
        scheduler.step(losses.avg)
        # np.unique([]) is a float array and cannot be used as an index, so
        # only relabel when something was actually collected.
        if label is not None and len(negative_triplet_collector) > 0:
            label[np.unique(negative_triplet_collector),
                  -1] = 'negative triplet'
        log.write('l2_loss', float(l2_losses.avg), epoch, test=False)
        log.write('distance_loss',
                  float(distance_losses.avg),
                  epoch,
                  test=False)
        log.write('kl_loss', float(kl_losses.avg), epoch, test=False)
        log.write('noise_regularization',
                  float(noise_regularization.avg),
                  epoch,
                  test=False)
        log.write('feature_norm',
                  float(feature_norm_meter.avg),
                  epoch,
                  test=False)
        log.write('norm_loss', float(norm_losses.avg), epoch, test=False)
        log.write('weight_reg',
                  float(weight_regularization.avg),
                  epoch,
                  test=False)
        log.write('loss', float(losses.avg), epoch, test=False)
        t_tensorboard_end = time.time()
        t_tensorboard.append(t_tensorboard_end - t_tensorboard_start)

        t_save_start = time.time()
        cpu_net = copy.deepcopy(net).cpu()
        model_states[epoch] = {
            'epoch': epoch,
            'loss': losses.avg.cpu(),
            'state_dict': cpu_net.state_dict().copy(),
            'optimizer': optimizer.state_dict().copy(),
            'scheduler': scheduler.state_dict().copy()
        }
        # new_embedding is updated in place every batch; store a copy so the
        # per-epoch snapshots do not all alias the final state.
        embeddings[epoch] = new_embedding.copy()
        t_save_end = time.time()
        t_save.append(t_save_end - t_save_start)

        print('Train Epoch: {}\t'
              'Loss: {:.4f}\t'
              'L2 Loss: {:.4f}\t'
              'Distance Loss: {:.4f}\t'
              'KL Loss: {:.4f}\t'
              'Noise Regularization: {:.4f}\t'
              'Weight Regularization: {:.4f}\t'
              'LR: {:.6f}'.format(epoch, float(losses.avg),
                                  float(5 * l2_losses.avg),
                                  float(0.5 * distance_losses.avg),
                                  float(10 * kl_losses.avg),
                                  float(noise_regularization.avg),
                                  float(1e-5 * weight_regularization.avg),
                                  optimizer.param_groups[-1]['lr']))

        t_send_start = time.time()
        # send to server
        if socket_id is not None:
            position = new_embedding if scale_func is None else scale_func(
                new_embedding)
            nodes = make_nodes(position=position, index=True, label=label)
            send_payload(nodes, socket_id, categories=categories)
        t_send_end = time.time()
        t_send.append(t_send_end - t_send_start)

        epoch += 1
        l2_stop_criterion = track_l2_loss.add_value(l2_losses.avg)
        epoch_stop_criterion = epoch > 150
        regularization_stop_criterion = False  # track_noise_reg.add_value(noise_regularization.avg)
        lr_stop_criterion = optimizer.param_groups[-1]['lr'] < lr_threshold
        stop_criterion = any([
            l2_stop_criterion, regularization_stop_criterion,
            lr_stop_criterion, epoch_stop_criterion
        ])
        t_iter_end = time.time()
        t_iter.append(t_iter_end - t_iter_start)

    print('Times:'
          '\n\tBeta: {})'
          '\n\tTrain: {})'
          '\n\tTensorboard: {})'
          '\n\tSave: {})'
          '\n\tSend: {})'
          '\n\tIteration: {})'.format(
              np.mean(t_beta),
              np.mean(t_train),
              np.mean(t_tensorboard),
              np.mean(t_save),
              np.mean(t_send),
              np.mean(t_iter),
          ))

    print('Training details: '
          '\n\tMean: {}'
          '\n\tMax: {} ({})'
          '\n\tMin: {} ({})'.format(np.mean(t_train), np.max(t_train),
                                   np.argmax(t_train), np.min(t_train),
                                   np.argmin(t_train)))

    previously_modified = np.append(previously_modified, idx_modified)

    # compute new features
    new_features = get_feature(net.mapping, feature)

    # write output files for the cycle
    outfile_config = os.path.join(outpath_config,
                                  'cycle_{:03d}_config.pkl'.format(cycle))
    outfile_embedding = os.path.join(
        outpath_embedding, 'cycle_{:03d}_embeddings.hdf5'.format(cycle))
    outfile_feature = os.path.join(outpath_feature,
                                   'cycle_{:03d}_feature.hdf5'.format(cycle))
    outfile_model_states = os.path.join(
        outpath_model, 'cycle_{:03d}_models.pth.tar'.format(cycle))

    with h5py.File(outfile_embedding, 'w') as f:
        f.create_dataset(name='image_id',
                         shape=image_id.shape,
                         dtype=image_id.dtype,
                         data=image_id)
        for saved_epoch, data in embeddings.items():
            f.create_dataset(name='epoch_{:04d}'.format(saved_epoch),
                             shape=data.shape,
                             dtype=data.dtype,
                             data=data)
    print('\tSaved {}'.format(os.path.join(os.getcwd(), outfile_embedding)))

    with h5py.File(outfile_feature, 'w') as f:
        f.create_dataset(name='feature',
                         shape=new_features.shape,
                         dtype=new_features.dtype,
                         data=new_features)
        f.create_dataset(name='image_id',
                         shape=image_id.shape,
                         dtype=image_id.dtype,
                         data=image_id)
    print('\tSaved {}'.format(os.path.join(os.getcwd(), outfile_feature)))

    torch.save(model_states, outfile_model_states)
    print('\tSaved {}'.format(os.path.join(os.getcwd(),
                                           outfile_model_states)))

    # write config file
    config_dict = {
        'idx_modified': idx_modified,
        'idx_old_neighbors': idx_old_neighbors,
        'idx_new_neighbors': idx_new_neighbors,
        'idx_high_dim_neighbors': idx_high_dim_neighbors
    }
    # pickle produces bytes, so the file has to be opened in binary mode
    with open(outfile_config, 'wb') as f:
        pickle.dump(config_dict, f)
    print('\tSaved {}'.format(os.path.join(os.getcwd(), outfile_config)))

    print('Done.')

    print('Finished training.')
    return new_embedding
# Show the parameters of the `conv2d` layer created earlier in the script.
print(conv2d.weight.data)
print(conv2d.bias.data)

# nn.Conv2d implements 2-D convolution. Its main arguments are:
#   in_channels  - number of channels of the input image (e.g. colour planes)
#   out_channels - number of channels produced by the convolution
#   kernel_size  - (int or tuple) size of the convolving kernel
#   stride       - (int or tuple, optional) step of the convolution, default 1
#   padding      - (int or tuple, optional) zero-padding added to both sides
#                  of the input, default 0
#   bias         - (bool, optional) if True, adds a learnable bias to the
#                  output, default True
# The forward pass takes a 4-D input (N (batch size), C_in, H_in, W_in) and
# returns a 4-D output (N, C_out, H_out, W_out).
X = torch.rand(4, 2, 3, 5)
conv2d = nn.Conv2d(in_channels=2, out_channels=3, kernel_size=(3, 5), stride=1, padding=(1, 2))
Y = conv2d(X)
print('Y.shape: ', Y.shape)
print('weight.shape: ', conv2d.weight.shape)
print('bias.shape: ', conv2d.bias.shape)

# Pooling mitigates the over-sensitivity of the convolution layer to position;
# the usual choices are max pooling and average pooling.
X = torch.arange(32, dtype=torch.float32).view(1, 2, 4, 4)
pool2d = nn.MaxPool2d(kernel_size=3, padding=1, stride=(2, 1))
Y = pool2d(X)
print(X)
# Tensor of range(#frames): produces a sequence to be later compared with the discovered indices idx_e2 = torch.tensor([i for i in range(dims_1[2])]).unsqueeze(0).repeat(dims_1[0],1) # Find matching sequences per batch e1toe2 = torch.sum(idx_exp==idx_e2,dim=-1)==dims_1[2] # recursion for cyclic-back if check==2: return e1toe2 else: e2toe1 = soft_nnc(embeddings2,embeddings1,check=2) # join together conditions = e1toe2+e2toe1 # return only the batch indices return torch.where(conditions==True)[0] if __name__ == "__main__": e1 = torch.rand(10,32,16) e2 = torch.rand(10,32,16) e3 = e1.clone() cyclic_c = soft_nnc(e1,e2) print('Test 1: e1 != e2 (indices):',cyclic_c.numpy(),'\n') cyclic_c = soft_nnc(e1,e3) print('Test 2: e1 == e3 (indices):',cyclic_c.numpy(),'\n')
def _dummy_image_loader(_):
    """Return a random ``3 x 196 x 196`` tensor; the argument is ignored."""
    channels, height, width = 3, 196, 196
    return torch.rand(channels, height, width)
def __init__(self, config):
    """Create the learnable ``kernel`` parameter.

    The kernel is drawn from ``torch.rand`` with shape
    ``[hidden_size, head_num, head_dim]``, all read from ``config.model``.
    """
    super().__init__()
    model_cfg = config.model
    kernel_shape = [
        model_cfg.hidden_size,
        model_cfg.head_num,
        model_cfg.head_dim,
    ]
    self.kernel = nn.Parameter(torch.rand(kernel_shape))
def rand(self, shape):
    """Draw a ``torch.rand`` tensor of ``shape`` and pass it through ``self.move``."""
    sample = torch.rand(shape)
    return self.move(sample)
def test():
    """Visual smoke test for ``barycenter`` in two and three dimensions.

    Draws 100 random points, rescales those with norm > 1 back to (just
    inside) the unit ball, and plots them together with the barycenter
    returned by ``barycenter`` and the plain Euclidean mean -- first in the
    unit disc, then in the unit sphere.  Opens two Matplotlib windows and
    returns nothing.
    """
    import torch
    import matplotlib.pyplot as plt
    from matplotlib.patches import Circle
    import numpy as np
    # Imported for its side effect: older Matplotlib releases only know the
    # '3d' projection once this module has been imported.
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

    def sample_in_ball(dim, spread):
        # Gaussian cloud around a random centre; points outside the unit
        # ball are divided by (norm + 1e-3) so they end up just inside it.
        X = torch.randn(100, dim) * spread + (
            torch.rand(1, dim).expand(100, dim) - 0.5) * 3
        xn = X.norm(2, -1)
        outside = xn > 1
        X[outside] /= (xn[outside].unsqueeze(-1).expand(
            xn[outside].shape[0], dim) + 1e-3)
        return X

    # ---- 2D: unit disc ----
    X = sample_in_ball(2, 0.5)
    mu = barycenter(X)
    euclidean_mean = X.mean(0)

    ax = plt.subplot()
    p = Circle((0, 0), 1, edgecolor='b', lw=1, facecolor='none')
    ax.add_patch(p)
    plt.scatter(X[:, 0].numpy(), X[:, 1].numpy())
    plt.scatter(mu[0, 0].item(), mu[0, 1].item(),
                label="Poincare barycenter", marker="s", c="red", s=100.)
    plt.scatter(euclidean_mean[0].item(), euclidean_mean[1].item(),
                label="Euclidean barycenter", marker="s", c="green", s=100.)
    plt.legend()
    plt.show()

    # ---- 3D: unit sphere ----
    print("3D")
    fig = plt.figure()
    # fig.gca(projection='3d') no longer works on current Matplotlib
    # (keyword arguments to gca() were removed); add_subplot is the
    # supported way to create 3D axes.
    ax = fig.add_subplot(111, projection='3d')
    try:
        ax.set_aspect("equal")
    except NotImplementedError:
        # Some Matplotlib releases do not support 'equal' on 3D axes; the
        # aspect ratio is purely cosmetic, so carry on without it.
        pass

    # draw sphere
    u, v = np.mgrid[0:2 * np.pi:20j, 0:np.pi:10j]
    x = np.cos(u) * np.sin(v)
    y = np.sin(u) * np.sin(v)
    z = np.cos(v)
    ax.plot_wireframe(x, y, z, color="r")

    X = sample_in_ball(3, 0.3)
    mu = barycenter(X)
    euclidean_mean = X.mean(0)

    ax.scatter(X[:, 0].numpy(), X[:, 1].numpy(), X[:, 2].numpy())
    ax.scatter(mu[0, 0].item(), mu[0, 1].item(), mu[0, 2].item(),
               label="Poincare barycenter", marker="s", c="red", s=100.)
    ax.scatter(euclidean_mean[0].item(), euclidean_mean[1].item(),
               euclidean_mean[2].item(),
               label="Euclidean barycenter", marker="s", c="green", s=100.)
    ax.legend()
    plt.show()
def rand_uniform(shape, min_value, max_value):
    """Return a tensor of ``shape`` sampled uniformly from [min_value, max_value)."""
    span = max_value - min_value
    unit_sample = torch.rand(shape)
    return unit_sample * span + min_value
def __init__(self, embeddings_shape, device, parser, pad_action, opt, n_features=768):
    """Initialize the parser model.

    Builds, in this order: the optional label embedding, the BERT encoder
    (with weights loaded from checkpoint files), the optional LSTM action
    history, the transition classifier and the label classifier.

    @param embeddings_shape: stored as ``self.embedding_size`` and passed as
        the first positional argument to ``BertConfig``
    @param device: stored as ``self.device``; not otherwise used here
    @param parser: object providing ``n_transit`` and ``P_NULL``
    @param pad_action: mapping; only ``pad_action['P']`` is read
    @param opt: options object; the attributes read here are nclass,
        ffdropout, ffhidden, batchsize, nlayershistory, maxsteplength,
        hiddensizelabel, poolinghid, fhistmodel, use_justexist, fcompmodel,
        graphinput, labelemb, nattentionlayer, nattentionheads, seppoint,
        layernorm, use_topbuffer, embsize, withbert, outputname and
        (only when fhistmodel is set) histsize
    @param n_features (int): input width of both classifiers (the history
        size is added to it when ``opt.fhistmodel`` is set)

    Checkpoints are loaded with ``torch.load`` from relative file names built
    as ``<prefix> + str(opt.outputname)``.
    """
    super(ParserModel, self).__init__()

    ## initialization of parameters
    self.n_features = n_features
    self.n_classes = opt.nclass
    self.dropout_prob = opt.ffdropout
    self.hidden_size = opt.ffhidden
    self.embedding_size = embeddings_shape
    self.batch_size = opt.batchsize
    self.device = device
    self.n_layers_history = opt.nlayershistory
    self.max_step_length = opt.maxsteplength
    self.parser = parser
    self.pad_action = pad_action['P']
    self.num_labels = parser.n_transit-1
    self.hidden_size_label = opt.hiddensizelabel
    self.pooling_hid = opt.poolinghid
    self.fhistmodel = opt.fhistmodel
    self.use_justexist = opt.use_justexist

    ## initialization of embedding and bert model
    # The label embedding has one extra row (index num_labels) that serves as
    # its padding index.
    if opt.fcompmodel or opt.graphinput:
        self.label_emb = nn.Embedding(self.num_labels+1,self.n_features,padding_idx=self.num_labels)
    else:
        self.label_emb= None

    # NOTE(review): BertConfig is a project class called positionally here;
    # the meaning of each position must be checked against its definition.
    bertconfig = BertConfig(self.embedding_size, parser.n_transit-1, opt.labelemb, parser.P_NULL,opt.graphinput,
                            opt.nattentionlayer,opt.nattentionheads,opt.fcompmodel,opt.seppoint,
                            self.label_emb,opt.layernorm,opt.use_topbuffer,opt.use_justexist,
                            opt.embsize,4*opt.embsize)

    self.bertmodel = BertModel(bertconfig)

    if opt.withbert:
        # Load one combined checkpoint; strict=False tolerates missing or
        # unexpected keys.
        state_dict = torch.load('small_bert'+str(opt.outputname))
        self.bertmodel.load_state_dict(state_dict,strict=False)
        del state_dict
    else:
        # Otherwise only the individual embedding tables are restored.
        state_dict_position = torch.load('position'+str(opt.outputname))
        self.bertmodel.embeddings.position_embeddings.load_state_dict(state_dict_position)
        if not opt.graphinput:
            state_dict_token = torch.load('token_type'+str(opt.outputname))
            self.bertmodel.embeddings.token_type_embeddings.load_state_dict(state_dict_token)
            del state_dict_token
        state_dict_word = torch.load('word_emb'+str(opt.outputname))
        self.bertmodel.embeddings.word_embeddings.load_state_dict(state_dict_word)
        del state_dict_position, state_dict_word
    ############################################################################################
    # Zero the rows used for padding: the extra label row and the P_NULL word row.
    if opt.graphinput or opt.fcompmodel:
        self.bertmodel.embeddings.label_emb.weight[parser.n_transit-1].data.fill_(0.0)
    self.bertmodel.embeddings.word_embeddings.weight[parser.P_NULL].data.fill_(0.0)
    ############################################################################################

    ### initialization of lstm history model
    if self.fhistmodel:
        self.hist_size = opt.histsize
        # n_classes + 1 rows: presumably one extra row for the padding action
        # -- TODO confirm against how pad_action is used.
        self.action_emb = nn.Embedding(self.n_classes+1, self.hist_size)
        self.history = HistoryLSTM(self.hist_size,self.hist_size,self.n_layers_history)
        self.dtype = torch.cuda.FloatTensor if cuda.is_available() else torch.FloatTensor
        # Learnable state vectors of size hist_size, randomly initialised
        # (named h0 / c0; presumably the LSTM's initial states -- confirm in
        # the forward pass).
        self.h0 = nn.Parameter(torch.rand(self.hist_size,requires_grad=True).type(self.dtype))
        self.c0 = nn.Parameter(torch.rand(self.hist_size,requires_grad=True).type(self.dtype))

    ## initialization of classifier
    # With the history model enabled the classifier input is widened by
    # hist_size.
    if self.fhistmodel:
        self.embed_to_hidden = nn.Linear(self.n_features+self.hist_size, self.hidden_size)
    else:
        self.embed_to_hidden = nn.Linear(self.n_features, self.hidden_size)

    nn.init.xavier_uniform_(self.embed_to_hidden.weight)

    self.relu = nn.LeakyReLU()
    self.dropout = nn.Dropout(self.dropout_prob)

    self.hidden_to_logits = nn.Linear(self.hidden_size, self.n_classes)
    nn.init.xavier_uniform_(self.hidden_to_logits.weight)

    ## initialization of label-classifier
    if self.fhistmodel:
        self.label_classifier = LabelClassifier(self.n_features+self.hist_size,
                                                self.hidden_size_label,self.num_labels)
    else:
        self.label_classifier = LabelClassifier(self.n_features,
                                                self.hidden_size_label,self.num_labels)
assert imgs.shape == (2, 3, 196, 196) assert labels.shape == (2, 4) torch.testing.assert_allclose(labels, torch.tensor(valid_labels)) data = next(iter(dm.test_dataloader())) imgs, labels = data["input"], data["target"] assert imgs.shape == (2, 3, 196, 196) assert labels.shape == (2, 4) torch.testing.assert_allclose(labels, torch.tensor(test_labels)) @pytest.mark.skipif(not _IMAGE_TESTING, reason="image libraries aren't installed.") @pytest.mark.parametrize( "data,from_function", [ (torch.rand(3, 3, 196, 196), ImageClassificationData.from_tensors), (np.random.rand(3, 3, 196, 196), ImageClassificationData.from_numpy), ], ) def test_from_data(data, from_function): img_data = from_function( train_data=data, train_targets=[0, 3, 6], val_data=data, val_targets=[1, 4, 7], test_data=data, test_targets=[2, 5, 8], batch_size=2, num_workers=0, )
def train(args, generator, discriminator):
    """Run the adversarial training loop (3,000,000 iterations).

    Every iteration updates the discriminator on one real and one generated
    batch; every ``n_critic``-th iteration also updates the generator and
    folds it into ``g_running``.  Sample images are written every 100
    iterations and ``g_running`` is checkpointed every 10,000 iterations.

    Besides its arguments the function relies on names that are not defined
    inside it and must exist in the enclosing module: ``g_optimizer``,
    ``d_optimizer``, ``g_running``, ``coord_handler``, ``patch_handler``,
    ``num_micro_in_macro``, ``config``, ``code_size``, ``n_critic``,
    ``step_D``, ``step_G`` and ``transform``.

    Args:
        args: Options object; attributes read here are max_size, batch,
            batch_default, path, lr, phase, init_size, ckpt, loss
            ('wgan-gp' or 'r1'), mixing and gen_sample.
        generator: Generator network (wrapped so that ``.module`` exists).
        discriminator: Discriminator network (wrapped so that ``.module``
            exists).

    Returns:
        None.
    """
    step = int(math.log2(args.max_size)) - 2  # original note: "-> 1" (true when args.max_size == 8)
    resolution = 4 * 2 ** step
    batch_size = args.batch.get(resolution, args.batch_default)
    dataset = MultiResolutionDataset(args.path, transform, resolution=resolution)
    loader = sample_data(
        dataset, batch_size, resolution
    )
    data_loader = iter(loader)

    adjust_lr(g_optimizer, args.lr.get(resolution, 0.001))
    adjust_lr(d_optimizer, args.lr.get(resolution, 0.001))

    pbar = tqdm(range(3000000))

    # Start in "discriminator update" mode: generator frozen.
    requires_grad(generator, False)
    requires_grad(discriminator, True)

    # Last recorded loss values, shown in the progress bar.
    disc_loss_val = 0
    gen_loss_val = 0
    grad_loss_val = 0

    alpha = 0
    used_sample = 0  # number of images consumed in the current phase

    max_step = int(math.log2(args.max_size)) - 2  # original note: log2(1024) - 2 = 8
    final_progress = False

    for i in pbar:
        discriminator.zero_grad()

        # Blend factor grows linearly with the images seen and saturates at 1
        # once used_sample + 1 reaches args.phase (original note assumes
        # args.phase == 600_000).  Original note: at alpha == 1 the
        # "skip_rgb" path is no longer applied.
        alpha = min(1, 1 / args.phase * (used_sample + 1))

        if (resolution == args.init_size and args.ckpt is None) or final_progress:
            # Original note: at the initial resolution there is no previous
            # output to blend with through the skip connection.
            alpha = 1

        if used_sample > args.phase * 2:
            # Phase finished (original note: > 1_200_000 images, i.e.
            # epochs per phase = args.phase * 2 / dataset size).
            used_sample = 0
            step += 1

            if step > max_step:
                step = max_step
                final_progress = True
                ckpt_step = step + 1

            else:
                alpha = 0
                ckpt_step = step

            # NOTE(review): resolution is derived from the outer `step_D`,
            # not from the `step` that was just incremented -- confirm that
            # this is intended.
            resolution = 4 * 2 ** step_D

            loader = sample_data(
                dataset, args.batch.get(resolution, args.batch_default), resolution
            )
            data_loader = iter(loader)

            # Checkpoint everything at the phase boundary.
            torch.save(
                {
                    'generator': generator.module.state_dict(),
                    'discriminator': discriminator.module.state_dict(),
                    'g_optimizer': g_optimizer.state_dict(),
                    'd_optimizer': d_optimizer.state_dict(),
                    'g_running': g_running.state_dict(),
                },
                r'checkpoint/train_step-{}.model'.format(ckpt_step))

            adjust_lr(g_optimizer, args.lr.get(resolution, 0.001))
            adjust_lr(d_optimizer, args.lr.get(resolution, 0.001))

        #### update discriminator
        try:
            real_image = next(data_loader)

        except (OSError, StopIteration):
            # Loader exhausted (or failed): restart it and try again.
            data_loader = iter(loader)
            real_image = next(data_loader)

        used_sample += real_image.shape[0]

        b_size = real_image.size(0)

        # get sample coords: one draw for the real batch and one for each of
        # the two fake batches (discriminator step / generator step)
        coord_handler.batch_size = b_size
        patch_handler.batch_size = b_size
        d_macro_coord_real, g_micro_coord_real, _ = coord_handler._euclidean_sample_coord()
        d_macro_coord_fake1, g_micro_coord_fake1, _ = coord_handler._euclidean_sample_coord()
        d_macro_coord_fake2, g_micro_coord_fake2, _ = coord_handler._euclidean_sample_coord()

        # g_micro_coord_real is left as returned (not converted) and is used
        # below only for cropping.
        d_macro_coord_real = torch.from_numpy(d_macro_coord_real).float().cuda()
        d_macro_coord_fake1, g_micro_coord_fake1 = torch.from_numpy(d_macro_coord_fake1).float().cuda(), torch.from_numpy(g_micro_coord_fake1).float().cuda()
        d_macro_coord_fake2, g_micro_coord_fake2 = torch.from_numpy(d_macro_coord_fake2).float().cuda(), torch.from_numpy(g_micro_coord_fake2).float().cuda()
        # Permutation applied after `.repeat(num_micro_in_macro, 1)` so that
        # the copies belonging to the same sample become adjacent rows.
        select = np.hstack([[i*b_size+j for i in range(num_micro_in_macro)] for j in range(b_size)])

        real_image = real_image.cuda()
        real_macro = micros_to_macro(patch_handler.crop_micro_from_full_gpu(real_image, g_micro_coord_real[:, 1:2], g_micro_coord_real[:, 0:1]), config["data_params"]["ratio_macro_to_micro"])

        if args.loss == 'wgan-gp':
            real_predict = discriminator(real_macro, d_macro_coord_real, step=step_D, alpha=alpha)
            # Mean critic score with a small penalty on its squared magnitude.
            real_predict = real_predict.mean() - 0.001 * (real_predict ** 2).mean()
            (-real_predict).backward()

        elif args.loss == 'r1':
            real_macro.requires_grad = True
            real_scores = discriminator(real_macro, d_macro_coord_real, step=step_D, alpha=alpha)
            real_predict = F.softplus(-real_scores).mean()
            # The graph is needed again for the gradient penalty below.
            real_predict.backward(retain_graph=True)

            # Gradient penalty on real samples: 10 / 2 * mean ||dD/dx||^2.
            grad_real = grad(
                outputs=real_scores.sum(), inputs=real_macro, create_graph=True
            )[0]
            grad_penalty = (
                grad_real.view(grad_real.size(0), -1).norm(2, dim=1) ** 2
            ).mean()
            grad_penalty = 10 / 2 * grad_penalty
            grad_penalty.backward()
            if i%10 == 0:
                grad_loss_val = grad_penalty.item()

        # Latent codes: code_size-2 random values per sample plus the micro
        # coordinates (presumably 2 values -- confirm) concatenated per patch.
        if args.mixing and random.random() < 0.9:
            # Style mixing: two codes per generator call.
            gen_in11, gen_in12, gen_in21, gen_in22 = torch.randn(
                4, b_size, code_size-2, device='cuda'
            ).chunk(4, 0)
            gen_in11 = gen_in11.squeeze(0)
            gen_in11 = torch.cat([gen_in11.repeat(num_micro_in_macro, 1)[select], g_micro_coord_fake1], dim=1)
            gen_in12 = gen_in12.squeeze(0)
            gen_in12 = torch.cat([gen_in12.repeat(num_micro_in_macro, 1)[select], g_micro_coord_fake1], dim=1)
            gen_in21 = gen_in21.squeeze(0)
            gen_in21 = torch.cat([gen_in21.repeat(num_micro_in_macro, 1)[select], g_micro_coord_fake2], dim=1)
            gen_in22 = gen_in22.squeeze(0)
            gen_in22 = torch.cat([gen_in22.repeat(num_micro_in_macro, 1)[select], g_micro_coord_fake2], dim=1)
            gen_in1 = [gen_in11, gen_in12]
            gen_in2 = [gen_in21, gen_in22]

        else:
            gen_in1, gen_in2 = torch.randn(2, b_size, code_size-2, device='cuda').chunk(
                2, 0  # original note: 512
            )
            gen_in1 = gen_in1.squeeze(0)  # original note: (B, 254)
            gen_in2 = gen_in2.squeeze(0)  # original note: (B, 254)
            # repeat each code for every micro patch and append the coordinates
            gen_in1 = torch.cat([gen_in1.repeat(num_micro_in_macro, 1)[select], g_micro_coord_fake1], dim=1)
            gen_in2 = torch.cat([gen_in2.repeat(num_micro_in_macro, 1)[select], g_micro_coord_fake2], dim=1)

        fake_image = generator(gen_in1, step=step_G, alpha=alpha)
        fake_image = micros_to_macro(fake_image, config["data_params"]["ratio_macro_to_micro"])
        fake_predict = discriminator(fake_image, d_macro_coord_fake1, step=step_D, alpha=alpha)

        if args.loss == 'wgan-gp':
            fake_predict = fake_predict.mean()
            fake_predict.backward()

            # Gradient penalty on random interpolations of real and fake.
            # NOTE(review): this interpolates with `real_image` (not
            # `real_macro`) and calls the discriminator without a coordinate
            # argument, unlike every other call -- verify before using the
            # 'wgan-gp' loss.
            eps = torch.rand(b_size, 1, 1, 1).cuda()
            x_hat = eps * real_image.data + (1 - eps) * fake_image.data
            x_hat.requires_grad = True
            hat_predict = discriminator(x_hat, step=step_D, alpha=alpha)
            grad_x_hat = grad(
                outputs=hat_predict.sum(), inputs=x_hat, create_graph=True
            )[0]
            grad_penalty = (
                (grad_x_hat.view(grad_x_hat.size(0), -1).norm(2, dim=1) - 1) ** 2
            ).mean()
            grad_penalty = 10 * grad_penalty
            grad_penalty.backward()
            if i%10 == 0:
                grad_loss_val = grad_penalty.item()
                disc_loss_val = (real_predict - fake_predict).item()

        elif args.loss == 'r1':
            fake_predict = F.softplus(fake_predict).mean()
            fake_predict.backward()
            if i%10 == 0:
                disc_loss_val = (real_predict + fake_predict).item()

        d_optimizer.step()

        #### update generator
        if (i + 1) % n_critic == 0:
            generator.zero_grad()

            # Switch to "generator update" mode.
            requires_grad(generator, True)
            requires_grad(discriminator, False)

            fake_image = generator(gen_in2, step=step_G, alpha=alpha)
            fake_image = micros_to_macro(fake_image, config["data_params"]["ratio_macro_to_micro"])
            predict = discriminator(fake_image, d_macro_coord_fake2, step=step_D, alpha=alpha)

            if args.loss == 'wgan-gp':
                loss = -predict.mean()

            elif args.loss == 'r1':
                loss = F.softplus(-predict).mean()

            if i%10 == 0:
                gen_loss_val = loss.item()

            loss.backward()
            g_optimizer.step()
            # Fold the freshly updated generator into the running copy.
            accumulate(g_running, generator.module)

            # Back to "discriminator update" mode.
            requires_grad(generator, False)
            requires_grad(discriminator, True)

        #### validation: save a grid of samples from g_running
        if (i + 1) % 100 == 0:
            images = []

            gen_i, gen_j = args.gen_sample.get(resolution, (10, 5))

            coord_handler.batch_size = gen_i * gen_j
            _, g_micro_coord_val, _ = coord_handler._euclidean_sample_coord()
            g_micro_coord_val = torch.from_numpy(g_micro_coord_val).float().cuda()
            #print(g_micro_coord_val.shape)
            # Same regrouping permutation as above, for gen_j samples per row.
            select = np.hstack([[i*gen_j+j for i in range(num_micro_in_macro)] for j in range(gen_j)])
            with torch.no_grad():
                for ii in range(gen_i):
                    style = torch.randn(gen_j, code_size-2).cuda().repeat(num_micro_in_macro, 1)[select]
                    #print(style.size())
                    coords = g_micro_coord_val[ii*gen_j*num_micro_in_macro:(ii+1)*gen_j*num_micro_in_macro]
                    #print(coords.size())
                    style = torch.cat([style, coords], dim=1)
                    image = g_running(style, step=step_G, alpha=alpha).data.cpu()
                    image = micros_to_macro(image, config['data_params']['ratio_macro_to_micro'])
                    images.append(
                        image
                    )

            utils.save_image(
                torch.cat(images, 0),
                r'sample_spatialR/%06d.png'%(i+1),
                nrow=gen_i,
                normalize=True,
                range=(-1, 1),
            )

        if (i + 1) % 10000 == 0:
            torch.save(
                g_running.state_dict(), r'checkpoint/%06d.model'%(i+1)
            )

        state_msg = (
            r'Size: {}; G: {:.3f}; D: {:.3f}; Grad: {:.3f}; Alpha: {:.5f}'.format(4 * 2 ** step, gen_loss_val, disc_loss_val, grad_loss_val, alpha)
        )

        pbar.set_description(state_msg)
def test_roi_align(self):
    """Run an ``ops.RoIAlign`` module on a random input and a single ROI row.

    The module, together with its ``(input, rois)`` argument pair, is handed
    to ``self.run_model``, which is expected to perform the actual checks.
    """
    # Random float32 input of shape (1, 1, 10, 10).
    feature_map = torch.rand(1, 1, 10, 10, dtype=torch.float32)
    # One row of five values; presumably (batch_index, x1, y1, x2, y2) --
    # confirm against the RoIAlign documentation.
    rois = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
    roi_align = ops.RoIAlign((5, 5), 1, 2)
    model_inputs = [(feature_map, rois)]
    self.run_model(roi_align, model_inputs)
from util import timeit, get_logger
import random
from all_model import AllModel
from process_data import ProcessData, split_train_and_valid, split_train_and_test
from feat import Feat
import time

VERBOSITY_LEVEL = 'INFO'
LOGGER = get_logger(VERBOSITY_LEVEL, __file__)

pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

# Warm-up: run one small matrix product on the selected device and log how
# long it takes, so the one-off initialisation cost is visible in the log.
s = time.time()
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
matrix_a = torch.rand((100, 100))
matrix_b = torch.rand((100, 100))
# torch.rand already returns tensors; no need to re-wrap them in torch.Tensor.
torch.mm(matrix_a.to(device), matrix_b.to(device)).cpu().numpy()
LOGGER.info(f'init torch.mm:{time.time()-s}s')

SEED = 2020
# split_mode = 'stratified','stratified_cv','shuffle_split'
split_mode = 'stratified_cv'
offline = True

if offline:
    # prettytable is only needed in offline mode; install it on demand.
    try:
        import prettytable as pt
    except ImportError:
        # Only a missing module should trigger the install; a bare ``except``
        # would also swallow KeyboardInterrupt and unrelated errors.
        os.system('pip install prettytable')
        import prettytable as pt
import time
import torch
import torch.nn as nn

# 2. Linear regression example:

# Prepare the data: n samples with 2 features drawn uniformly from [-5, 5).
n = 1000000
x = 10 * torch.rand([n, 2]) - 5.0
w0 = torch.tensor([[2.0, -3.0]])
b0 = torch.tensor([[10.0]])
y = x @ w0.t() + b0 + torch.normal(0.0, 2.0, size=[n, 1])  # add Gaussian noise

# Move the data to the GPU when one is available, otherwise stay on the CPU.
# Using .to(device) (rather than an unconditional .cuda()) honours the
# fallback computed here and keeps the script runnable on CPU-only hosts.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x = x.to(device)
y = y.to(device)


# Define the model
class LinearRegression(nn.Module):
    """Single linear layer ``x @ w.T + b`` with explicitly declared parameters.

    ``w`` is initialised from a standard normal with the shape of ``w0`` and
    ``b`` is initialised to zeros with the shape of ``b0``.
    """

    def __init__(self):
        super().__init__()
        self.w = nn.Parameter(torch.randn_like(w0))
        self.b = nn.Parameter(torch.zeros_like(b0))

    def forward(self, x):
        """Return the affine prediction for a batch ``x`` of 2-feature rows."""
        return x @ self.w.t() + self.b


linear = LinearRegression()
def _stack_mahalanobis_scores(model, loader, is_in_distribution, sample_mean,
                              precision, num_output, magnitude):
    """Collect the scores of every feature layer and stack them column-wise.

    Calls ``lib_generation.get_Mahalanobis_score`` once per layer index in
    ``range(num_output)``, converts each result to a float32 array, flattens
    it to ``(num_samples, -1)`` and concatenates all layers along axis 1.
    """
    per_layer = []
    for layer_index in range(num_output):
        scores = lib_generation.get_Mahalanobis_score(
            model, loader, args.num_classes, args.outf,
            is_in_distribution, args.net_type, sample_mean, precision,
            layer_index, magnitude)
        scores = np.asarray(scores, dtype=np.float32)
        per_layer.append(scores.reshape((scores.shape[0], -1)))
    # One concatenation at the end instead of re-copying the array per layer.
    return np.concatenate(per_layer, axis=1)


def main():
    """Compute Mahalanobis scores for in- and out-of-distribution data.

    Reads its configuration from the module-level ``args``. For every noise
    magnitude and every out-of-distribution dataset, the merged scores and
    labels are saved as ``Mahalanobis_<magnitude>_<dataset>_<out_dist>.npy``
    inside ``args.outf``.

    Raises:
        Exception: if ``args.net_type`` is not one of the supported networks.
    """
    # set the path to pre-trained model and output
    args.outf = args.outf + args.net_type + '_' + args.dataset + '/'
    # makedirs(exist_ok=True) also creates missing parents and does not fail
    # if the directory appears between a check and the creation.
    os.makedirs(args.outf, exist_ok=True)
    torch.cuda.manual_seed(0)
    torch.cuda.set_device(args.gpu)

    out_dist_list = [
        'skin_cli', 'skin_derm', 'corrupted', 'corrupted_70', 'imgnet', 'nct',
        'final_test'
    ]

    # load networks: pick the architecture and its checkpoint, then run the
    # shared loading steps once.
    if args.net_type == 'densenet_121':
        model = densenet_121.Net(models.densenet121(pretrained=False), 8)
        checkpoint_path = "../checkpoints/densenet-121/checkpoint.pth"
    elif args.net_type == 'mobilenet':
        model = mobilenet.Net(models.mobilenet_v2(pretrained=False), 8)
        checkpoint_path = "../checkpoints/mobilenet/checkpoint.pth"
    elif args.net_type == 'resnet_50':
        model = resnet_50.Net(models.resnet50(pretrained=False), 8)
        checkpoint_path = "../checkpoints/resnet-50/checkpoint.pth"
    elif args.net_type == 'vgg_16':
        model = vgg_16.Net(models.vgg16_bn(pretrained=False), 8)
        checkpoint_path = "../checkpoints/vgg-16/checkpoint.pth"
    else:
        raise Exception(f"There is no net_type={args.net_type} available.")

    ckpt = torch.load(checkpoint_path)
    model.load_state_dict(ckpt['model_state_dict'])
    model.eval()
    model.cuda()
    # The densenet branch never printed this message; keep the output as is.
    if args.net_type != 'densenet_121':
        print("Done!")

    in_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    print('load model: ' + args.net_type)

    # load dataset
    print('load target data: ', args.dataset)
    train_loader, test_loader = data_loader.getTargetDataSet(
        args.dataset, args.batch_size, in_transform, args.dataroot)

    # set information about feature extraction: push a dummy batch through
    # the network and record size(1) of every returned feature tensor.
    model.eval()
    temp_x = torch.rand(2, 3, 224, 224).cuda()
    temp_x = Variable(temp_x)
    temp_list = model.feature_list(temp_x)[1]
    num_output = len(temp_list)
    feature_list = np.empty(num_output)
    for layer_index, out in enumerate(temp_list):
        feature_list[layer_index] = out.size(1)

    print('get sample mean and covariance')
    sample_mean, precision = lib_generation.sample_estimator(
        model, args.num_classes, feature_list, train_loader)

    print('get Mahalanobis scores')
    m_list = [0.0, 0.01, 0.005, 0.002, 0.0014, 0.001, 0.0005]
    for magnitude in m_list:
        print('Noise: ' + str(magnitude))
        Mahalanobis_in = _stack_mahalanobis_scores(
            model, test_loader, True, sample_mean, precision, num_output,
            magnitude)

        for out_dist in out_dist_list:
            out_test_loader = data_loader.getNonTargetDataSet(
                out_dist, args.batch_size, in_transform, args.dataroot)
            print('Out-distribution: ' + out_dist)
            Mahalanobis_out = _stack_mahalanobis_scores(
                model, out_test_loader, False, sample_mean, precision,
                num_output, magnitude)

            Mahalanobis_data, Mahalanobis_labels = lib_generation.merge_and_generate_labels(
                Mahalanobis_out, Mahalanobis_in)
            file_name = os.path.join(
                args.outf, 'Mahalanobis_%s_%s_%s.npy' %
                (str(magnitude), args.dataset, out_dist))
            # Labels are appended as extra trailing column(s) of the saved array.
            Mahalanobis_data = np.concatenate(
                (Mahalanobis_data, Mahalanobis_labels), axis=1)
            np.save(file_name, Mahalanobis_data)
SWITCH_WORDS = False SPEECH_FILE = os.path.join("data", CORPUS_NAME, "Clinton_2016-07-28.txt") # with open(MODEL_CHECKPOINT, 'rb') as f: model = torch.load(f) if USE_CUDA: model.cuda() else: model.cpu() corpus = data.Corpus(CORPUS_NAME) glove_embedding = glove.GloveEmbedding(corpus.vocabulary) ntokens = corpus.vocabulary.num_words hidden = model.init_hidden(1) input = Variable(torch.rand(1, 1).mul(ntokens).long(), volatile=True) if USE_CUDA: input.data = input.data.cuda() words = '' # read speech file for initialization if SPEECH_FILE is not None: speech_for_gen = torch.LongTensor(30) with open(SPEECH_FILE, 'r', encoding="utf8") as f: token = 0 for line in f: if token == 30: break twords = data.normalizeString(line).split() + ['EOS'] if len(twords) > 1:
def __getitem__(self, index):
    """Return a freshly sampled, normalized random image and a dummy target.

    ``index`` is accepted but not used: every call draws a new uniform random
    tensor of shape ``self.shape`` and pairs it with the constant target 0.
    """
    dummy_target = 0  # Dummy target value
    random_image = torch.rand(*self.shape)
    normalized_image = F.normalize(random_image, normalizing_mean, normalizing_std)
    return normalized_image, dummy_target
def __init__(self, dev):
    """Build the argument tuples and the per-category op lists for autocast tests.

    Every tensor created here is placed on ``dev``. Each list entry starts
    with an op name followed by a tuple of arguments for that op; some
    entries carry an extra trailing item (an expected dtype, a kwargs dict,
    a skip flag, or a module), depending on the list.

    Args:
        dev: device passed to every tensor factory call in this method.
    """
    super().__init__()
    n = 8  # common size of every 1-D / square / n-D test tensor below
    # Utility arguments, created as one-element tuples
    # (so they can be joined with ``+`` to form an op's argument tuple).
    pointwise0_fp16 = (torch.randn(n, dtype=torch.float16, device=dev),)
    pointwise1_fp16 = (torch.randn(n, dtype=torch.float16, device=dev),)
    pointwise2_fp16 = (torch.randn(n, dtype=torch.float16, device=dev),)
    mat0_fp16 = (torch.randn((n, n), dtype=torch.float16, device=dev),)
    mat1_fp16 = (torch.randn((n, n), dtype=torch.float16, device=dev),)
    mat2_fp16 = (torch.randn((n, n), dtype=torch.float16, device=dev),)

    # 3-D, 4-D and 5-D shapes; conv_args_fp32[k] holds a pair of tensors of
    # shape dimsets[k] (used below for the 1d, 2d and 3d conv variants).
    dimsets = ((n, n, n), (n, n, n, n), (n, n, n, n, n))
    conv_args_fp32 = [(torch.randn(dimset, dtype=torch.float32, device=dev),
                       torch.randn(dimset, dtype=torch.float32, device=dev))
                      for dimset in dimsets]
    bias_fp32 = (torch.randn((n,), dtype=torch.float32, device=dev),)
    element0_fp32 = (torch.randn(1, dtype=torch.float32, device=dev),)
    pointwise0_fp32 = (torch.randn(n, dtype=torch.float32, device=dev),)
    pointwise1_fp32 = (torch.randn(n, dtype=torch.float32, device=dev),)
    mat0_fp32 = (torch.randn((n, n), dtype=torch.float32, device=dev),)
    mat1_fp32 = (torch.randn((n, n), dtype=torch.float32, device=dev),)
    mat2_fp32 = (torch.randn((n, n), dtype=torch.float32, device=dev),)
    mat3_fp32 = (torch.randn((n, n), dtype=torch.float32, device=dev),)

    # The lists below organize ops that autocast needs to test.
    # self.list_name corresponds to test_autocast_list_name in test/test_cuda.py.
    # Each op is associated with a tuple of valid arguments.
    # In addition, cudnn conv ops are not supported on ROCm and hence will
    # be skipped by passing TEST_WITH_ROCM flag to those ops in self.torch_fp16 list.

    # Some ops implement built-in type promotion. These don't need autocasting,
    # but autocasting relies on their promotion, so we include tests to double-check.
    # Entries here are (op name, mixed fp32/fp16 args, dtype given as third item).
    self.torch_expect_builtin_promote = [
        ("eq", pointwise0_fp32 + pointwise1_fp16, torch.bool),
        ("ge", pointwise0_fp32 + pointwise1_fp16, torch.bool),
        ("gt", pointwise0_fp32 + pointwise1_fp16, torch.bool),
        ("le", pointwise0_fp32 + pointwise1_fp16, torch.bool),
        ("lt", pointwise0_fp32 + pointwise1_fp16, torch.bool),
        ("ne", pointwise0_fp32 + pointwise1_fp16, torch.bool),
        ("add", pointwise0_fp32 + pointwise1_fp16, torch.float32),
        ("div", pointwise0_fp32 + pointwise1_fp16, torch.float32),
        ("mul", pointwise0_fp32 + pointwise1_fp16, torch.float32),
        ("cat", (pointwise0_fp16 + pointwise1_fp32,), torch.float32),
        ("equal", pointwise0_fp32 + pointwise1_fp16, torch.float32),
        ("stack", (pointwise0_fp16 + pointwise1_fp32,), torch.float32),
    ]
    # Same idea as above, but keyed by dunder method names.
    self.methods_expect_builtin_promote = [
        ("__eq__", pointwise0_fp32 + pointwise1_fp16, torch.bool),
        ("__ge__", pointwise0_fp32 + pointwise1_fp16, torch.bool),
        ("__gt__", pointwise0_fp32 + pointwise1_fp16, torch.bool),
        ("__le__", pointwise0_fp32 + pointwise1_fp16, torch.bool),
        ("__lt__", pointwise0_fp32 + pointwise1_fp16, torch.bool),
        ("__ne__", pointwise0_fp32 + pointwise1_fp16, torch.bool),
        ("__add__", pointwise0_fp32 + pointwise1_fp16, torch.float32),
        ("__div__", pointwise0_fp32 + pointwise1_fp16, torch.float32),
        ("__mul__", pointwise0_fp32 + pointwise1_fp16, torch.float32),
    ]

    # The remaining lists organize ops that autocast treats explicitly.
    # torch_fp16: ops given fp32 arguments here (presumably expected to run
    # in fp16 under autocast -- confirm against the consuming test).
    self.torch_fp16 = [
        # deprecated _convolution
        ("_convolution", conv_args_fp32[1] + bias_fp32 + ((1, 1), (0, 0), (1, 1), False,
                                                           (0, 0), 1, False, True, True)),
        # the current _convolution
        ("_convolution", conv_args_fp32[1] + bias_fp32 + ((1, 1), (0, 0), (1, 1), False,
                                                           (0, 0), 1, False, True, True, True)),
        ("conv1d", conv_args_fp32[0]),
        ("conv2d", conv_args_fp32[1]),
        ("conv3d", conv_args_fp32[2]),
        ("conv_tbc", conv_args_fp32[0] + bias_fp32),
        ("conv_transpose1d", conv_args_fp32[0]),
        ("conv_transpose2d", conv_args_fp32[1]),
        ("conv_transpose3d", conv_args_fp32[2]),
        ("convolution", conv_args_fp32[1] + bias_fp32 + ((1, 1), (0, 0), (1, 1), False, (0, 0), 1)),
        # The trailing TEST_WITH_ROCM item is the skip flag described above.
        ("cudnn_convolution", conv_args_fp32[1] + ((0, 0), (1, 1), (1, 1), 1, False, True, True), TEST_WITH_ROCM),
        ("cudnn_convolution_transpose", conv_args_fp32[1] + ((0, 0), (0, 0), (1, 1),
                                                              (1, 1), 1, False, True, True), TEST_WITH_ROCM),
        ("prelu", pointwise0_fp32 + element0_fp32),
        ("addmm", mat1_fp32 + mat2_fp32 + mat3_fp32),
        ("addmv", pointwise0_fp32 + mat2_fp32 + pointwise1_fp32),
        ("addr", mat0_fp32 + pointwise0_fp32 + pointwise1_fp32),
        ("matmul", mat0_fp32 + mat1_fp32),
        # NOTE(review): this entry has the equation string in the args slot and
        # the tensors as a third item -- confirm how the consumer unpacks it.
        ("einsum", "bkhd,bqhd->bqkh", mat0_fp32 + mat1_fp32),
        ("mm", mat0_fp32 + mat1_fp32),
        ("mv", mat0_fp32 + pointwise0_fp32),
        ("chain_matmul", mat0_fp32 + mat1_fp32 + mat2_fp32),
        ("addbmm", mat0_fp32 + (torch.randn((n, n, n), device=dev, dtype=torch.float32),
                                torch.randn((n, n, n), device=dev, dtype=torch.float32))),
        ("baddbmm", (torch.randn((n, n, n), device=dev, dtype=torch.float32),
                     torch.randn((n, n, n), device=dev, dtype=torch.float32),
                     torch.randn((n, n, n), device=dev, dtype=torch.float32))),
        ("bmm", (torch.randn((n, n, n), device=dev, dtype=torch.float32),
                 torch.randn((n, n, n), device=dev, dtype=torch.float32))),
        # _thnn_fused_lstm_cell and _thnn_fused_gru_cell are not Python-exposed as far as I can tell.
        # ("_thnn_fused_lstm_cell", mat0_fp32 + mat1_fp32 + mat2_fp32 + pointwise0_fp32 + pointwise1_fp32),
        # ("_thnn_fused_gru_cell", mat0_fp32 + mat1_fp32 + mat2_fp32 + pointwise0_fp32 + pointwise1_fp32),
        # RNN cell arguments come from the self._rnn_cell_args helper.
        ("lstm_cell", self._rnn_cell_args(n, num_chunks=4, is_lstm=True, dev=dev, dtype=torch.float32)),
        ("gru_cell", self._rnn_cell_args(n, num_chunks=3, is_lstm=False, dev=dev, dtype=torch.float32)),
        ("rnn_tanh_cell", self._rnn_cell_args(n, num_chunks=1, is_lstm=False, dev=dev, dtype=torch.float32)),
        ("rnn_relu_cell", self._rnn_cell_args(n, num_chunks=1, is_lstm=False, dev=dev, dtype=torch.float32)),
    ]
    # torch_fp32: ops given fp16 arguments here. Inputs are clamped where the
    # op would otherwise receive out-of-domain values (e.g. log, acos).
    self.torch_fp32 = [
        ("acos", (pointwise0_fp16[0].clamp(-.9, 0.9),)),
        ("asin", (pointwise0_fp16[0].clamp(-.9, 0.9),)),
        ("cosh", pointwise0_fp16),
        ("erfinv", (pointwise0_fp16[0].clamp(-.9, .9),)),
        ("exp", pointwise0_fp16),
        ("expm1", pointwise0_fp16),
        ("log", (pointwise0_fp16[0].clamp(0.1, 100.0),)),
        ("log10", (pointwise0_fp16[0].clamp(0.1, 100.0),)),
        ("log2", (pointwise0_fp16[0].clamp(0.1, 100.0),)),
        ("log1p", (pointwise0_fp16[0].clamp(-0.9, 100.0),)),
        ("reciprocal", pointwise0_fp16),
        ("rsqrt", (pointwise0_fp16[0].clamp(0.0, 100.0),)),
        ("sinh", pointwise0_fp16),
        ("tan", (pointwise0_fp16[0].clamp(-3.1 / 2, 3.1 / 2),)),
        ("pow", ((pointwise0_fp16[0] + 1.).clamp(0.0, 100.0),) + pointwise1_fp16),
        ("pow", ((pointwise0_fp16[0] + 1.).clamp(0.0, 100.0),) + (1.7,)),
        # ("pow", (1.7,) + pointwise0_fp16), # This variant has a backend, but is not documented in the API.
        ("softmax", pointwise0_fp16 + (0,)),
        ("log_softmax", pointwise0_fp16 + (0,)),
        ("layer_norm", pointwise0_fp16 + ((pointwise0_fp16[0].numel(),),)),
        ("group_norm", mat0_fp16 + (1,)),
        # Entries with a third dict item carry keyword arguments for the op.
        ("norm", pointwise0_fp16),
        ("norm", pointwise0_fp16, {"dim": 0}),
        # these need magma
        # ("norm", mat0_fp16, {"p": "nuc"}),
        # ("norm", mat0_fp16, {"p": "nuc", "dim": 0}),
        ("norm", pointwise0_fp16, {"p": 1}),
        ("norm", pointwise0_fp16, {"p": 1, "dim": 0}),
        ("cosine_similarity", mat0_fp16 + mat1_fp16),
        ("poisson_nll_loss", mat0_fp16 + mat1_fp16 + (True, False, 1.e-8, torch.nn._reduction.get_enum('mean'))),
        ("cosine_embedding_loss", (torch.tensor([[1, 2, 3]], device=dev, dtype=torch.float16),
                                   torch.tensor([[1, 3, 4]], device=dev, dtype=torch.float16),
                                   torch.tensor([1], device=dev, dtype=torch.int))),
        ("hinge_embedding_loss", mat0_fp16 + (torch.ones(n, device=dev, dtype=torch.int),)),
        ("kl_div", mat0_fp16 + (torch.rand((n, n), device=dev, dtype=torch.float16),)),
        ("margin_ranking_loss", mat0_fp16 + mat1_fp16 + (torch.ones((n,), device=dev, dtype=torch.float16),)),
        ("triplet_margin_loss", mat0_fp16 + mat1_fp16 + mat2_fp16),
        ("binary_cross_entropy_with_logits", mat0_fp16 + (torch.rand((n, n), device=dev, dtype=torch.float16),)),
        ("cumprod", pointwise0_fp16 + (0,)),
        ("cumsum", pointwise0_fp16 + (0,)),
        ("dist", pointwise0_fp16 + pointwise1_fp16),
        ("pdist", mat0_fp16),
        ("cdist", mat0_fp16 + mat1_fp16),
        ("prod", pointwise0_fp16),
        ("prod", pointwise0_fp16 + (0,)),
        ("renorm", mat0_fp16 + (2, 0, 1.0)),
        ("sum", pointwise0_fp16),
        ("sum", mat0_fp16 + (1,)),
        ("logsumexp", mat0_fp16 + (1,)),
    ]
    # torch_need_autocast_promote: ops called with a mix of fp16 and fp32
    # tensor arguments.
    self.torch_need_autocast_promote = [
        ("addcdiv", pointwise0_fp32 + pointwise1_fp16 + (pointwise2_fp16[0].clamp(0.1, 100),)),
        ("addcmul", pointwise0_fp32 + pointwise1_fp16 + pointwise2_fp16),
        ("atan2", pointwise0_fp32 + (pointwise1_fp16[0].clamp(0.1, 100),)),
        ("bilinear", (torch.randn((1, 2), dtype=torch.float16, device=dev),
                      torch.randn((1, 2), dtype=torch.float32, device=dev),
                      torch.randn((1, 2, 2), dtype=torch.float16, device=dev),
                      torch.randn((1,), dtype=torch.float32, device=dev))),
        ("cross", (torch.randn(3, dtype=torch.float32, device=dev),
                   torch.randn(3, dtype=torch.float16, device=dev))),
        ("dot", pointwise0_fp16 + pointwise1_fp32),
        ("grid_sampler", (torch.randn((2, 3, 33, 22), dtype=torch.float16, device=dev),
                          torch.randn((2, 22, 11, 2), dtype=torch.float32, device=dev),
                          0, 0, False)),
        # index_put and scatter_add are listed twice, once per dtype ordering.
        ("index_put", pointwise0_fp32 + ((torch.tensor([1], device=dev, dtype=torch.long),),
                                         torch.randn(1, device=dev, dtype=torch.float16))),
        ("index_put", pointwise0_fp16 + ((torch.tensor([1], device=dev, dtype=torch.long),),
                                         torch.randn(1, device=dev, dtype=torch.float32))),
        ("tensordot", (torch.randn((2, 2, 2), dtype=torch.float32, device=dev),
                       torch.randn((2, 2, 2), dtype=torch.float16, device=dev))),
        ("scatter_add", (torch.zeros(2, 2, 2, dtype=torch.float32, device=dev),
                         0,
                         torch.randint(0, 2, (2, 2, 2), device=dev),
                         torch.randn((2, 2, 2), dtype=torch.float16, device=dev))),
        ("scatter_add", (torch.zeros(2, 2, 2, dtype=torch.float16, device=dev),
                         0,
                         torch.randint(0, 2, (2, 2, 2), device=dev),
                         torch.randn((2, 2, 2), dtype=torch.float32, device=dev))),
    ]
    # nn_* lists: presumably ops looked up on the nn namespace rather than
    # on torch -- confirm against the consuming test.
    self.nn_fp16 = [
        ("linear", mat0_fp32 + mat1_fp32 + mat2_fp32),
    ]
    self.nn_fp32 = [
        ("softplus", pointwise0_fp16),
        ("nll_loss", (torch.rand((n, n), device=dev, dtype=torch.float),
                      torch.zeros((n,), device=dev, dtype=torch.long))),
        ("nll_loss2d", (torch.rand((n, n, n, n), device=dev, dtype=torch.half),
                        torch.zeros((n, n, n), device=dev, dtype=torch.long))),
        ("l1_loss", mat0_fp16 + mat1_fp16),
        ("smooth_l1_loss", mat0_fp16 + mat1_fp16),
        ("mse_loss", mat0_fp16 + mat1_fp16),
        ("multilabel_margin_loss", mat0_fp16 + (torch.ones((n, n), device=dev, dtype=torch.long),)),
        ("soft_margin_loss", mat0_fp16 + (torch.ones((n, n), device=dev, dtype=torch.long),)),
        ("multi_margin_loss", mat0_fp16 + (torch.ones((n,), device=dev, dtype=torch.long),)),
    ]
    # The three matrices are wrapped in one extra tuple, i.e. passed as a
    # single sequence argument.
    self.linalg_fp16 = [
        ("linalg_multi_dot", (mat0_fp32 + mat1_fp32 + mat2_fp32,)),
    ]
    # methods_*: entries keyed by dunder method names.
    self.methods_fp16 = [
        ("__matmul__", mat0_fp32 + mat1_fp32)
    ]
    self.methods_fp32 = [
        ("__pow__", (torch.rand(n, device=dev, dtype=torch.float16), 1.5)),
    ]
    # banned: the third item is the module the op is looked up on
    # (presumably these ops must be rejected under autocast -- confirm).
    self.banned = [
        ("binary_cross_entropy", (torch.rand((n, n), device=dev, dtype=torch.float32),
                                  torch.rand((n, n), device=dev, dtype=torch.float32)), torch._C._nn),
    ]
self.fc = nn.Linear(512, num_classes) for m in self.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels m.weight.data.normal_(0, math.sqrt(2. / n)) elif isinstance(m, nn.BatchNorm2d): m.weight.data.fill_(1) m.bias.data.zero_() def forward(self, x): x = self.stage1(x) x = self.stage2(x) x = self.stage3(x) x = self.stage4(x) x = self.avgpool(x) x = x.view(x.size(0), -1) x = self.fc(x) return x if __name__ =='__main__': model = se_resnet(300).cuda() img = torch.rand(4,3,80,80).cuda() #416 320 800 608 out = model(img) att = torch.rand(300,164).cuda() res = torch.mm(out,att) print(out.size()) print(res.size())
if __name__ == '__main__':
    # Manual smoke test: build a HighRes3DNet, report its parameter count and
    # receptive field, then print the output shape for one random volume.
    import torch

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    network_options = dict(
        initial_out_channels_power=4,
        layers_per_residual_block=2,
        residual_blocks_per_dilation=3,
        dilations=3,
    )
    network = HighRes3DNet(1, 2, **network_options)
    network.to(device)

    print(network.num_parameters)
    print(network.get_receptive_field_world(spacing=2))

    # The original author labelled this input size "2 mm"; (64, 76, 64) was
    # noted as the "3 mm" alternative and (80, 80, 80) as another option.
    volume_shape = (1, 1, 97, 115, 97)
    volume = torch.rand(*volume_shape, device=device)
    prediction = network(volume)
    print(prediction.shape)
def conv_lstm_test():
    """Smoke-test ``ConvLSTM`` on one random sequence and print result shapes.

    Builds a 3-layer ConvLSTM, feeds it a random tensor of shape
    (1, 30, 1, 128, 128) with ``batch_first=True``, and prints the model, the
    shape of the first element of the first returned layer output, and the
    length and element shapes of the first returned state.
    """
    # Plain tensors replace the deprecated Variable wrapper, and the local no
    # longer shadows the builtin ``input``.
    frames = torch.rand(1, 30, 1, 128, 128)
    model = ConvLSTM(input_dim=1,
                     hidden_dim=[32, 64, 128],
                     kernel_size=(3, 3),
                     num_layers=3,
                     batch_first=True,
                     bias=True,
                     return_all_layers=False)
    print(model)

    layer_output, last_state = model(frames)
    layer_output = layer_output[0]
    print(layer_output[0].shape)
    print(len(last_state[0]))
    print(last_state[0][0].shape)
    print(last_state[0][1].shape)


if __name__ == "__main__":
    # Fall back to the CPU instead of crashing when no GPU is present.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Sample on the CPU first, then move, so the random values do not depend
    # on which device ends up being used.
    eeg_batch = torch.rand(1, 30, 1, 40, 128).to(device)
    model = EEGNet().to(device)
    output = model(eeg_batch)
    print(output.shape)
h = torch.stack(h).permute(1, 0, 2) h_reshape = h.contiguous().view(batch_size * time_step, self.hidden_dim) if self.dropout > 0.0: h_reshape = self.nn_dropout(h_reshape) output = self.nn_output(h_reshape) output = self.sigmoid(output) output = output.contiguous().view(batch_size, time_step, self.output_dim) return output, inputse_att if __name__ == '__main__': parser = argparse.ArgumentParser() args = train.parse_arguments(parser) print('Constructing model ... ') device = torch.device("cuda:0" if torch.cuda.is_available() == True else 'cpu') print("available device: {}".format(device)) batch_x = torch.rand(128, 400, 76) batch_x = torch.tensor(batch_x, dtype=torch.float32).to(device) model = AdaCare(args.rnn_dim, args.kernel_size, args.kernel_num, args.input_dim, args.output_dim, args.dropout_rate, args.r_visit, args.r_conv, args.activation_func, device).to(device) cur_output, _ = model(batch_x, device) flops, params = profile(model, inputs=(batch_x, device)) print('flops: ', flops, ' params: ', params) print('!!!!!!!')
""" def __init__(self, inp=10, out=16, kernel_size=3): super(TestConv2d, self).__init__() self.conv2d = nn.Conv2d(inp, out, stride=1, kernel_size=kernel_size, bias=True) def forward(self, x): x = self.conv2d(x) return x #model = TestConv2d() input = torch.rand(1, 3, 473, 473).cuda() model = DeepLabV3(layers=50, dropout=0.1, classes=21, zoom_factor=8, pretrained=True).cuda() model.eval() print(model) output = model(input) #model = DeepLabV3(layers=101, classes=21, zoom_factor=8, pretrained=False) #input_np = np.random.uniform(0, 1, (1, 3, 313, 313)) #input_var = Variable(torch.FloatTensor(input_np)) #k_model = pytorch_to_keras(model, input_var, [(3, 313, 313,)], verbose=True)