Exemple #1
0
 def test_add_output_dim(self, cuda=False):
     """Check `add_output_dim` on a rejected input and on two accepted ones.

     Every check runs in single and in double precision, on CUDA when
     `cuda` is true and on the CPU otherwise.
     """
     device = torch.device("cuda") if cuda else torch.device("cpu")
     batch_shape = torch.Size([2])
     # (input shape, expected position of the inserted output dimension):
     # first without, then with an additional leading batch dimension
     accepted_cases = (((2, 2, 1), 0), ((3, 2, 2, 1), 1))
     for dtype in (torch.float, torch.double):
         tensor_opts = {"device": device, "dtype": dtype}
         # this input must be rejected with a ValueError
         rejected = torch.rand(2, 1, **tensor_opts)
         with self.assertRaises(ValueError):
             add_output_dim(X=rejected, original_batch_shape=batch_shape)
         for shape, expected_idx in accepted_cases:
             X = torch.rand(*shape, **tensor_opts)
             X_out, output_dim_idx = add_output_dim(
                 X=X, original_batch_shape=batch_shape
             )
             self.assertTrue(torch.equal(X_out, X.unsqueeze(expected_idx)))
             self.assertEqual(output_dim_idx, expected_idx)
Exemple #2
0
def grad2():
    """Print the input gradients of two outputs that share the same weights.

    Draws random `W` (2x2), `W2` (2x1) and two 1x2 inputs, evaluates
    `x @ W @ W2` for each input, back-propagates each result and prints the
    gradient stored on the corresponding input.
    """
    shapes = ((2, 2), (2, 1), (1, 2), (1, 2))
    W, W2, x1, x2 = [Variable(torch.rand(*s), requires_grad=True) for s in shapes]

    for label, value in (("w: ", W), ("x1: ", x1), ("x2: ", x2)):
        print(label)
        print(value)
    print("--------------------")

    first_out = x1.matmul(W).matmul(W2)
    print(W.matmul(W2))
    first_out.backward()
    print(x1.grad)

    # The weight gradients are not zeroed between the two passes, so W.grad
    # and W2.grad accumulate; only the per-input gradients are printed.
    second_out = x2.matmul(W).matmul(W2)
    second_out.backward()
    print(x2.grad)
Exemple #3
0
    def test_FixedNoiseMultiTaskGP_single_output(self, cuda=False):
        """Smoke-test the single-output fixed-noise multi-task model.

        For float and double precision: build the model through
        `_get_fixed_noise_model_single_output`, check the types of its
        components and the task-kernel rank, run a single fitting iteration
        and evaluate the posterior on a plain and on a batched input.
        """
        device = torch.device("cuda") if cuda else torch.device("cpu")
        for dtype in (torch.float, torch.double):
            tkwargs = {"device": device, "dtype": dtype}
            model = _get_fixed_noise_model_single_output(**tkwargs)

            # component types
            self.assertIsInstance(model, FixedNoiseMultiTaskGP)
            self.assertIsInstance(model.likelihood, FixedNoiseGaussianLikelihood)
            self.assertIsInstance(model.mean_module, ConstantMean)
            covar = model.covar_module
            self.assertIsInstance(covar, ScaleKernel)
            base_kernel = covar.base_kernel
            self.assertIsInstance(base_kernel, MaternKernel)
            self.assertIsInstance(base_kernel.lengthscale_prior, GammaPrior)
            task_covar = model.task_covar_module
            self.assertIsInstance(task_covar, IndexKernel)
            self.assertEqual(model._rank, 2)
            self.assertEqual(task_covar.covar_factor.shape[-1], model._rank)

            # model fitting, limited to one optimizer iteration
            fit_gpytorch_model(
                ExactMarginalLogLikelihood(model.likelihood, model),
                options={"maxiter": 1},
            )

            # posterior on a plain input, then on a batched input
            for shape in ((2, 1), (3, 2, 1)):
                test_x = torch.rand(*shape, **tkwargs)
                posterior_f = model.posterior(test_x)
                self.assertIsInstance(posterior_f, GPyTorchPosterior)
                self.assertIsInstance(posterior_f.mvn, MultivariateNormal)
    def get_loss(self, image_a_pred, image_b_pred, mask_a, mask_b):
        """Descriptor loss between randomly paired masked pixels of two images.

        Draws 10000 pixel indices (with replacement) from the non-zero
        entries of each mask, pairs the i-th sample of image a with the i-th
        sample of image b and sums
        ``max(0, ||d_a - d_b||^2 - 2 * M_margin)`` over all pairs.

        The computation runs on whatever device the inputs live on; nothing
        is forced onto CUDA.

        Args:
            image_a_pred: descriptor tensor of image a. Pixels are selected
                along dim 1 and descriptor entries are summed over dim 2
                (presumably (batch, num_pixels, descriptor_dim) -- TODO
                confirm against the caller).
            image_b_pred: descriptor tensor of image b, same layout.
            mask_a: flat (1-D) mask; its non-zero positions are the pixels
                that may be sampled from image a.
            mask_b: flat (1-D) mask for image b.

        Returns:
            A zero-dimensional tensor holding the summed loss. If either mask
            has no non-zero entry, a one-element zero tensor that requires
            grad is returned instead.
        """
        # number of random pixel samples drawn from each mask
        num_samples = 10000
        M_margin = 0.5  # margin parameter

        # get the nonzero indices
        mask_a_indices_flat = torch.nonzero(mask_a)
        mask_b_indices_flat = torch.nonzero(mask_b)
        if len(mask_a_indices_flat) == 0 or len(mask_b_indices_flat) == 0:
            # Only floating point tensors may require gradients, so the
            # placeholder uses the descriptors' dtype (and device).
            return torch.zeros(
                1,
                dtype=image_a_pred.dtype,
                device=image_a_pred.device,
                requires_grad=True,
            )

        def _sample_masked_pixels(indices_flat, target_device):
            # randint draws integers in [0, len), so it can never produce an
            # out-of-range index the way floor(rand * len) can after rounding
            picks = torch.randint(
                len(indices_flat), (num_samples,), device=indices_flat.device
            )
            chosen = torch.index_select(indices_flat, 0, picks).squeeze(1)
            return chosen.to(target_device)

        randomized_mask_a_indices_flat = _sample_masked_pixels(
            mask_a_indices_flat, image_a_pred.device
        )
        randomized_mask_b_indices_flat = _sample_masked_pixels(
            mask_b_indices_flat, image_b_pred.device
        )

        # index into the image and get descriptors
        random_img_a_object_descriptors = torch.index_select(
            image_a_pred, 1, randomized_mask_a_indices_flat
        )
        random_img_b_object_descriptors = torch.index_select(
            image_b_pred, 1, randomized_mask_b_indices_flat
        )
        pixel_wise_loss = (
            random_img_a_object_descriptors - random_img_b_object_descriptors
        ).pow(2).sum(dim=2)
        pixel_wise_loss = torch.add(pixel_wise_loss, -2 * M_margin)
        zeros_vec = torch.zeros_like(pixel_wise_loss)
        return torch.max(zeros_vec, pixel_wise_loss).sum()
Exemple #5
0
    def setUp(self, size=(2, 5), batch=3, dtype=torch.float64, device=None,
              seed=None, mu=None, cov=None, A=None, b=None):
        '''Prepare an affine map and batched mean/covariance/variance fixtures.

        Stores `A` and `b` on the instance, and stacks `k * mu`, `k * cov`
        and `k * diag(cov)` for `k = 1, ..., batch` into `batch_mean`,
        `batch_cov` and `batch_var`. Anything not supplied is drawn at random.

        Args:
            size: Tuple size of matrix A; `size[0]` is also the length of
                `b` and `size[1]` the length of `mu` / side of `cov`.
            batch: Number of stacked scalings.
            dtype: Data type of the generated tensors.
            device: Device of the generated tensors.
            seed: Optional seed for torch's random number generator.
            mu: Specific mean vector to use.
            cov: Specific covariance matrix to use.
            A: Specific A matrix to use.
            b: Specific bias vector to use.
        '''
        if seed is not None:
            torch.manual_seed(seed)

        tensor_opts = {"dtype": dtype, "device": device}
        rows, cols = size[0], size[1]
        # The draw order (A, b, mu, cov) is kept so seeded runs stay reproducible.
        if A is None:
            A = torch.rand(size, **tensor_opts)
        if b is None:
            b = torch.rand(rows, **tensor_opts)
        if mu is None:
            mu = torch.rand(cols, **tensor_opts)
        if cov is None:
            cov = rand.definite(cols, dtype=dtype, device=device,
                                positive=True, semi=False, norm=10**2)

        self.A, self.b = A, b
        var = torch.diag(cov)
        scales = range(1, batch + 1)
        self.batch_mean = torch.stack([k * mu for k in scales])
        self.batch_cov = torch.stack([k * cov for k in scales])
        self.batch_var = torch.stack([k * var for k in scales])
Exemple #6
0
def unit_test(args):
    """Print a few basic torch tensor operations and a one-line summary.

    Args:
        args: object with a boolean `verbose` attribute; when it is true the
            two random operands (and the size of the first) are printed too.
    """
    print("Torch uninitialized 5x3 matrix:")
    print(torch.Tensor(5, 3))

    print("Torch randomly initialized 5x3 matrix X:")
    lhs = torch.rand(5, 3)
    if args.verbose:
        print(lhs)
        print("size:", lhs.size())

    print("Torch randomly initialized 5x3 matrix Y:")
    rhs = torch.rand(5, 3)
    if args.verbose:
        print(rhs)

    print("X + Y:")
    total = lhs + rhs
    print(total)

    print("slice (X + Y)[:, 1]:")
    print(total[:, 1])

    # Nothing is actually verified, so the failure count is always zero.
    failures = 0
    verdict = "FAIL" if failures else "PASS"
    print("unit_test:  num_tests:", 1,
          " num_wrong:", failures, " -- ", verdict)
    def test_FixedNoiseGP(self, cuda=False):
        """Smoke-test `FixedNoiseGP` across batch shapes, outputs and dtypes.

        For every combination: build a model via `self._get_model`, check the
        types of its components, run a single fitting iteration, call the
        model directly and check the shape of the posterior mean for inputs
        without and with an extra leading batch dimension of size 2.
        """
        device = torch.device("cuda") if cuda else torch.device("cpu")
        for batch_shape in (torch.Size([]), torch.Size([2])):
            for num_outputs in (1, 2):
                for dtype in (torch.float, torch.double):
                    tkwargs = {"device": device, "dtype": dtype}
                    model = self._get_model(
                        batch_shape=batch_shape,
                        num_outputs=num_outputs,
                        n=10,
                        **tkwargs
                    )

                    # component types
                    self.assertIsInstance(model, FixedNoiseGP)
                    self.assertIsInstance(
                        model.likelihood, FixedNoiseGaussianLikelihood
                    )
                    self.assertIsInstance(model.mean_module, ConstantMean)
                    covar = model.covar_module
                    self.assertIsInstance(covar, ScaleKernel)
                    base_kernel = covar.base_kernel
                    self.assertIsInstance(base_kernel, MaternKernel)
                    self.assertIsInstance(base_kernel.lengthscale_prior, GammaPrior)

                    # model fitting, limited to one optimizer iteration
                    fit_gpytorch_model(
                        ExactMarginalLogLikelihood(model.likelihood, model),
                        options={"maxiter": 1},
                    )

                    # forward pass
                    input_shape = batch_shape + torch.Size([3, 1])
                    forward_out = model(torch.rand(input_shape, **tkwargs))
                    self.assertIsInstance(forward_out, MultivariateNormal)

                    # TODO: Pass observation noise into posterior and check
                    # that posterior(test_x, observation_noise=True).variance
                    # is close to the noiseless variance + 0.01.

                    # posterior: non-batch evaluation first, then batch
                    # evaluation with an extra leading dimension of size 2
                    output_shape = batch_shape + torch.Size([3, num_outputs])
                    for extra in (torch.Size([]), torch.Size([2])):
                        X = torch.rand(extra + input_shape, **tkwargs)
                        posterior = model.posterior(X)
                        self.assertIsInstance(posterior, GPyTorchPosterior)
                        self.assertEqual(posterior.mean.shape, extra + output_shape)
    def visualize_results(self, epoch, fix=True):
        """Generate samples with the generator and save them as an image grid.

        Puts `self.G` in eval mode, makes sure the result directory exists,
        runs the generator and hands the first `image_frame_dim ** 2` samples
        to `utils.save_images`.

        Args:
            epoch: number used in the output file name (`_epoch%03d`).
            fix: when true, use the stored `self.sample_z_` / `self.sample_y_`;
                otherwise draw fresh uniform noise and random one-hot labels
                over 10 classes.
        """
        self.G.eval()

        out_dir = self.result_dir + '/' + self.dataset + '/' + self.model_name
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        image_frame_dim = int(np.floor(np.sqrt(self.sample_num)))

        if fix:
            # fixed noise
            samples = self.G(self.sample_z_, self.sample_y_)
        else:
            # random noise with a random one-hot class label per sample
            labels = torch.LongTensor(self.batch_size, 1).random_() % 10
            onehot = torch.FloatTensor(self.batch_size, 10)
            onehot.zero_()
            onehot.scatter_(1, labels, 1)
            noise = torch.rand((self.batch_size, self.z_dim))
            if self.gpu_mode:
                noise, onehot = noise.cuda(), onehot.cuda()
            samples = self.G(Variable(noise, volatile=True),
                             Variable(onehot, volatile=True))

        if self.gpu_mode:
            samples = samples.cpu()
        # move axis 1 to the end (presumably NCHW -> NHWC for image saving)
        samples = samples.data.numpy().transpose(0, 2, 3, 1)

        grid_cells = image_frame_dim * image_frame_dim
        utils.save_images(samples[:grid_cells, :, :, :], [image_frame_dim, image_frame_dim],
                          out_dir + '/' + self.model_name + '_epoch%03d' % epoch + '.png')
 def test_forward_works_on_higher_order_input(self):
     """Embed inputs that carry two extra wrapping dimensions.

     Builds a `BasicTextFieldEmbedder` with a word embedding and a
     CNN-encoded character embedding, and checks that the output keeps the
     (3, 4, 5, 6) leading shape with a final dimension of 12 (presumably
     2 word dimensions + 10 CNN filters).
     """
     word_config = {
             "type": "embedding",
             "num_embeddings": 20,
             "embedding_dim": 2,
             }
     character_config = {
             "type": "character_encoding",
             "embedding": {
                     "embedding_dim": 4,
                     "num_embeddings": 15,
                     },
             "encoder": {
                     "type": "cnn",
                     "embedding_dim": 4,
                     "num_filters": 10,
                     "ngram_filter_sizes": [3],
                     },
             }
     params = Params({"words": word_config, "characters": character_config})
     token_embedder = BasicTextFieldEmbedder.from_params(self.vocab, params)

     # random indices below the respective num_embeddings
     word_ids = Variable(torch.rand(3, 4, 5, 6) * 20).long()
     character_ids = Variable(torch.rand(3, 4, 5, 6, 7) * 15).long()
     inputs = {'words': word_ids, 'characters': character_ids}

     embedded = token_embedder(inputs, num_wrapping_dims=2)
     assert embedded.size() == (3, 4, 5, 6, 12)
    def test_fit_valid_sets_args(self, gtvs):
        """Check that `Model.fit` forwards its data arguments to `gtvs`.

        `gtvs` is a mock injected into the test (presumably the patched
        train/validation split helper -- confirm against the decorator on
        this method). `fit_generator` is replaced by a mock so no training
        takes place; only the arguments of the single `gtvs` call are checked.
        """
        x = torch.rand(1, 5)
        y = torch.rand(1, 5)
        val_data = (1, 2)
        val_split = 0.2
        shuffle = False

        torchmodel = MagicMock()
        torchmodel.forward = Mock(return_value=1)
        optimizer = MagicMock()
        metric = Metric('test')

        # Must be a floating point tensor: integer tensors cannot require grad.
        loss = torch.tensor([2.0], requires_grad=True)
        criterion = Mock(return_value=loss)

        gtvs.return_value = (1, 2)

        torchbearermodel = Model(torchmodel, optimizer, criterion, [metric])
        torchbearermodel.fit_generator = Mock()
        torchbearermodel.fit(x, y, 1, validation_data=val_data, validation_split=val_split, shuffle=shuffle)

        gtvs.assert_called_once()
        positional = gtvs.call_args[0]
        keywords = gtvs.call_args[1]
        # assertEqual reports both values on failure, unlike assertTrue(a == b)
        self.assertEqual(list(positional[0].numpy()[0]), list(x.numpy()[0]))
        self.assertEqual(list(positional[1].numpy()[0]), list(y.numpy()[0]))
        self.assertEqual(positional[2], val_data)
        self.assertEqual(positional[3], val_split)
        self.assertEqual(keywords['shuffle'], shuffle)
Exemple #11
0
    def test_upper_confidence_bound(self, cuda=False):
        """Check `UpperConfidenceBound` values and its multi-output error.

        With a mocked posterior of mean 0 and variance 1 and `beta=1.0`, the
        acquisition value must be 1 by default and -1 with `maximize=False`.
        A mocked two-output posterior must raise `UnsupportedError`.
        """
        device = torch.device("cuda") if cuda else torch.device("cpu")
        for dtype in (torch.float, torch.double):
            tkwargs = {"device": device, "dtype": dtype}
            mean = torch.tensor([[0.0]], **tkwargs)
            variance = torch.tensor([[1.0]], **tkwargs)
            mm = MockModel(MockPosterior(mean=mean, variance=variance))

            # (extra constructor arguments, expected acquisition value)
            for extra, expected in (({}, 1.0), ({"maximize": False}, -1.0)):
                module = UpperConfidenceBound(model=mm, beta=1.0, **extra)
                X = torch.zeros(1, 1, **tkwargs)
                ucb = module(X)
                ucb_expected = torch.tensor([expected], **tkwargs)
                self.assertTrue(torch.allclose(ucb, ucb_expected, atol=1e-4))

            # check for proper error if multi-output model
            mean2 = torch.rand(1, 2, **tkwargs)
            variance2 = torch.rand(1, 2, **tkwargs)
            mm2 = MockModel(MockPosterior(mean=mean2, variance=variance2))
            module2 = UpperConfidenceBound(model=mm2, beta=1.0)
            with self.assertRaises(UnsupportedError):
                module2(X)
    def visualize_results(self, epoch, fix=True):
        """Generate samples with the generator and save them as an image grid.

        Puts `self.G` in eval mode, makes sure the result directory exists,
        runs the generator and hands the first `image_frame_dim ** 2` samples
        to `utils.save_images`. The grid side is derived from the smaller of
        `self.sample_num` and `self.batch_size`.

        Args:
            epoch: number used in the output file name (`_epoch%03d`).
            fix: when true, use the stored `self.sample_z_`; otherwise draw
                fresh uniform noise.
        """
        self.G.eval()

        out_dir = self.result_dir + '/' + self.dataset + '/' + self.model_name
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        tot_num_samples = min(self.sample_num, self.batch_size)
        image_frame_dim = int(np.floor(np.sqrt(tot_num_samples)))

        if fix:
            # fixed noise
            noise_var = self.sample_z_
        else:
            # random noise
            noise = torch.rand((self.batch_size, self.z_dim))
            if self.gpu_mode:
                noise = noise.cuda()
            noise_var = Variable(noise, volatile=True)
        samples = self.G(noise_var)

        if self.gpu_mode:
            samples = samples.cpu()
        # move axis 1 to the end (presumably NCHW -> NHWC for image saving)
        samples = samples.data.numpy().transpose(0, 2, 3, 1)

        grid_cells = image_frame_dim * image_frame_dim
        utils.save_images(samples[:grid_cells, :, :, :], [image_frame_dim, image_frame_dim],
                          out_dir + '/' + self.model_name + '_epoch%03d' % epoch + '.png')
def sample_relax(logits):
    """Draw a Gumbel-max sample and a second relaxed sample that agrees with it.

    Relies on the module-level sizes `B` and `C`.

    Args:
        logits: unnormalised log-probabilities with `C` columns. The softmax
            is tiled `B` times along dim 0, so this presumably expects a
            single row (1, C) -- TODO confirm against the caller.

    Returns:
        Tuple `(z, b, logprob, z_tilde)`: the perturbed logits, their argmax
        along dim 1, the log-probability of `b` under
        `Categorical(logits=logits)` reshaped to (B, 1), and a second
        perturbed sample whose entry at `b` is replaced by a fresh Gumbel draw.
    """
    eps = 1e-12

    def _uniform(cols):
        # uniform noise kept away from 0 so the logarithms stay finite
        return torch.rand(B, cols).clamp(eps, 1. - eps)

    # Gumbel-max sample
    z = logits - torch.log(-torch.log(_uniform(C)))
    b = torch.argmax(z, dim=1)
    chosen = b.view(B, 1)

    logprob = Categorical(logits=logits).log_prob(b).view(B, 1)

    # Independent draw for the selected entry. Per the original author's note,
    # reusing the entry of the first sample (via gather) looked biased.
    v_k = _uniform(1)
    z_tilde_b = -torch.log(-torch.log(v_k))

    v = _uniform(C)
    probs = torch.softmax(logits, dim=1).repeat(B, 1)
    z_tilde = -torch.log((-torch.log(v) / probs) - torch.log(v_k))
    z_tilde.scatter_(dim=1, index=chosen, src=z_tilde_b)

    return z, b, logprob, z_tilde
Exemple #14
0
    def test_degenerate_GPyTorchPosterior(self, cuda=False):
        """Check `GPyTorchPosterior` built on a singular covariance matrix.

        For float and double precision this verifies the basic attributes,
        that sampling emits exactly one "not p.d." RuntimeWarning the first
        time a posterior is sampled, the shapes of the samples, that passing
        the same base samples twice gives matching draws, and sampling from a
        batched posterior.
        """
        device = torch.device("cuda") if cuda else torch.device("cpu")
        for dtype in (torch.float, torch.double):
            # singular covariance matrix
            degenerate_covar = torch.tensor(
                [[1, 1, 0], [1, 1, 0], [0, 0, 2]], dtype=dtype, device=device
            )
            mean = torch.rand(3, dtype=dtype, device=device)
            mvn = MultivariateNormal(mean, lazify(degenerate_covar))
            posterior = GPyTorchPosterior(mvn=mvn)
            # basics
            self.assertEqual(posterior.device.type, device.type)
            self.assertTrue(posterior.dtype == dtype)
            self.assertEqual(posterior.event_shape, torch.Size([3, 1]))
            self.assertTrue(torch.equal(posterior.mean, mean.unsqueeze(-1)))
            variance_exp = degenerate_covar.diag().unsqueeze(-1)
            self.assertTrue(torch.equal(posterior.variance, variance_exp))

            # rsample
            with warnings.catch_warnings(record=True) as w:
                # we check that the p.d. warning is emitted - this only
                # happens once per posterior, so we need to check only once
                samples = posterior.rsample(sample_shape=torch.Size([4]))
                self.assertEqual(len(w), 1)
                self.assertTrue(issubclass(w[-1].category, RuntimeWarning))
                self.assertTrue("not p.d." in str(w[-1].message))
            self.assertEqual(samples.shape, torch.Size([4, 3, 1]))
            samples2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
            self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 1]))
            # rsample w/ base samples
            base_samples = torch.randn(4, 3, 1, device=device, dtype=dtype)
            # the same base samples are passed twice and must give matching draws
            samples_b1 = posterior.rsample(
                sample_shape=torch.Size([4]), base_samples=base_samples
            )
            samples_b2 = posterior.rsample(
                sample_shape=torch.Size([4]), base_samples=base_samples
            )
            self.assertTrue(torch.allclose(samples_b1, samples_b2))
            base_samples2 = torch.randn(4, 2, 3, 1, device=device, dtype=dtype)
            samples2_b1 = posterior.rsample(
                sample_shape=torch.Size([4, 2]), base_samples=base_samples2
            )
            samples2_b2 = posterior.rsample(
                sample_shape=torch.Size([4, 2]), base_samples=base_samples2
            )
            self.assertTrue(torch.allclose(samples2_b1, samples2_b2))
            # collapse_batch_dims
            b_mean = torch.rand(2, 3, dtype=dtype, device=device)
            # the same singular matrix, expanded to a batch of two
            b_degenerate_covar = degenerate_covar.expand(2, *degenerate_covar.shape)
            b_mvn = MultivariateNormal(b_mean, lazify(b_degenerate_covar))
            b_posterior = GPyTorchPosterior(mvn=b_mvn)
            b_base_samples = torch.randn(4, 2, 3, 1, device=device, dtype=dtype)
            # b_posterior is a new posterior object, so the warning is expected again
            with warnings.catch_warnings(record=True) as w:
                b_samples = b_posterior.rsample(
                    sample_shape=torch.Size([4]), base_samples=b_base_samples
                )
                self.assertEqual(len(w), 1)
                self.assertTrue(issubclass(w[-1].category, RuntimeWarning))
                self.assertTrue("not p.d." in str(w[-1].message))
            self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 1]))
Exemple #15
0
def run_test_argmax():
    """Run `TestArgMax` on three random length-4 vectors and back-propagate."""
    module = TestArgMax()
    # drawn in the order k, v, y
    k, v, y = torch.rand(4), torch.rand(4), torch.rand(4)
    module(k, v, y).backward()
  def setUp(self):
    """Create input, gradient and duty-cycle fixtures.

    Inputs use 3 filters with image width, height = 2 x 2. They are all ones
    except for a few raised entries. `x` has batch size 1, `x2` has batch
    size 2 and shares its first sample with `x`.
    """
    first_sample = (
        ((0, 0, 1, 0), 1.1),
        ((0, 0, 1, 1), 1.2),
        ((0, 1, 0, 1), 1.2),
        ((0, 2, 1, 0), 1.3),
    )
    second_sample = (
        ((1, 0, 0, 0), 1.4),
        ((1, 1, 0, 0), 1.5),
        ((1, 1, 0, 1), 1.6),
        ((1, 2, 1, 1), 1.7),
    )

    # Batch size 1
    single = torch.ones((1, 3, 2, 2))
    for index, value in first_sample:
      single[index] = value
    self.x = single
    self.gradient = torch.rand(single.shape)

    # Batch size 2
    double = torch.ones((2, 3, 2, 2))
    for index, value in first_sample + second_sample:
      double[index] = value
    self.x2 = double
    self.gradient2 = torch.rand(double.shape)

    # All equal
    self.dutyCycle = torch.zeros((1, 3, 1, 1))
    self.dutyCycle.fill_(1.0 / 3.0)
    def sample_relax(probs):
        """Draw a Gumbel-max sample and a second relaxed sample that agrees with it.

        Relies on `B`, `C` and the distribution object `cat` from the
        enclosing scope.

        Args:
            probs: class probabilities; combined element-wise with (B, C)
                noise, so presumably (B, C) or broadcastable to it -- TODO
                confirm.

        Returns:
            Tuple `(z, b, logprob, z_tilde)`: the perturbed log-probabilities,
            their argmax along dim 1, `cat.log_prob(b)`, and a second
            perturbed sample whose entry at `b` is replaced, row by row, with
            a fresh Gumbel draw.
        """
        # Sample z
        u = torch.rand(B,C)
        gumbels = -torch.log(-torch.log(u))
        z = torch.log(probs) + gumbels

        b = torch.argmax(z, dim=1)
        logprob = cat.log_prob(b)

        # Sample z_tilde
        u_b = torch.rand(B,1)
        z_tilde_b = -torch.log(-torch.log(u_b))
        u = torch.rand(B,C)
        z_tilde = -torch.log((- torch.log(u) / probs) - torch.log(u_b))

        # Write one entry per row: column b[i] of row i. Indexing with
        # `z_tilde[:, b] = ...` would instead overwrite every column listed in
        # b for all rows, which is only equivalent when B == 1.
        z_tilde.scatter_(dim=1, index=b.view(B,1), src=z_tilde_b)

        return z, b, logprob, z_tilde
def sample_relax_given_class(logits, samp):
    """Draw two relaxed samples that are both consistent with a given class.

    Relies on the module-level sizes `B` and `C`.

    Args:
        logits: unnormalised log-probabilities, combined element-wise with
            (B, C) noise (presumably (B, C) -- TODO confirm).
        samp: class index per row; it is reshaped to (B, 1) to address one
            column in each row.

    Returns:
        Tuple `(z, z_tilde, logprob)`: two perturbed samples whose entry at
        `samp` is the same Gumbel value, and the log-probability of `samp`
        under `Categorical(logits=logits)` reshaped to (B, 1). The two
        samples use different uniform noise for the remaining entries.
    """
    cat = Categorical(logits=logits)
    b = samp
    index = b.view(B,1)
    logprob = cat.log_prob(b).view(B,1)
    probs = torch.softmax(logits, dim=1)

    # The value at the given class comes from the first noise draw and is
    # shared by both returned samples.
    u = torch.rand(B,C).clamp(1e-8, 1.-1e-8)
    u_b = torch.gather(input=u, dim=1, index=index)
    z_tilde_b = -torch.log(-torch.log(u_b))

    z = -torch.log((- torch.log(u) / probs) - torch.log(u_b))
    z.scatter_(dim=1, index=index, src=z_tilde_b)

    # Second sample: fresh noise for the other entries, same u_b.
    u = torch.rand(B,C).clamp(1e-8, 1.-1e-8)
    z_tilde = -torch.log((- torch.log(u) / probs) - torch.log(u_b))
    z_tilde.scatter_(dim=1, index=index, src=z_tilde_b)

    return z, z_tilde, logprob
 def test_forward_runs_with_non_bijective_mapping(self):
     """Run the embedder when one token embedder consumes two indexers.

     The `embedder_to_indexer_map` feeds the ELMo embedder both the "elmo"
     and the "words" inputs. The test only checks that the forward pass runs.
     """
     elmo_dir = self.FIXTURES_ROOT / 'elmo'
     word_config = {
             "type": "embedding",
             "num_embeddings": 20,
             "embedding_dim": 2,
             }
     elmo_config = {
             "type": "elmo_token_embedder",
             "options_file": str(elmo_dir / 'options.json'),
             "weight_file": str(elmo_dir / 'lm_weights.hdf5')
             }
     params = Params({
             "token_embedders": {"words": word_config, "elmo": elmo_config},
             "embedder_to_indexer_map": {"words": ["words"], "elmo": ["elmo", "words"]}
             })
     token_embedder = BasicTextFieldEmbedder.from_params(self.vocab, params)

     # random integer inputs, drawn in the order words, elmo
     word_ids = (torch.rand(3, 6) * 20).long()
     elmo_ids = (torch.rand(3, 6, 50) * 15).long()
     token_embedder({'words': word_ids, 'elmo': elmo_ids})
def test_sequential_scorer_d4_3():
    """Check the `SequentialScorer` output for a fixed seed.

    With torch seeded to 1, scoring two random embeddings with the
    'exact-match' and 'last-token-match' features must give -0.359851
    (to 4 decimal places).
    """
    torch.manual_seed(1)
    scorer = SequentialScorer(TEST_EMBEDDING_DIM, min_features, 2, COREF_FF_HIDDEN)
    # the scorer is built before the embeddings are drawn; the order matters
    # for the seeded expected value
    first_emb = ag.Variable(torch.rand(1, TEST_EMBEDDING_DIM))
    second_emb = ag.Variable(torch.rand(1, TEST_EMBEDDING_DIM))
    score = scorer(first_emb, second_emb, ['exact-match', 'last-token-match'])
    assert_almost_equals(float(score), -0.359851, places=4)
def _get_random_data(n, **tkwargs):
    """Generate two noisy 1-d training sets of different sizes.

    Inputs are evenly spaced points in [0, 0.95] with uniform jitter of up to
    0.05. Targets are one full period of a sine (first set, `n + 1` points)
    and a cosine (second set, `n` points) plus Gaussian noise of scale 0.2.

    Args:
        n: size of the second set; the first set has `n + 1` points.
        **tkwargs: tensor options (e.g. dtype, device) forwarded to torch.

    Returns:
        Tuple `(train_x1, train_x2, train_y1, train_y2)`; the inputs carry a
        trailing singleton dimension, the targets are 1-d.
    """
    def _jittered_grid(count):
        grid = torch.linspace(0, 0.95, count, **tkwargs)
        return grid + 0.05 * torch.rand(count, **tkwargs)

    train_x1 = _jittered_grid(n + 1)
    train_x2 = _jittered_grid(n)
    period = 2 * math.pi
    train_y1 = torch.sin(train_x1 * period) + 0.2 * torch.randn_like(train_x1)
    train_y2 = torch.cos(train_x2 * period) + 0.2 * torch.randn_like(train_x2)
    return train_x1.unsqueeze(-1), train_x2.unsqueeze(-1), train_y1, train_y2
    def __init__(self, args):
        """Set up the BEGAN trainer: settings, networks, optimizers, data, noise.

        Args:
            args: settings object; the attributes read here are `epoch`,
                `batch_size`, `save_dir`, `result_dir`, `dataset`, `log_dir`,
                `gpu_mode`, `gan_type`, `lrG`, `lrD`, `beta1` and `beta2`.
        """
        # parameters
        self.epoch = args.epoch
        self.sample_num = 64
        self.batch_size = args.batch_size
        self.save_dir = args.save_dir
        self.result_dir = args.result_dir
        self.dataset = args.dataset
        self.log_dir = args.log_dir
        self.gpu_mode = args.gpu_mode
        self.model_name = args.gan_type

        # BEGAN parameters
        self.gamma = 0.75
        self.lambda_ = 0.001
        self.k = 0.

        # networks init
        self.G = generator(self.dataset)
        self.D = discriminator(self.dataset)
        self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2))
        self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2))

        if self.gpu_mode:
            self.G.cuda()
            self.D.cuda()
            # self.L1_loss = torch.nn.L1loss().cuda()   # BEGAN does not work well when using L1loss().
        # else:
        #     self.L1_loss = torch.nn.L1loss()

        print('---------- Networks architecture -------------')
        utils.print_network(self.G)
        utils.print_network(self.D)
        print('-----------------------------------------------')

        # load dataset
        # NOTE(review): for any other dataset name `self.data_loader` is never set.
        if self.dataset == 'mnist':
            self.data_loader = DataLoader(datasets.MNIST('data/mnist', train=True, download=True,
                                                         transform=transforms.Compose(
                                                             [transforms.ToTensor()])),
                                          batch_size=self.batch_size, shuffle=True)
        elif self.dataset == 'fashion-mnist':
            self.data_loader = DataLoader(
                datasets.FashionMNIST('data/fashion-mnist', train=True, download=True, transform=transforms.Compose(
                    [transforms.ToTensor()])),
                batch_size=self.batch_size, shuffle=True)
        elif self.dataset == 'celebA':
            # NOTE(review): `transforms.Scale` looks like the old name of
            # `transforms.Resize` -- confirm against the installed torchvision.
            self.data_loader = utils.load_celebA('data/celebA', transform=transforms.Compose(
                [transforms.CenterCrop(160), transforms.Scale(64), transforms.ToTensor()]), batch_size=self.batch_size,
                                                 shuffle=True)
        self.z_dim = 62

        # fixed noise
        # One uniform noise batch of shape (batch_size, z_dim), created once and stored.
        # NOTE(review): `volatile=True` is a legacy autograd flag -- presumably
        # meant to disable gradient tracking; confirm for the torch version in use.
        if self.gpu_mode:
            self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)).cuda(), volatile=True)
        else:
            self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)), volatile=True)
    def __init__(self, args):
        """Set up the EBGAN trainer: settings, networks, loss, data, noise.

        Args:
            args: settings object; the attributes read here are `epoch`,
                `batch_size`, `save_dir`, `result_dir`, `dataset`, `log_dir`,
                `gpu_mode`, `gan_type`, `lrG`, `lrD`, `beta1` and `beta2`.
        """
        # parameters
        self.epoch = args.epoch
        self.sample_num = 64
        self.batch_size = args.batch_size
        self.save_dir = args.save_dir
        self.result_dir = args.result_dir
        self.dataset = args.dataset
        self.log_dir = args.log_dir
        self.gpu_mode = args.gpu_mode
        self.model_name = args.gan_type

        # EBGAN parameters
        self.pt_loss_weight = 0.1
        self.margin = max(1, self.batch_size / 64.)  # margin for loss function
        # usually margin of 1 is enough, but for large batch size it must be larger than 1

        # networks init
        self.G = generator(self.dataset)
        self.D = discriminator(self.dataset)
        self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2))
        self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2))

        # the MSE loss module is created in both modes and moved to the GPU when enabled
        if self.gpu_mode:
            self.G.cuda()
            self.D.cuda()
            self.MSE_loss = nn.MSELoss().cuda()
        else:
            self.MSE_loss = nn.MSELoss()

        print('---------- Networks architecture -------------')
        utils.print_network(self.G)
        utils.print_network(self.D)
        print('-----------------------------------------------')

        # load dataset
        # NOTE(review): for any other dataset name `self.data_loader` is never set.
        if self.dataset == 'mnist':
            self.data_loader = DataLoader(datasets.MNIST('data/mnist', train=True, download=True,
                                                         transform=transforms.Compose(
                                                             [transforms.ToTensor()])),
                                          batch_size=self.batch_size, shuffle=True)
        elif self.dataset == 'fashion-mnist':
            self.data_loader = DataLoader(
                datasets.FashionMNIST('data/fashion-mnist', train=True, download=True, transform=transforms.Compose(
                    [transforms.ToTensor()])),
                batch_size=self.batch_size, shuffle=True)
        elif self.dataset == 'celebA':
            # NOTE(review): `transforms.Scale` looks like the old name of
            # `transforms.Resize` -- confirm against the installed torchvision.
            self.data_loader = utils.load_celebA('data/celebA', transform=transforms.Compose(
                [transforms.CenterCrop(160), transforms.Scale(64), transforms.ToTensor()]), batch_size=self.batch_size,
                                                 shuffle=True)
        self.z_dim = 62

        # fixed noise
        # One uniform noise batch of shape (batch_size, z_dim), created once and stored.
        # NOTE(review): `volatile=True` is a legacy autograd flag -- presumably
        # meant to disable gradient tracking; confirm for the torch version in use.
        if self.gpu_mode:
            self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)).cuda(), volatile=True)
        else:
            self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)), volatile=True)
    def sample_relax_given_b(logits, b):
        """Draw a relaxed sample that is consistent with the given classes.

        Relies on `B` and `C` from the enclosing scope. The noise is moved to
        CUDA, so `logits` and `b` presumably live on the GPU as well.

        Args:
            logits: unnormalised log-probabilities, combined element-wise
                with (B, C) noise.
            b: class index per row; reshaped to (B, 1) to address one column
                in each row.

        Returns:
            The perturbed sample, whose entry at `b` is a Gumbel draw.
        """
        eps = 1e-10

        def _uniform(cols):
            # uniform noise kept away from 0 so the logarithms stay finite
            return torch.rand(B, cols).clamp(eps, 1. - eps).cuda()

        u_b = _uniform(1)
        z_tilde_b = -torch.log(-torch.log(u_b))

        u = _uniform(C)
        probs = torch.softmax(logits, dim=1)
        z_tilde = -torch.log((-torch.log(u) / probs) - torch.log(u_b))
        z_tilde.scatter_(dim=1, index=b.view(B, 1), src=z_tilde_b)

        return z_tilde
Exemple #25
0
 def test_GPyTorchPosterior(self, cuda=False):
     """Check `GPyTorchPosterior` built on a diagonal covariance matrix.

     For float and double precision this verifies the basic attributes, the
     shapes of samples, that incompatible base samples raise a RuntimeError,
     that passing the same base samples twice gives matching draws, and
     sampling from a batched posterior.
     """
     device = torch.device("cuda") if cuda else torch.device("cpu")
     event_shape = torch.Size([3, 1])
     for dtype in (torch.float, torch.double):
         tkwargs = {"dtype": dtype, "device": device}
         mean = torch.rand(3, **tkwargs)
         variance = 1 + torch.rand(3, **tkwargs)
         mvn = MultivariateNormal(mean, lazify(variance.diag()))
         posterior = GPyTorchPosterior(mvn=mvn)

         def _draw_twice(sample_shape, base):
             # two draws that share the same base samples
             first = posterior.rsample(sample_shape=sample_shape, base_samples=base)
             second = posterior.rsample(sample_shape=sample_shape, base_samples=base)
             return first, second

         # basics
         self.assertEqual(posterior.device.type, device.type)
         self.assertTrue(posterior.dtype == dtype)
         self.assertEqual(posterior.event_shape, event_shape)
         self.assertTrue(torch.equal(posterior.mean, mean.unsqueeze(-1)))
         self.assertTrue(torch.equal(posterior.variance, variance.unsqueeze(-1)))

         # rsample
         self.assertEqual(posterior.rsample().shape, torch.Size([1, 3, 1]))
         for sample_shape in (torch.Size([4]), torch.Size([4, 2])):
             drawn = posterior.rsample(sample_shape=sample_shape)
             self.assertEqual(drawn.shape, sample_shape + event_shape)

         # rsample w/ base samples
         base_samples = torch.randn(4, 3, 1, **tkwargs)
         # incompatible shapes
         with self.assertRaises(RuntimeError):
             posterior.rsample(
                 sample_shape=torch.Size([3]), base_samples=base_samples
             )
         samples_b1, samples_b2 = _draw_twice(torch.Size([4]), base_samples)
         self.assertTrue(torch.allclose(samples_b1, samples_b2))
         base_samples2 = torch.randn(4, 2, 3, 1, **tkwargs)
         samples2_b1, samples2_b2 = _draw_twice(torch.Size([4, 2]), base_samples2)
         self.assertTrue(torch.allclose(samples2_b1, samples2_b2))

         # collapse_batch_dims
         b_mean = torch.rand(2, 3, **tkwargs)
         b_variance = 1 + torch.rand(2, 3, **tkwargs)
         # one diagonal covariance matrix per batch entry
         b_covar = b_variance.unsqueeze(-1) * torch.eye(3).type_as(b_variance)
         b_posterior = GPyTorchPosterior(
             mvn=MultivariateNormal(b_mean, lazify(b_covar))
         )
         # the base samples have size 1 in the batch dimension
         b_base_samples = torch.randn(4, 1, 3, 1, **tkwargs)
         b_samples = b_posterior.rsample(
             sample_shape=torch.Size([4]), base_samples=b_base_samples
         )
         self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 1]))
Exemple #26
0
 def test_MockPosterior(self):
     """Check that `MockPosterior` hands back the values it was built with.

     `mean` and `variance` must be returned unchanged. `sample()` must return
     the stored samples with one leading dimension, and `sample(Size([2]))`
     the stored samples repeated twice along a new leading dimension.
     """
     mean = torch.rand(2)
     variance = torch.eye(2)
     samples = torch.rand(1, 2)
     posterior = MockPosterior(mean=mean, variance=variance, samples=samples)

     self.assertTrue(torch.equal(posterior.mean, mean))
     self.assertTrue(torch.equal(posterior.variance, variance))

     default_draw = posterior.sample()
     self.assertTrue(torch.all(default_draw == samples.unsqueeze(0)))

     two_draws = posterior.sample(torch.Size([2]))
     self.assertTrue(torch.all(two_draws == samples.repeat(2, 1, 1)))
Exemple #27
0
def sample_code(num, cat_dim=0, cont_dim=0, bin_dim=0, device=None) -> torch.Tensor:
    """Sample ``num`` random codes made of categorical, continuous and binary parts.

    Args:
        num: Number of codes (rows) to sample.
        cat_dim: Number of categories. When positive, a one-hot block of this
            width is produced with the hot index drawn uniformly.
        cont_dim: Width of the continuous block, drawn uniformly from [-1, 1).
        bin_dim: Width of the binary block; each entry is 0. or 1. depending
            on whether a uniform draw exceeds 0.5.
        device: Device on which all tensors are created.

    Returns:
        A ``(num, cat_dim + cont_dim + bin_dim)`` tensor with the blocks
        concatenated in the order categorical, continuous, binary. When every
        dimension is zero an empty ``(num, 0)`` float tensor is returned
        instead of failing inside ``torch.cat``.
    """
    parts = []
    if cat_dim > 0:
        cat_idx = torch.randint(cat_dim, size=(num, 1), dtype=torch.long, device=device)
        cat_onehot = torch.zeros(num, cat_dim, dtype=torch.float, device=device)
        cat_onehot.scatter_(1, cat_idx, 1)
        parts.append(cat_onehot)
    if cont_dim > 0:
        parts.append(2. * torch.rand(num, cont_dim, device=device) - 1.)
    if bin_dim > 0:
        parts.append((torch.rand(num, bin_dim, device=device) > .5).float())
    if not parts:
        # torch.cat rejects an empty list, so build the empty code explicitly.
        return torch.empty(num, 0, dtype=torch.float, device=device)
    return torch.cat(parts, 1)
Exemple #28
0
def run_test():
    """Instantiate ``Test``, call it on two random vectors and backpropagate."""
    module = Test()

    # Two random 4x5 variables are created first; they are not passed to the
    # module below.
    unused_a = Variable(torch.rand(4, 5))
    unused_b = Variable(torch.rand(4, 5))
    sample = torch.rand(4)
    ground_truth = torch.rand(4)  # ground-truth

    result = module(sample, ground_truth)
    result.backward()
    def test_HeterskedasticSingleTaskGP(self, cuda=False):
        """Check construction, forward pass and posterior shapes of the model.

        Every combination of batch shape (none or ``[2]``), number of outputs
        (1 or 2) and dtype (float, then double) is exercised on the CPU, or on
        CUDA when ``cuda`` is true.
        """
        device = torch.device("cuda") if cuda else torch.device("cpu")
        for batch_shape in (torch.Size([]), torch.Size([2])):
            for num_outputs in (1, 2):
                for dtype in (torch.float, torch.double):
                    tkwargs = {"device": device, "dtype": dtype}
                    model = self._get_model(
                        batch_shape=batch_shape, num_outputs=num_outputs, **tkwargs
                    )

                    # test init: mean, kernel stack and likelihood types
                    self.assertIsInstance(model.mean_module, ConstantMean)
                    self.assertIsInstance(model.covar_module, ScaleKernel)
                    base_kernel = model.covar_module.base_kernel
                    self.assertIsInstance(base_kernel, MaternKernel)
                    self.assertIsInstance(base_kernel.lengthscale_prior, GammaPrior)
                    noise_model = model.likelihood
                    self.assertIsInstance(noise_model, _GaussianLikelihoodBase)
                    self.assertNotIsInstance(noise_model, GaussianLikelihood)
                    self.assertIsInstance(
                        noise_model.noise_covar, HeteroskedasticNoise
                    )

                    # test forward
                    query_shape = batch_shape + torch.Size([3, 1])
                    forward_out = model(torch.rand(query_shape, **tkwargs))
                    self.assertIsInstance(forward_out, MultivariateNormal)

                    # check param sizes: one entry per output and batch element
                    expected_numel = (
                        num_outputs * torch.tensor(batch_shape).prod().item()
                    )
                    for param in dict(model.named_parameters()).values():
                        self.assertEqual(param.numel(), expected_numel)

                    # test posterior, non batch evaluation
                    plain_posterior = model.posterior(
                        torch.rand(query_shape, **tkwargs)
                    )
                    self.assertIsInstance(plain_posterior, GPyTorchPosterior)
                    self.assertEqual(
                        plain_posterior.mean.shape,
                        batch_shape + torch.Size([3, num_outputs]),
                    )

                    # test posterior, batch evaluation (extra leading dim of 2)
                    extra_batch = torch.Size([2])
                    batched_posterior = model.posterior(
                        torch.rand(extra_batch + query_shape, **tkwargs)
                    )
                    self.assertIsInstance(batched_posterior, GPyTorchPosterior)
                    self.assertEqual(
                        batched_posterior.mean.shape,
                        extra_batch + batch_shape + torch.Size([3, num_outputs]),
                    )
Exemple #30
0
 def test_gpytorch_model(self):
     """Check posterior type and mean shape, with and without observation noise."""
     train_X = torch.rand(5, 1)
     train_Y = torch.sin(train_X.squeeze())
     model = SimpleGPyTorchModel(train_X, train_Y)
     test_X = torch.rand(2, 1)
     expected_shape = torch.Size([2, 1])
     # First the basic query, then the same query with observation noise.
     for extra_kwargs in ({}, {"observation_noise": True}):
         posterior = model.posterior(test_X, **extra_kwargs)
         self.assertIsInstance(posterior, GPyTorchPosterior)
         self.assertEqual(posterior.mean.shape, expected_shape)
Exemple #31
0
def summary(model, input_size, batch_size=-1, device="cuda"):
    """Print a per-layer table of output shapes and parameter counts.

    A forward hook is registered on every sub-module except ``nn.Sequential``,
    ``nn.ModuleList`` and ``model`` itself. One forward pass on random input
    then records, for each hooked module, its input/output shapes, its number
    of parameters and the number of bits those parameters occupy.

    Bit accounting: ``weight``, ``bias``, ``running_mean`` and ``running_var``
    count 32 bits per element; ``shift`` counts 5 bits per element. Running
    statistics are only counted when ``track_running_stats`` is truthy.

    Args:
        model: Network to inspect; it is called as ``model(*x)`` with one
            random tensor per entry of ``input_size``. The model itself is not
            moved to ``device`` by this function.
        input_size: Shape of one input without the batch dimension (a tuple),
            or a list of such tuples for multi-input networks.
        batch_size: Value written into the batch position of the recorded
            shapes and used for the input-size estimate.
        device: ``"cuda"`` or ``"cpu"`` (case-insensitive). CUDA tensors are
            used only when CUDA is actually available.

    Returns:
        None. The table and the size estimates are printed to stdout.

    Raises:
        AssertionError: If ``device`` is neither ``"cuda"`` nor ``"cpu"``, or
            if a module has a ``shift`` without a same-sized ``sign``.
    """

    def _numel(owner, attr_name):
        """Return the element count of ``owner.<attr_name>`` as an int, or None."""
        tensor = getattr(owner, attr_name, None)
        if hasattr(tensor, "size"):
            return tensor.numel()
        return None

    def register_hook(module):
        def hook(module, input, output):
            class_name = str(module.__class__).split(".")[-1].split("'")[0]
            m_key = "%s-%i" % (class_name, len(layers) + 1)
            info = layers[m_key] = OrderedDict()
            info["input_shape"] = list(input[0].size())
            info["input_shape"][0] = batch_size
            if isinstance(output, (list, tuple)):
                info["output_shape"] = [[-1] + list(o.size())[1:]
                                        for o in output]
            else:
                info["output_shape"] = list(output.size())
                info["output_shape"][0] = batch_size

            # Counts are kept as plain ints so that the totals below stay
            # ints even when no hooked layer owns any parameter.
            params = 0
            params_bits = 0

            weight_params = _numel(module, "weight")
            if weight_params is not None:
                params += weight_params
                params_bits += weight_params * 32
                info["trainable"] = module.weight.requires_grad

            shift_params = _numel(module, "shift")
            if shift_params is not None:
                assert hasattr(module, "sign")
                assert hasattr(module.sign, "size")
                assert module.shift.size() == module.sign.size()
                params += shift_params
                params_bits += shift_params * 5
                info["trainable"] = module.shift.requires_grad

            bias_params = _numel(module, "bias")
            if bias_params is not None:
                params += bias_params
                params_bits += bias_params * 32

            if getattr(module, "track_running_stats", False):
                for stat_name in ("running_mean", "running_var"):
                    stat_params = _numel(module, stat_name)
                    if stat_params is not None:
                        params += stat_params
                        params_bits += stat_params * 32

            info["nb_params"] = params
            info["bits_params"] = params_bits

        if (not isinstance(module, nn.Sequential)
                and not isinstance(module, nn.ModuleList)
                and not (module == model)):
            hooks.append(module.register_forward_hook(hook))

    device = device.lower()
    assert device in [
        "cuda",
        "cpu",
    ], "Input device is not valid, please specify 'cuda' or 'cpu'"

    if device == "cuda" and torch.cuda.is_available():
        dtype = torch.cuda.FloatTensor
    else:
        dtype = torch.FloatTensor

    # multiple inputs to the network
    if isinstance(input_size, tuple):
        input_size = [input_size]

    # batch_size of at least 2 for each GPU for batchnorm
    n_samples = (torch.cuda.device_count() + 1) * 2
    x = [torch.rand(n_samples, *in_size).type(dtype) for in_size in input_size]

    # per-layer records, filled by the hooks in call order
    layers = OrderedDict()
    hooks = []

    # register hook
    model.apply(register_hook)

    # make a forward pass
    model(*x)

    # remove these hooks
    for h in hooks:
        h.remove()

    print("----------------------------------------------------------------")
    line_new = "{:>20}  {:>25} {:>15}".format("Layer (type)", "Output Shape",
                                              "Param #")
    print(line_new)
    print("================================================================")
    total_params = 0
    total_params_bits = 0
    total_output = 0
    trainable_params = 0
    for layer, info in layers.items():
        # input_shape, output_shape, trainable, nb_params
        line_new = "{:>20}  {:>25} {:>15}".format(
            layer,
            str(info["output_shape"]),
            "{0:,}".format(info["nb_params"]),
        )
        total_params += info["nb_params"]
        total_params_bits += info["bits_params"]
        total_output += np.prod(info["output_shape"])
        if info.get("trainable"):
            trainable_params += info["nb_params"]
        print(line_new)

    # assume 4 bytes/number (float on cuda).
    total_input_size = abs(np.prod(input_size) * batch_size * 4. / (1024**2.))
    total_output_size = abs(2. * total_output * 4. /
                            (1024**2.))  # x2 for gradients
    total_params_size = abs(total_params_bits / (8. * (1024**2.)))
    total_size = total_params_size + total_output_size + total_input_size

    print("================================================================")
    print("Total params: {0:,}".format(total_params))
    print("Trainable params: {0:,}".format(trainable_params))
    print("Non-trainable params: {0:,}".format(total_params -
                                               trainable_params))
    print("----------------------------------------------------------------")
    print("Input size (MB): %0.2f" % total_input_size)
    print("Forward/backward pass size (MB): %0.2f" % total_output_size)
    print("Params size (MB): %0.2f" % total_params_size)
    print("Estimated Total Size (MB): %0.2f" % total_size)
    print("----------------------------------------------------------------")
Exemple #32
0
def main():
    """Train a pose network end to end and save checkpoints and the final weights.

    Steps, in order: parse arguments and update the global ``config``, create
    the logger and output directories, build the model named by
    ``config.MODEL.NAME``, log its graph to TensorBoard, wrap it in
    ``DataParallel`` on the GPUs listed in ``config.GPUS``, build the loss,
    optimizer, LR scheduler and data loaders, then alternate training and
    validation for each epoch, checkpointing after every epoch.
    """
    args = parse_args()
    reset_config(config, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        config, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    # NOTE(review): eval() on a config-supplied name executes arbitrary code if
    # the config is not trusted; a getattr-based lookup would avoid that.
    model = eval('models.' + config.MODEL.NAME + '.get_pose_net')(
        config, is_train=True)

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', config.MODEL.NAME + '.py'),
        final_output_dir)

    # Shared TensorBoard writer plus step counters, passed to train/validate.
    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }
    # Random dummy batch used only to trace the graph
    # (presumably 32*3*256*192 with the default config -- TODO confirm).
    dump_input = torch.rand(
        (config.TRAIN.BATCH_SIZE, 3, config.MODEL.IMAGE_SIZE[1],
         config.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model, (dump_input, ), verbose=False)

    # config.GPUS is a comma-separated string of device ids.
    gpus = [int(i) for i in config.GPUS.split(',')]
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=config.LOSS.USE_TARGET_WEIGHT).cuda()

    optimizer = get_optimizer(config, model)

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR)

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # NOTE(review): same eval() concern as for the model lookup above.
    train_dataset = eval('dataset.' + config.DATASET.DATASET)(
        config, config.DATASET.ROOT, config.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + config.DATASET.DATASET)(
        config, config.DATASET.ROOT, config.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    # The configured batch size is per GPU; the loaders use the total.
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE * len(gpus),
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE * len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True)

    best_perf = 0.0
    best_model = False
    for epoch in range(config.TRAIN.BEGIN_EPOCH, config.TRAIN.END_EPOCH):
        # NOTE(review): the scheduler is stepped before the epoch's optimizer
        # steps; recent PyTorch releases expect scheduler.step() after them.
        # Confirm which PyTorch version this script targets before reordering.
        lr_scheduler.step()

        # train for one epoch
        # NOTE(review): the first training sample is fetched here but the
        # result is never used -- looks like a debugging leftover.
        dataiter = train_dataset[0]
        train(config, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)

        # evaluate on validation set
        perf_indicator = validate(config, valid_loader, valid_dataset, model,
                                  criterion, final_output_dir, tb_log_dir,
                                  writer_dict)

        # Track whether this epoch produced the best validation score so far.
        if perf_indicator > best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': get_model_name(config),
                'state_dict': model.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    # Save the unwrapped (non-DataParallel) weights once training is done.
    final_model_state_file = os.path.join(final_output_dir,
                                          'final_state.pth.tar')
    logger.info(
        'saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
Exemple #33
0
    def train(self):
        """Run adversarial training until ``self.epoch`` exceeds ``max_epoch``.

        For every full-sized batch the discriminator ``self.D`` is updated on
        a real batch, a generated batch and a gradient penalty evaluated at
        perturbed real images; then the generator ``self.G`` is updated once.
        Every ``verbose_T`` iterations the losses are printed and one real and
        one generated image grid are written to ``tmp_path``. A checkpoint
        with both networks and both optimizers is saved after each epoch.

        Batches whose first dimension differs from ``batch_size`` are skipped
        (they still advance the iteration counter).
        """
        iteration = -1
        # Reused target tensor; it is filled with 1.0 or 0.0 before each use.
        label = Variable(torch.FloatTensor(batch_size, 1)).to(device)
        while self.epoch <= max_epoch:
            adjust_learning_rate(self.optimizer_G, iteration)
            adjust_learning_rate(self.optimizer_D, iteration)
            for i, (anime_tag, anime_img) in enumerate(self.data_loader):
                iteration += 1
                if anime_img.shape[0] != batch_size:
                    continue
                anime_img = Variable(anime_img).to(device)
                anime_tag = Variable(torch.FloatTensor(anime_tag)).to(device)
                # D : G = 2 : 1
                # 1. Training D
                # 1.1. use real image for discriminating
                self.D.zero_grad()
                label_p, tag_p = self.D(anime_img)
                label.data.fill_(1.0)

                # 1.2. real image's loss
                real_label_loss = self.label_criterion(label_p, label)
                real_tag_loss = self.tag_criterion(tag_p, anime_tag)
                real_loss_sum = real_label_loss * lambda_adv / 2.0 + real_tag_loss * lambda_adv / 2.0
                real_loss_sum.backward()

                # 1.3. use fake image for discriminating
                g_noise, fake_tag = utils.fake_generator(
                    batch_size, noise_size, device)
                fake_feat = torch.cat([g_noise, fake_tag], dim=1)
                # detach() keeps the discriminator update out of G's graph.
                fake_img = self.G(fake_feat).detach()
                fake_label_p, fake_tag_p = self.D(fake_img)
                label.data.fill_(.0)

                # 1.4. fake image's loss
                fake_label_loss = self.label_criterion(fake_label_p, label)
                fake_tag_loss = self.tag_criterion(fake_tag_p, fake_tag)
                fake_loss_sum = fake_label_loss * lambda_adv / 2.0 + fake_tag_loss * lambda_adv / 2.0
                fake_loss_sum.backward()

                # 1.5. gradient penalty
                # https://github.com/jfsantos/dragan-pytorch/blob/master/dragan.py
                # alpha has one random weight per sample, broadcast over the
                # remaining image dimensions.
                alpha_size = [1] * anime_img.dim()
                alpha_size[0] = anime_img.size(0)
                alpha = torch.rand(alpha_size).to(device)
                x_hat = Variable(alpha * anime_img.data + (1 - alpha) * \
                                 (anime_img.data + 0.5 * anime_img.data.std() * Variable(torch.rand(anime_img.size())).to(device)),
                                 requires_grad=True).to(device)
                pred_hat, pred_tag = self.D(x_hat)
                # create_graph=True keeps the input gradients differentiable,
                # so the penalty can be backpropagated into D's parameters.
                gradients = grad(outputs=pred_hat,
                                 inputs=x_hat,
                                 grad_outputs=torch.ones(
                                     pred_hat.size()).to(device),
                                 create_graph=True,
                                 retain_graph=True,
                                 only_inputs=True)[0].view(x_hat.size(0), -1)
                gradient_penalty = lambda_gp * (
                    (gradients.norm(2, dim=1) - 1)**2).mean()
                # Backpropagate through the penalty itself. Re-wrapping it in
                # a new Variable would detach it from the graph, and the
                # penalty would then never reach the discriminator.
                gradient_penalty.backward()

                # 1.6. update optimizer
                self.optimizer_D.step()

                # 2. Training G
                # 2.1. generate fake image
                self.G.zero_grad()
                g_noise, fake_tag = utils.fake_generator(
                    batch_size, noise_size, device)
                fake_feat = torch.cat([g_noise, fake_tag], dim=1)
                fake_img = self.G(fake_feat)
                fake_label_p, fake_tag_p = self.D(fake_img)
                label.data.fill_(1.0)

                # 2.2. calc loss
                label_loss_g = self.label_criterion(fake_label_p, label)
                tag_loss_g = self.tag_criterion(fake_tag_p, fake_tag)
                loss_g = label_loss_g * lambda_adv / 2.0 + tag_loss_g * lambda_adv / 2.0
                loss_g.backward()

                # 2.3. update optimizer
                self.optimizer_G.step()

                if iteration % verbose_T == 0:
                    print('The iteration is now %d' % iteration)
                    print('The loss is %.4f, %.4f, %.4f, %.4f' %
                          (real_loss_sum, fake_loss_sum, gradient_penalty,
                           loss_g))
                    vutils.save_image(
                        anime_img.data.view(batch_size, 3, anime_img.size(2),
                                            anime_img.size(3)),
                        os.path.join(
                            tmp_path, 'real_image_{}.png'.format(
                                str(iteration).zfill(8))))
                    g_noise, fake_tag = utils.fake_generator(
                        batch_size, noise_size, device)
                    fake_feat = torch.cat([g_noise, fake_tag], dim=1)
                    fake_img = self.G(fake_feat)
                    vutils.save_image(
                        fake_img.data.view(batch_size, 3, anime_img.size(2),
                                           anime_img.size(3)),
                        os.path.join(
                            tmp_path, 'fake_image_{}.png'.format(
                                str(iteration).zfill(8))))
            # dump checkpoint
            torch.save(
                {
                    'epoch': self.epoch,
                    'D': self.D.state_dict(),
                    'G': self.G.state_dict(),
                    'optimizer_D': self.optimizer_D.state_dict(),
                    'optimizer_G': self.optimizer_G.state_dict(),
                }, '{}/checkpoint_{}.tar'.format(model_dump_path,
                                                 str(self.epoch).zfill(4)))
            self.epoch += 1
        x = self.conv_last(x)
        x = self.bn_last(x)
        x = self.activation(x)
        #
        # # average pooling layer
        # x = self.avgpool(x)
        # print(x.shape)
        # # flatten for input to fully-connected layer
        # x = x.view(x.size(0), -1)
        # x = self.fc(x)
        return output[0], output[1], x
        # return x#F.log_softmax(x, dim=1)

    # Speed test
if __name__ == "__main__":
    # Build the network once, then time three forward passes on random input.
    model = ShuffleNetV2(
        scale=1,
        in_channels=3,
        c_tag=0.5,
        num_classes=2,
        activation=nn.ReLU,
        SE=False,
        residual=False,
    )

    for _ in range(3):
        started = time.time()
        x = torch.rand(1, 3, 352, 352)
        out3, out4, out5 = model(x)
        # Report the size of each of the three returned tensors.
        for feature_map in (out3, out4, out5):
            print(feature_map.size())
        elapsed = time.time() - started
        print(elapsed)
Exemple #35
0
    def summary(self, input_size):
        """Print a table of this module's leaf layers, output shapes and parameter counts.

        Forward hooks record every sub-module that has no children of its own
        during one forward pass on random input of batch size 1. The recorded
        layers are then paired, in order, with the dotted names of the leaf
        modules and printed together with the total and trainable counts.

        Args:
            input_size: Shape of one input without the batch dimension, or a
                list/tuple of such shapes. In the second case a list with one
                random tensor per shape is passed to the module as a single
                argument.

        Returns:
            None. The table is printed to stdout.
        """
        def register_hook(module):
            def hook(module, input, output):
                if module._modules:  # only want base layers
                    return
                class_name = str(module.__class__).split('.')[-1].split("'")[0]
                m_key = '%s-%i' % (class_name, len(layer_stats) + 1)
                entry = layer_stats[m_key] = OrderedDict()
                entry['input_shape'] = list(input[0].size())
                entry['input_shape'][0] = None
                # For multi-output layers only the first output is reported.
                first_output = output[0] if isinstance(output, (list, tuple)) else output
                entry['output_shape'] = list(first_output.size())
                entry['output_shape'][0] = None

                params = 0
                # iterate through parameters and count num params
                for p in module._parameters.values():
                    if p is None:
                        # e.g. a layer built with bias=False registers None
                        continue
                    params += torch.numel(p.data)
                    entry['trainable'] = p.requires_grad

                entry['nb_params'] = params

            if not isinstance(module, torch.nn.Sequential) and \
               not isinstance(module, torch.nn.ModuleList) and \
               not (module == self):
                hooks.append(module.register_forward_hook(hook))

        # check if there are multiple inputs to the network
        if isinstance(input_size[0], (list, tuple)):
            x = [(torch.rand(1, *in_size)) for in_size in input_size]
        else:
            x = (torch.randn(1, *input_size))

        # per-layer records, filled by the hooks in call order
        layer_stats = OrderedDict()
        hooks = []
        # register hook
        self.apply(register_hook)
        # make a forward pass
        self(x)
        # remove these hooks
        for h in hooks:
            h.remove()

        # Collect the dotted names of all leaf modules in definition order.
        def get_names(module, name, acc):
            if not module._modules:
                acc.append(name)
            else:
                for key in module._modules.keys():
                    p_name = key if name == "" else name + "." + key
                    get_names(module._modules[key], p_name, acc)
        names = []
        get_names(self, "", names)

        col_width = 25  # should be >= 12
        summary_width = 61

        def crop(s):
            return s[:col_width] if len(s) > col_width else s

        print('_' * summary_width)
        print('{0: <{3}} {1: <{3}} {2: <{3}}'.format(
            'Layer (type)', 'Output Shape', 'Param #', col_width))
        print('=' * summary_width)
        total_params = 0
        trainable_params = 0
        last_row = len(layer_stats) - 1
        for (i, l_type), l_name in zip(enumerate(layer_stats), names):
            d = layer_stats[l_type]
            total_params += d['nb_params']
            if 'trainable' in d and d['trainable']:
                trainable_params += d['nb_params']
            # Drop the "-<index>" suffix whatever the number of digits.
            type_name = l_type.rsplit('-', 1)[0]
            print('{0: <{3}} {1: <{3}} {2: <{3}}'.format(
                crop(l_name + ' (' + type_name + ')'), crop(str(d['output_shape'])),
                crop(str(d['nb_params'])), col_width))
            if i < last_row:
                print('_' * summary_width)
        print('=' * summary_width)
        print('Total params: ' + str(total_params))
        print('Trainable params: ' + str(trainable_params))
        print('Non-trainable params: ' + str((total_params - trainable_params)))
        print('_' * summary_width)
Exemple #36
0
def get_summary(net, input_size, batch_size=1, device="cuda", verbose=False):
    """Collect per-module shapes and parameter counts of ``net`` and format them.

    A forward hook is attached to every sub-module except ``Sequential``,
    ``ModuleList`` and ``net`` itself; one forward pass on random input with a
    batch of 2 fills the records, which are keyed by the module's name as
    reported by ``net.named_modules()``.

    Args:
        net: Network to inspect; called as ``net(*x)``.
        input_size: One input shape without batch dimension (tuple) or a list
            of such shapes.
        batch_size: Value written into the batch position of recorded shapes.
        device: ``"cuda"`` or ``"cpu"`` (case-insensitive); CUDA tensors are
            used only if CUDA is available.
        verbose: When true, the formatted table is sent to ``logging.info``.

    Returns:
        A dict with keys ``'dict'`` (the per-module records), ``'prettyprint'``
        (the formatted table), ``'biggest_activation'`` (largest single output
        size in kB) and ``'params_size'`` (summed parameter size in kB).
        Parameters count ``W_bits`` bits each and activations ``bits`` bits
        each when the record has those keys, otherwise 32.
    """
    mdict = dict(net.named_modules())

    def register_hook(module):
        def hook(module, input, output):
            # Key the record by the first registered name of this module.
            m_key = next(n for n, m in mdict.items() if m == module)
            entry = OrderedDict()
            summary[m_key] = entry
            entry["input_shape"] = list(input[0].size())
            entry["input_shape"][0] = batch_size
            if isinstance(output, (list, tuple)):
                entry["output_shape"] = [
                    [-1] + list(o.size())[1:] for o in output
                ]
            else:
                entry["output_shape"] = list(output.size())
                entry["output_shape"][0] = batch_size

            params = 0
            if hasattr(module, "weight") and hasattr(module.weight, "size"):
                weight_count = torch.prod(
                    torch.LongTensor(list(module.weight.size())))
                try:
                    # Modules exposing ``group`` report the count divided by it.
                    params += weight_count / module.group
                except AttributeError:
                    params += weight_count
                entry["trainable"] = module.weight.requires_grad
            if hasattr(module, "bias") and hasattr(module.bias, "size"):
                params += torch.prod(torch.LongTensor(list(module.bias.size())))
            entry["nb_params"] = params

            if hasattr(module, "W_precision"):
                entry['W_bits'] = module.W_precision.get_bits()

            if hasattr(module, "precision"):
                entry['bits'] = module.precision.get_bits()

        skipped = (
            isinstance(module, torch.nn.Sequential)
            or isinstance(module, torch.nn.ModuleList)
            or module == net
        )
        if not skipped:
            hooks.append(module.register_forward_hook(hook))

    device = device.lower()
    assert device in [
        "cuda",
        "cpu",
    ], "Input device is not valid, please specify 'cuda' or 'cpu'"

    use_cuda = device == "cuda" and torch.cuda.is_available()
    dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor

    # multiple inputs to the network
    if isinstance(input_size, tuple):
        input_size = [input_size]

    # batch_size of 2 for batchnorm
    x = [torch.rand(2, *in_size).type(dtype) for in_size in input_size]

    # per-module records, filled by the hooks in call order
    summary = OrderedDict()
    hooks = []

    net.apply(register_hook)
    net(*x)
    for handle in hooks:
        handle.remove()

    thin_rule = "----------------------------------------------------------------"
    thick_rule = "================================================================"
    row_format = "{:>20}  {:>25} {:>15}"

    lines = [
        thin_rule,
        row_format.format("Layer (type)", "Output Shape", "Param #"),
        thick_rule,
    ]
    total_params = 0
    trainable_params = 0
    params_size = 0
    output_size = 0
    for layer, entry in summary.items():
        nb_params = entry["nb_params"]
        lines.append(row_format.format(
            layer,
            str(entry["output_shape"]),
            "{0:,}".format(nb_params),
        ))
        total_params += nb_params
        params_size += abs(nb_params * entry.get("W_bits", 32.) / 8. / (1024.))
        activation_kb = (
            np.prod(entry["output_shape"]) * entry.get("bits", 32) / 8 / (1024.)
        )
        output_size = max(output_size, activation_kb)
        if "trainable" in entry and entry["trainable"] == True:
            trainable_params += nb_params

    lines.extend([
        thick_rule,
        "Total params: {0:,}".format(total_params),
        "Trainable params: {0:,}".format(trainable_params),
        "Non-trainable params: {0:,}".format(total_params - trainable_params),
        thin_rule,
        "Biggest activation tensor size (kB): {0:,.2f}".format(output_size),
        "Params size (kB): {0:,.1f}".format(params_size),
        thin_rule,
    ])
    s = "".join(line + "\n" for line in lines)
    if verbose:
        logging.info(s)
    return { 'dict': summary, 'prettyprint': s, 'biggest_activation': output_size, 'params_size': params_size }
    def __init__(self, dev):
        """Build the op/argument tables used by autocast tests on device ``dev``.

        Each public attribute is a list of tuples whose first element is an op
        name and whose second element is a tuple of example arguments; some
        lists carry a third element (an expected dtype, or a dict of keyword
        arguments). All example tensors are drawn from the global RNG, so the
        statement order below determines their values.

        Args:
            dev: Device passed to every tensor constructor in this method.
        """
        super().__init__()
        n = 8
        # Utility arguments, created as one-element tuples
        # (tuples concatenate with "+" to form multi-argument call tuples).
        # NOTE(review): pointwise2_bf16 and mat0/1/2_bf16 are not referenced
        # again in this method.
        pointwise0_bf16 = (torch.randn(n, dtype=torch.bfloat16, device=dev),)
        pointwise1_bf16 = (torch.randn(n, dtype=torch.bfloat16, device=dev),)
        pointwise2_bf16 = (torch.randn(n, dtype=torch.bfloat16, device=dev),)
        mat0_bf16 = (torch.randn((n, n), dtype=torch.bfloat16, device=dev),)
        mat1_bf16 = (torch.randn((n, n), dtype=torch.bfloat16, device=dev),)
        mat2_bf16 = (torch.randn((n, n), dtype=torch.bfloat16, device=dev),)

        # Shapes of rank 1 through 5, every extent equal to n.
        dummy_dimsets = ((n,), (n, n), (n, n, n), (n, n, n, n), (n, n, n, n, n))

        dummy_bf16 = [(torch.randn(dimset, dtype=torch.bfloat16, device=dev),)
                      for dimset in dummy_dimsets]

        # Rank 3/4/5 shapes; index 0, 1, 2 are used for the 1d, 2d and 3d
        # convolution entries below. Each entry is a pair of same-shaped tensors.
        dimsets = ((n, n, n), (n, n, n, n), (n, n, n, n, n))
        conv_args_bf16 = [(torch.randn(dimset, dtype=torch.bfloat16, device=dev),
                           torch.randn(dimset, dtype=torch.bfloat16, device=dev))
                          for dimset in dimsets]
        conv_args_fp32 = [(torch.randn(dimset, dtype=torch.float32, device=dev),
                           torch.randn(dimset, dtype=torch.float32, device=dev))
                          for dimset in dimsets]

        # NOTE(review): bias_fp32 and element0_fp32 are not referenced again
        # in this method.
        bias_fp32 = (torch.randn((n,), dtype=torch.float32, device=dev),)
        element0_fp32 = (torch.randn(1, dtype=torch.float32, device=dev),)
        pointwise0_fp32 = (torch.randn(n, dtype=torch.float32, device=dev),)
        pointwise1_fp32 = (torch.randn(n, dtype=torch.float32, device=dev),)
        mat0_fp32 = (torch.randn((n, n), dtype=torch.float32, device=dev),)
        mat1_fp32 = (torch.randn((n, n), dtype=torch.float32, device=dev),)
        mat2_fp32 = (torch.randn((n, n), dtype=torch.float32, device=dev),)
        mat3_fp32 = (torch.randn((n, n), dtype=torch.float32, device=dev),)

        # NOTE(review): dummy_fp32 is not referenced again in this method.
        dummy_fp32 = [(torch.randn(dimset, dtype=torch.float32, device=dev),)
                      for dimset in dummy_dimsets]
        # The lists below organize ops that autocast needs to test.
        # self.list_name corresponds to test_autocast_list_name in test/test_cpu.py.
        # Each op is associated with a tuple of valid arguments.

        # Some ops implement built-in type promotion.  These don't need autocasting,
        # but autocasting relies on their promotion, so we include tests to double-check.
        # Entries are (op name, mixed fp32/bf16 arguments, expected output dtype).
        self.torch_expect_builtin_promote = [
            ("eq", pointwise0_fp32 + pointwise1_bf16, torch.bool),
            ("ge", pointwise0_fp32 + pointwise1_bf16, torch.bool),
            ("gt", pointwise0_fp32 + pointwise1_bf16, torch.bool),
            ("le", pointwise0_fp32 + pointwise1_bf16, torch.bool),
            ("lt", pointwise0_fp32 + pointwise1_bf16, torch.bool),
            ("ne", pointwise0_fp32 + pointwise1_bf16, torch.bool),
            ("add", pointwise0_fp32 + pointwise1_bf16, torch.float32),
            ("div", pointwise0_fp32 + pointwise1_bf16, torch.float32),
            ("mul", pointwise0_fp32 + pointwise1_bf16, torch.float32),
        ]
        # Same comparisons and arithmetic, spelled as dunder method names.
        self.methods_expect_builtin_promote = [
            ("__eq__", pointwise0_fp32 + pointwise1_bf16, torch.bool),
            ("__ge__", pointwise0_fp32 + pointwise1_bf16, torch.bool),
            ("__gt__", pointwise0_fp32 + pointwise1_bf16, torch.bool),
            ("__le__", pointwise0_fp32 + pointwise1_bf16, torch.bool),
            ("__lt__", pointwise0_fp32 + pointwise1_bf16, torch.bool),
            ("__ne__", pointwise0_fp32 + pointwise1_bf16, torch.bool),
            ("__add__", pointwise0_fp32 + pointwise1_bf16, torch.float32),
            ("__div__", pointwise0_fp32 + pointwise1_bf16, torch.float32),
            ("__mul__", pointwise0_fp32 + pointwise1_bf16, torch.float32),
        ]
        # The remaining lists organize ops that autocast treats explicitly.
        # torch_bf16 entries are given fp32 inputs
        # (presumably these ops are expected to run in bf16 -- TODO confirm).
        self.torch_bf16 = [
            ("conv1d", conv_args_fp32[0]),
            ("conv2d", conv_args_fp32[1]),
            ("conv3d", conv_args_fp32[2]),
            ("bmm", (torch.randn((n, n, n), device=dev, dtype=torch.float32),
                     torch.randn((n, n, n), device=dev, dtype=torch.float32))),
            ("mm", mat0_fp32 + mat1_fp32),
            ("matmul", mat0_fp32 + mat1_fp32),
            ("baddbmm", (torch.randn((n, n, n), device=dev, dtype=torch.float32),
                         torch.randn((n, n, n), device=dev, dtype=torch.float32),
                         torch.randn((n, n, n), device=dev, dtype=torch.float32))),
            ("addmm", mat1_fp32 + mat2_fp32 + mat3_fp32),
            ("addbmm", mat0_fp32 + (torch.randn((n, n, n), device=dev, dtype=torch.float32),
                                    torch.randn((n, n, n), device=dev, dtype=torch.float32))),
            ("conv_tbc", (torch.randn((10, 7, 3), device=dev, dtype=torch.float32),
                          torch.randn((5, 3, 5), device=dev, dtype=torch.float32),
                          torch.randn(5, device=dev, dtype=torch.float32),
                          0)),
        ]
        # torch_fp32 entries are given bf16 inputs
        # (presumably these ops are expected to run in fp32 -- TODO confirm).
        self.torch_fp32 = [
            ("conv_transpose1d", conv_args_bf16[0]),
            ("conv_transpose2d", conv_args_bf16[1]),
            ("conv_transpose3d", conv_args_bf16[2]),
        ]
        # nn_* entries may carry a third element: a dict of keyword arguments.
        self.nn_bf16 = [
            ("linear", mat0_fp32 + mat1_fp32, {}),
        ]
        self.nn_fp32 = [
            ("avg_pool3d", dummy_bf16[3], {"kernel_size": (3, 3, 3), "stride": (1, 1, 1)}),
            # torch.rand (not randn) is used here, so both tensors lie in [0, 1).
            ("binary_cross_entropy", (torch.rand((n, n), device=dev, dtype=torch.bfloat16),) +
                                     (torch.rand((n, n), device=dev, dtype=torch.bfloat16),)),
            ("reflection_pad1d", dummy_bf16[2], {"padding": (3, 3)}),
        ]
        # Sequence ops receive a single argument: a tuple mixing bf16 and fp32.
        self.torch_need_autocast_promote = [
            ("cat", (pointwise0_bf16 + pointwise1_fp32,)),
            ("stack", (pointwise0_bf16 + pointwise1_fp32,)),
        ]
def train(net,
          feature,
          image_id,
          old_embedding,
          target_embedding,
          idx_modified,
          idx_old_neighbors,
          idx_new_neighbors,
          idx_negatives,
          lr=1e-3,
          experiment_id=None,
          socket_id=None,
          scale_func=None,
          categories=None,
          label=None):
    """Fine-tune ``net`` for one interaction cycle and return the new embedding.

    All samples are partitioned into disjoint index groups (modified,
    negatives, new neighbors, previously modified, old + high dimensional
    neighbors, fix points).  Every epoch draws batches from each group and
    minimises ``distance + 5 * L2 + 10 * KL + 1e-5 * L1(weights)``.  Training
    stops when the L2 loss tracker fires, the learning rate drops below 1e-5
    or 150 epochs have been run.

    Side effects: increments the module-level ``cycle`` counter, appends
    ``idx_modified`` to the module-level ``previously_modified``, overwrites
    the last column of ``label`` in place, writes TensorBoard logs and the
    per-cycle embedding / feature / model / config files below
    ``runs/mapping/<exp_name>/`` and, if ``socket_id`` is given, sends the
    current embedding after every epoch.

    Args:
        net: model exposing ``mapping`` and ``embedder`` sub-modules.
        feature: numpy array of input features, one row per sample.
        image_id: array of sample ids; stored alongside the HDF5 outputs.
        old_embedding: current embedding (numpy array or torch tensor).
        target_embedding: embedding the L2 loss pulls towards (numpy array
            or torch tensor).
        idx_modified: indices of samples moved by the user in this cycle.
        idx_old_neighbors: indices of the old neighborhood.
        idx_new_neighbors: indices of the new neighborhood.
        idx_negatives: indices of the selected negatives.
        lr: initial Adam learning rate.
        experiment_id: optional prefix for the experiment name.
        socket_id: if not None, intermediate embeddings are sent via
            ``send_payload``.
        scale_func: optional callable applied to the embedding before sending.
        categories: forwarded to ``send_payload``.
        label: optional 2-D array whose last column is overwritten with the
            group name of every sample; skipped when None.

    Returns:
        The embedding (numpy array) after the last epoch.

    Raises:
        RuntimeError: if the index groups overlap after de-duplication.
    """
    global cycle, previously_modified
    cycle += 1

    # log and saving options
    exp_name = 'MapNet'
    if experiment_id is not None:
        exp_name = experiment_id + '_' + exp_name

    log = TBPlotter(os.path.join('runs/mapping', 'tensorboard', exp_name))
    log.print_logdir()

    outpath_config = os.path.join('runs/mapping', exp_name, 'configs')
    outpath_embedding = os.path.join('runs/mapping', exp_name, 'embeddings')
    outpath_feature = os.path.join('runs/mapping', exp_name, 'features')
    outpath_model = os.path.join('runs/mapping', exp_name, 'models')
    for outpath in (outpath_config, outpath_embedding, outpath_feature,
                    outpath_model):
        if not os.path.isdir(outpath):
            os.makedirs(outpath)

    # general
    N = len(feature)
    use_cuda = torch.cuda.is_available()
    if not isinstance(old_embedding, torch.Tensor):
        old_embedding = torch.from_numpy(old_embedding.copy())
    if not isinstance(target_embedding, torch.Tensor):
        target_embedding = torch.from_numpy(target_embedding.copy())

    if use_cuda:
        net = net.cuda()
    net.train()

    # Set up different groups of indices.
    # Each sample belongs to exactly one group, the hierarchy is as follows:
    # 1: samples moved by user in this cycle
    # 2: negatives selected through neighbor method
    # 3: new neighborhood
    # 4: samples moved by user in previous cycles
    # 5: old neighborhood + high dimensional neighborhood of moved samples
    # 6: fix points / unrelated (remaining) samples

    # find high dimensional neighbors (first 100 nn of the modified samples)
    # TODO: Better rely on distance
    idx_high_dim_neighbors, _ = svm_k_nearest_neighbors(
        feature,
        np.union1d(idx_modified, idx_new_neighbors),
        negative_idcs=idx_negatives,
        k=100
    )

    # ensure there is no overlap between different index groups
    # (negatives may have been moved accidentally; TODO: better filter before)
    idx_modified = np.setdiff1d(idx_modified, idx_negatives)
    idx_new_neighbors = np.setdiff1d(
        idx_new_neighbors, np.concatenate([idx_modified, idx_negatives]))
    idx_previously_modified = np.setdiff1d(
        previously_modified,
        np.concatenate([idx_modified, idx_new_neighbors, idx_negatives]))
    idx_old_neighbors = np.setdiff1d(
        np.concatenate([idx_old_neighbors, idx_high_dim_neighbors]),
        np.concatenate([
            idx_modified, idx_new_neighbors, idx_previously_modified,
            idx_negatives
        ]))
    idx_fix_points = np.setdiff1d(
        np.arange(N),
        np.concatenate([
            idx_modified, idx_new_neighbors, idx_previously_modified,
            idx_old_neighbors, idx_negatives
        ]))

    groups = [
        idx_modified, idx_new_neighbors, idx_previously_modified,
        idx_old_neighbors, idx_fix_points, idx_negatives
    ]
    for i, g1 in enumerate(groups):
        for j, g2 in enumerate(groups):
            if i != j and len(np.intersect1d(g1, g2)) != 0:
                print('groups: {}, {}'.format(i, j))
                print(np.intersect1d(g1, g2))
                raise RuntimeError('Index groups overlap.')

    print('Group Overview:'
          '\n\tModified samples: {}'
          '\n\tNegative samples: {}'
          '\n\tNew neighbors: {}'
          '\n\tPreviously modified samples: {}'
          '\n\tOld neighbors: {}'
          '\n\tFix points: {}'.format(len(idx_modified), len(idx_negatives),
                                      len(idx_new_neighbors),
                                      len(idx_previously_modified),
                                      len(idx_old_neighbors),
                                      len(idx_fix_points)))

    # modify label (``label`` is optional, so only touch it when given)
    if label is not None:
        label[idx_modified, -1] = 'modified'
        label[idx_negatives, -1] = 'negative'
        label[idx_previously_modified, -1] = 'prev_modified'
        label[idx_new_neighbors, -1] = 'new neighbors'
        label[idx_old_neighbors, -1] = 'old neighbors'
        label[idx_high_dim_neighbors, -1] = 'high dim neighbors'
        label[idx_fix_points, -1] = 'other'

    optimizer = torch.optim.Adam(
        [p for p in net.parameters() if p.requires_grad], lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           'min',
                                                           patience=5,
                                                           threshold=1e-3,
                                                           verbose=True)

    kl_criterion = TSNELoss(N, use_cuda=use_cuda)
    l2_criterion = torch.nn.MSELoss(reduction='none')  # keep the output fixed
    noise_criterion = NormalizedMSE()

    # define the index samplers for data
    batch_size = 500
    max_len = max(
        len(idx_modified) + len(idx_previously_modified), len(idx_negatives),
        len(idx_new_neighbors), len(idx_old_neighbors), len(idx_fix_points))
    # Floor division keeps ``n_batches`` an int on Python 3 as well, which
    # ``range`` and the list padding below require.
    if max_len == len(idx_fix_points):
        n_batches = max_len // (batch_size * 2) + 1
    else:
        n_batches = max_len // batch_size + 1

    def make_batch_sampler(idx, size=batch_size):
        # Random batches over ``idx``; the last (smaller) batch is kept.
        return torch.utils.data.BatchSampler(
            sampler=torch.utils.data.SubsetRandomSampler(idx),
            batch_size=size,
            drop_last=False)

    # NOTE: the order of this list fixes the meaning of ``batch_loaders[k]``.
    samplers = [
        make_batch_sampler(idx_modified),
        make_batch_sampler(idx_negatives),
        make_batch_sampler(idx_new_neighbors),
        make_batch_sampler(idx_previously_modified),
        make_batch_sampler(idx_old_neighbors),
        make_batch_sampler(idx_fix_points, 2 * batch_size),
        make_batch_sampler(idx_high_dim_neighbors),
    ]

    # train network until scheduler reduces learning rate to threshold value
    lr_threshold = 1e-5
    track_l2_loss = ChangeRateLogger(n_track=5,
                                     threshold=5e-2,
                                     order='smaller')
    # currently unused (see ``regularization_stop_criterion`` below);
    # only considers order --> negative threshold
    track_noise_reg = ChangeRateLogger(
        n_track=10, threshold=-1,
        order='smaller')
    stop_criterion = False

    embeddings = {}
    model_states = {}
    # Always snapshot a deep copy: ``state_dict`` tensors share storage with
    # the live parameters, so without it every CPU snapshot would end up
    # holding the final weights.
    # NOTE(review): the optimizer ``state_dict().copy()`` is shallow as well
    # and may alias the live Adam buffers -- confirm whether that matters.
    cpu_net = copy.deepcopy(net).cpu()
    model_states[0] = {
        'epoch': 0,
        'loss': float('inf'),
        'state_dict': cpu_net.state_dict().copy(),
        'optimizer': optimizer.state_dict().copy(),
        'scheduler': scheduler.state_dict().copy()
    }
    embeddings[0] = old_embedding.numpy().copy()

    epoch = 1
    new_features = feature.copy()
    new_embedding = old_embedding.numpy().copy()

    # per-epoch wall-clock timings, reported after training
    t_beta = []
    t_train = []
    t_tensorboard = []
    t_save = []
    t_send = []
    t_iter = []

    tensor_feature = torch.from_numpy(feature)
    norms = torch.norm(tensor_feature, p=2, dim=1)
    mean_feature_norm = torch.mean(norms)
    norm_margin = norms.std()
    # currently unused: the norm loss is not part of the total loss
    norm_criterion = SoftNormLoss(norm_value=mean_feature_norm,
                                  margin=norm_margin)
    triplet_margin = mean_feature_norm
    triplet_selector = SemihardNegativeTripletSelector(
        margin=triplet_margin,
        cpu=False,
        preselect_index_positives=10,
        preselect_index_negatives=1,
        selection='random')
    distance_criterion = TripletLoss(margin=triplet_margin,
                                     triplet_selector=triplet_selector)
    negative_triplet_collector = []

    del norms
    while not stop_criterion:
        net.train()

        t_iter_start = time.time()

        # compute beta for kl loss
        t_beta_start = time.time()
        kl_criterion._compute_beta(new_features)
        t_beta.append(time.time() - t_beta_start)

        # set up losses
        l2_losses = AverageMeter()
        kl_losses = AverageMeter()
        distance_losses = AverageMeter()
        noise_regularization = AverageMeter()
        feature_norm = AverageMeter()
        norm_losses = AverageMeter()
        weight_regularization = AverageMeter()
        losses = AverageMeter()

        # iterate over fix points (assume N_fixpoints >> N_modified)
        t_train_start = time.time()
        batch_loaders = []
        for smplr in samplers:
            batches = list(smplr)
            if len(batches) == 0:
                batches = [[] for _ in range(n_batches)]
            # re-sample small groups until every group has n_batches batches
            while len(batches) < n_batches:
                to = min(n_batches - len(batches), len(batches))
                batches.extend(list(smplr)[:to])
            batch_loaders.append(batches)

        for batch_idx in range(n_batches):
            moved_indices = batch_loaders[0][batch_idx]
            negatives_indices = batch_loaders[1][batch_idx]
            new_neigh_indices = batch_loaders[2][batch_idx]
            prev_moved_indices = batch_loaders[3][batch_idx]
            old_neigh_indices = batch_loaders[4][batch_idx]
            fixed_indices = batch_loaders[5][batch_idx]
            high_neigh_indices = batch_loaders[6][batch_idx]
            n_moved, n_neg, n_new, n_prev, n_old, n_fixed, n_high = (
                len(moved_indices), len(negatives_indices),
                len(new_neigh_indices), len(prev_moved_indices),
                len(old_neigh_indices), len(fixed_indices),
                len(high_neigh_indices))
            # samples with a user defined / fixed position come first ...
            n_user = n_new + n_moved + n_neg + n_prev
            # ... followed by the fix points; old and high dimensional
            # neighbors form the tail of the batch
            n_dist = n_user + n_fixed

            # load data (np.int64 replaces the Python-2-only ``long``)
            indices = np.concatenate([
                new_neigh_indices, moved_indices, negatives_indices,
                prev_moved_indices, fixed_indices, old_neigh_indices,
                high_neigh_indices
            ]).astype(np.int64)
            if len(indices) < 3 * kl_criterion.perplexity + 2:
                continue
            data = tensor_feature[indices]
            inputs = torch.autograd.Variable(
                data.cuda()) if use_cuda else torch.autograd.Variable(data)

            # compute forward
            fts_mod = net.mapping(inputs)
            fts_mod_noise = net.mapping(
                inputs + torch.rand(inputs.shape).type_as(inputs))
            emb_mod = net.embedder(torch.nn.functional.relu(fts_mod))

            # compute losses
            # modified --> KL, L2, Dist
            # new neighborhood --> KL, Dist
            # previously modified --> KL, L2
            # old neighborhood + high dimensional neighborhood --> KL
            # fix point samples --> KL, L2

            noise_reg = noise_criterion(fts_mod, fts_mod_noise)
            noise_regularization.update(noise_reg.data, len(data))

            kl_loss = kl_criterion(fts_mod, emb_mod, indices)
            kl_losses.update(kl_loss.data, len(data))

            idx_l2_fixed = indices[:n_dist]
            l2_loss = torch.mean(l2_criterion(
                emb_mod[:n_dist],
                target_embedding[idx_l2_fixed].type_as(emb_mod)),
                                 dim=1)
            # weigh loss of space samples equally to all modified samples
            l2_loss = 0.5 * torch.mean(l2_loss[:n_user]) + \
                      0.5 * torch.mean(l2_loss[n_user:])

            l2_losses.update(l2_loss.data, len(idx_l2_fixed))

            if epoch < 0:
                distance_loss = torch.tensor(0.)
            else:
                # Slice by an explicit count: ``x[:-0]`` would be empty when
                # the batch holds no old / high dimensional neighbors.
                distance_loss_input = fts_mod[:n_dist]
                distance_loss_target = torch.cat([
                    torch.ones(n_new + n_moved),
                    torch.zeros(n_neg + n_prev + n_fixed)
                ])
                distance_loss_weights = torch.cat([
                    torch.ones(n_user),
                    0.5 * torch.ones(n_fixed)
                ])
                if n_neg > 0:
                    selected_negatives = {
                        1: np.arange(n_new + n_moved, n_new + n_moved + n_neg)
                    }
                else:
                    selected_negatives = None

                distance_loss, negative_triplets = distance_criterion(
                    distance_loss_input,
                    distance_loss_target,
                    concealed_classes=[0],
                    weights=distance_loss_weights,
                    selected_negatives=selected_negatives)
                if negative_triplets is not None:
                    # map batch positions back to dataset indices
                    negative_triplets = np.unique(negative_triplets.numpy())
                    negative_triplet_collector.extend(
                        indices[:n_dist][negative_triplets])
                distance_loss_noise, _ = distance_criterion(
                    distance_loss_input +
                    torch.rand(distance_loss_input.shape).type_as(
                        distance_loss_input),
                    distance_loss_target,
                    concealed_classes=[0])
                distance_loss = 0.5 * distance_loss + 0.5 * distance_loss_noise

            distance_losses.update(distance_loss.data, n_new + n_moved)

            weight_reg = torch.autograd.Variable(
                torch.tensor(0.)).type_as(l2_loss)
            for param in net.mapping.parameters():
                weight_reg += param.norm(1)
            # store the detached value so the meter does not keep the
            # autograd graph of every batch alive
            weight_regularization.update(weight_reg.data, len(data))

            loss = 1 * distance_loss.type_as(l2_loss) + 5 * l2_loss + \
                10 * kl_loss.type_as(l2_loss) + \
                1e-5 * weight_reg.type_as(l2_loss)
            losses.update(loss.data, len(data))

            feature_norm.update(
                torch.mean(fts_mod.norm(p=2, dim=1)).data, len(data))

            # backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # update current embedding
            new_embedding[indices] = emb_mod.data.cpu().numpy()

            if epoch > 5 and (batch_idx + 1) * batch_size >= 2000:
                print('\tend epoch after {} random fix point samples'.format(
                    (batch_idx + 1) * batch_size))
                break

        t_train.append(time.time() - t_train_start)

        t_tensorboard_start = time.time()
        scheduler.step(losses.avg)
        # ``np.unique([])`` yields a float array, which is not a valid index
        if label is not None and len(negative_triplet_collector) > 0:
            label[np.unique(negative_triplet_collector),
                  -1] = 'negative triplet'
        log.write('l2_loss', float(l2_losses.avg), epoch, test=False)
        log.write('distance_loss',
                  float(distance_losses.avg),
                  epoch,
                  test=False)
        log.write('kl_loss', float(kl_losses.avg), epoch, test=False)
        log.write('noise_regularization',
                  float(noise_regularization.avg),
                  epoch,
                  test=False)
        log.write('feature_norm', float(feature_norm.avg), epoch, test=False)
        log.write('norm_loss', float(norm_losses.avg), epoch, test=False)
        log.write('weight_reg',
                  float(weight_regularization.avg),
                  epoch,
                  test=False)
        log.write('loss', float(losses.avg), epoch, test=False)
        t_tensorboard.append(time.time() - t_tensorboard_start)

        t_save_start = time.time()

        cpu_net = copy.deepcopy(net).cpu()

        model_states[epoch] = {
            'epoch': epoch,
            'loss': losses.avg.cpu(),
            'state_dict': cpu_net.state_dict().copy(),
            'optimizer': optimizer.state_dict().copy(),
            'scheduler': scheduler.state_dict().copy()
        }
        # ``new_embedding`` is updated in place, so store a snapshot
        embeddings[epoch] = new_embedding.copy()

        t_save.append(time.time() - t_save_start)

        print('Train Epoch: {}\t'
              'Loss: {:.4f}\t'
              'L2 Loss: {:.4f}\t'
              'Distance Loss: {:.4f}\t'
              'KL Loss: {:.4f}\t'
              'Noise Regularization: {:.4f}\t'
              'Weight Regularization: {:.4f}\t'
              'LR: {:.6f}'.format(epoch, float(losses.avg),
                                  float(5 * l2_losses.avg),
                                  float(0.5 * distance_losses.avg),
                                  float(10 * kl_losses.avg),
                                  float(noise_regularization.avg),
                                  float(1e-5 * weight_regularization.avg),
                                  optimizer.param_groups[-1]['lr']))

        t_send_start = time.time()

        # send to server
        if socket_id is not None:
            position = new_embedding if scale_func is None else scale_func(
                new_embedding)
            nodes = make_nodes(position=position, index=True, label=label)
            send_payload(nodes, socket_id, categories=categories)

        t_send.append(time.time() - t_send_start)

        epoch += 1
        l2_stop_criterion = track_l2_loss.add_value(l2_losses.avg)
        epoch_stop_criterion = epoch > 150
        regularization_stop_criterion = False  # noise tracker is disabled
        lr_stop_criterion = optimizer.param_groups[-1]['lr'] < lr_threshold
        stop_criterion = any([
            l2_stop_criterion, regularization_stop_criterion,
            lr_stop_criterion, epoch_stop_criterion
        ])

        t_iter.append(time.time() - t_iter_start)

    print('Times:'
          '\n\tBeta: {})'
          '\n\tTrain: {})'
          '\n\tTensorboard: {})'
          '\n\tSave: {})'
          '\n\tSend: {})'
          '\n\tIteration: {})'.format(
              np.mean(t_beta),
              np.mean(t_train),
              np.mean(t_tensorboard),
              np.mean(t_save),
              np.mean(t_send),
              np.mean(t_iter),
          ))

    print('Training details: '
          '\n\tMean: {}'
          '\n\tMax: {} ({})'
          '\n\tMin: {} ({})'.format(np.mean(t_train), np.max(t_train),
                                    np.argmax(t_train), np.min(t_train),
                                    np.argmin(t_train)))

    previously_modified = np.append(previously_modified, idx_modified)

    # compute new features
    new_features = get_feature(net.mapping, feature)

    # write output files for the cycle
    outfile_config = os.path.join(outpath_config,
                                  'cycle_{:03d}_config.pkl'.format(cycle))
    outfile_embedding = os.path.join(
        outpath_embedding, 'cycle_{:03d}_embeddings.hdf5'.format(cycle))
    outfile_feature = os.path.join(outpath_feature,
                                   'cycle_{:03d}_feature.hdf5'.format(cycle))
    outfile_model_states = os.path.join(
        outpath_model, 'cycle_{:03d}_models.pth.tar'.format(cycle))

    with h5py.File(outfile_embedding, 'w') as f:
        f.create_dataset(name='image_id',
                         shape=image_id.shape,
                         dtype=image_id.dtype,
                         data=image_id)
        for saved_epoch in embeddings.keys():
            epoch_embedding = embeddings[saved_epoch]
            f.create_dataset(name='epoch_{:04d}'.format(saved_epoch),
                             shape=epoch_embedding.shape,
                             dtype=epoch_embedding.dtype,
                             data=epoch_embedding)
    print('\tSaved {}'.format(os.path.join(os.getcwd(), outfile_embedding)))

    with h5py.File(outfile_feature, 'w') as f:
        f.create_dataset(name='feature',
                         shape=new_features.shape,
                         dtype=new_features.dtype,
                         data=new_features)
        f.create_dataset(name='image_id',
                         shape=image_id.shape,
                         dtype=image_id.dtype,
                         data=image_id)
    print('\tSaved {}'.format(os.path.join(os.getcwd(), outfile_feature)))

    torch.save(model_states, outfile_model_states)
    print('\tSaved {}'.format(os.path.join(os.getcwd(), outfile_model_states)))

    # write config file
    config_dict = {
        'idx_modified': idx_modified,
        'idx_old_neighbors': idx_old_neighbors,
        'idx_new_neighbors': idx_new_neighbors,
        'idx_high_dim_neighbors': idx_high_dim_neighbors
    }
    # pickle writes bytes, so the file has to be opened in binary mode
    with open(outfile_config, 'wb') as f:
        pickle.dump(config_dict, f)
    print('\tSaved {}'.format(os.path.join(os.getcwd(), outfile_config)))

    print('Done.')

    print('Finished training.')
    return new_embedding
Exemple #39
0
# Show the parameters of the `conv2d` layer.
# NOTE(review): `conv2d` is not defined yet at this point of the visible
# snippet -- presumably it is created earlier in the original script; confirm.
print(conv2d.weight.data)
print(conv2d.bias.data)

# nn.Conv2d implements a 2D convolution. Its main arguments are:
#   in_channels  - number of channels of the input image (e.g. colors)
#   out_channels - number of channels produced by the convolution
#   kernel_size  - (int or tuple) size of the convolving kernel
#   stride       - (int or tuple, optional) step of the convolution, default: 1
#   padding      - (int or tuple, optional) zero-padding added to both sides
#                  of the input, default: 0
#   bias         - (bool, optional) if True, adds a learnable bias to the
#                  output, default: True

# The forward pass takes a 4-dimensional input (N (batch size), Cin, Hin, Win)
# and returns a 4-dimensional output (N, Cout, Hout, Wout).

X = torch.rand(4, 2, 3, 5)

# A 3x5 kernel with padding (1, 2) and stride 1 preserves the spatial size.
conv2d = nn.Conv2d(in_channels=2, out_channels=3, kernel_size=(3, 5), stride=1, padding=(1, 2))
Y = conv2d(X)
print('Y.shape: ', Y.shape)
print('weight.shape: ', conv2d.weight.shape)
print('bias.shape: ', conv2d.bias.shape)


# Pooling mitigates the over-sensitivity of the convolution layer to position;
# the usual choices are max pooling and average pooling.

X = torch.arange(32, dtype=torch.float32).view(1, 2, 4, 4)
# Max pooling with a 3x3 window, padding 1 and stride 2 (height) / 1 (width).
pool2d = nn.MaxPool2d(kernel_size=3, padding=1, stride=(2, 1))
Y = pool2d(X)
print(X)
Exemple #40
0
    # Tensor of range(#frames): produces a sequence to be later compared with the discovered indices
    idx_e2 = torch.tensor([i for i in range(dims_1[2])]).unsqueeze(0).repeat(dims_1[0],1)

    # Find matching sequences per batch
    e1toe2 = torch.sum(idx_exp==idx_e2,dim=-1)==dims_1[2]

    # recursion for cyclic-back
    if check==2:
        return e1toe2
    else:
        e2toe1 = soft_nnc(embeddings2,embeddings1,check=2)

    # join together
    conditions = e1toe2+e2toe1

    # return only the batch indices
    return torch.where(conditions==True)[0]


if __name__ == "__main__":
    # Smoke test: compare one random batch of embeddings against a second
    # random batch and against an exact copy of itself.
    e1 = torch.rand(10,32,16)
    e2 = torch.rand(10,32,16)
    e3 = e1.clone()

    for headline, counterpart in (
            ('Test 1: e1 != e2 (indices):', e2),
            ('Test 2: e1 == e3 (indices):', e3),
    ):
        cyclic_c = soft_nnc(e1, counterpart)
        print(headline, cyclic_c.numpy(), '\n')
def _dummy_image_loader(_):
    """Return a random tensor of shape (3, 196, 196); the argument is ignored."""
    channels, height, width = 3, 196, 196
    return torch.rand(channels, height, width)
 def __init__(self, config):
     """Create the learnable ``kernel`` parameter from ``config.model``.

     The kernel has shape (hidden_size, head_num, head_dim) and is
     initialised with uniform random values in [0, 1).
     """
     super().__init__()
     model_cfg = config.model
     kernel_shape = [
         model_cfg.hidden_size,
         model_cfg.head_num,
         model_cfg.head_dim,
     ]
     self.kernel = nn.Parameter(torch.rand(kernel_shape))
Exemple #43
0
 def rand(self, shape):
     """Sample a uniform [0, 1) tensor of ``shape`` and pass it through ``self.move``."""
     sample = torch.rand(shape)
     return self.move(sample)
def test():
    """Plot the Poincare and Euclidean barycenters of random 2D and 3D points.

    For each dimension, 100 points are drawn around a random centre, points
    outside the unit ball are rescaled to lie just inside it, and both
    ``barycenter(X)`` and the arithmetic mean are drawn on top of the samples.
    Opens two interactive matplotlib windows.
    """
    import torch
    import matplotlib.pyplot as plt
    from matplotlib.patches import Circle
    import numpy as np
    # Registers the '3d' projection on older Matplotlib releases.
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

    def sample_in_unit_ball(dim, spread):
        # Gaussian cloud around a random centre, clipped into the unit ball.
        points = torch.randn(100, dim) * spread + (
            torch.rand(1, dim).expand(100, dim) - 0.5) * 3
        norms = points.norm(2, -1)
        outside = norms > 1
        # dividing by (norm + 1e-3) places the point just inside the boundary
        points[outside] /= (norms[outside].unsqueeze(-1).expand(
            norms[outside].shape[0], dim) + 1e-3)
        return points

    # ---- 2D ----
    X = sample_in_unit_ball(2, 0.5)
    mu = barycenter(X)

    ax = plt.subplot()
    p = Circle((0, 0), 1, edgecolor='b', lw=1, facecolor='none')
    ax.add_patch(p)
    plt.scatter(X[:, 0].numpy(), X[:, 1].numpy())
    plt.scatter(mu[0, 0].item(),
                mu[0, 1].item(),
                label="Poincare barycenter",
                marker="s",
                c="red",
                s=100.)
    plt.scatter(X.mean(0)[0].item(),
                X.mean(0)[1].item(),
                label="Euclidean barycenter",
                marker="s",
                c="green",
                s=100.)
    plt.legend()
    plt.show()
    print("3D")

    # ---- 3D ----
    fig = plt.figure()
    # ``fig.gca(projection=...)`` was removed in Matplotlib 3.6;
    # ``add_subplot`` works on old and new releases alike.
    ax = fig.add_subplot(111, projection='3d')
    try:
        ax.set_aspect("equal")
    except NotImplementedError:
        # Matplotlib 3.1-3.5 only support 'auto' on 3D axes; the plot is
        # still usable without an equal aspect ratio.
        pass
    # draw sphere
    u, v = np.mgrid[0:2 * np.pi:20j, 0:np.pi:10j]
    x = np.cos(u) * np.sin(v)
    y = np.sin(u) * np.sin(v)
    z = np.cos(v)
    ax.plot_wireframe(x, y, z, color="r")

    X = sample_in_unit_ball(3, 0.3)
    mu = barycenter(X)

    ax.scatter(X[:, 0].numpy(), X[:, 1].numpy(), X[:, 2].numpy())
    ax.scatter(mu[0, 0].item(),
               mu[0, 1].item(),
               mu[0, 2].item(),
               label="Poincare barycenter",
               marker="s",
               c="red",
               s=100.)
    ax.scatter(X.mean(0)[0].item(),
               X.mean(0)[1].item(),
               X.mean(0)[2].item(),
               label="Euclidean barycenter",
               marker="s",
               c="green",
               s=100.)
    ax.legend()
    plt.show()
def rand_uniform(shape, min_value, max_value):
    """Return a tensor of ``shape`` sampled uniformly from [min_value, max_value)."""
    span = max_value - min_value
    unit_samples = torch.rand(shape)
    return unit_samples * span + min_value
Exemple #46
0
    def __init__(self,
                 embeddings_shape,
                 device,
                 parser,
                 pad_action,
                 opt,
                 n_features=768):
        """ Initialize the parser model.

        Builds the BERT encoder (loading pre-trained weights from disk), an
        optional LSTM history model, the action classifier and the label
        classifier.

        @param embeddings_shape: stored as ``self.embedding_size`` and passed
            as first argument to ``BertConfig``
        @param device: stored as ``self.device``
        @param parser: parser object; ``n_transit`` and ``P_NULL`` are read
        @param pad_action: mapping; only ``pad_action['P']`` is stored
        @param opt: options object providing the hyper-parameters read below
            (``nclass``, ``ffdropout``, ``ffhidden``, ``batchsize``, ...)
        @param n_features (int): number of input features of the classifiers
        """
        super(ParserModel, self).__init__()
        
        ## initialization of parameters
        self.n_features = n_features
        self.n_classes = opt.nclass
        self.dropout_prob = opt.ffdropout
        self.hidden_size = opt.ffhidden
        self.embedding_size = embeddings_shape
        self.batch_size = opt.batchsize
        self.device = device
        self.n_layers_history = opt.nlayershistory
        self.max_step_length = opt.maxsteplength
        self.parser = parser
        self.pad_action = pad_action['P']
        # one label per transition except the last one
        self.num_labels = parser.n_transit-1
        self.hidden_size_label = opt.hiddensizelabel
        self.pooling_hid = opt.poolinghid
        self.fhistmodel = opt.fhistmodel
        self.use_justexist = opt.use_justexist
        ## initialization of embedding and bert model
        # The label embedding has one extra row (index num_labels) that
        # serves as padding entry.
        if opt.fcompmodel or opt.graphinput:
            self.label_emb = nn.Embedding(self.num_labels+1,self.n_features,padding_idx=self.num_labels)
        else:
            self.label_emb= None

        # NOTE(review): BertConfig is a project class taking positional
        # arguments -- check its signature before reordering anything here.
        bertconfig = BertConfig(self.embedding_size, parser.n_transit-1,
                                              opt.labelemb, parser.P_NULL,opt.graphinput,
                                              opt.nattentionlayer,opt.nattentionheads,opt.fcompmodel,opt.seppoint,
                                              self.label_emb,opt.layernorm,opt.use_topbuffer,opt.use_justexist,
                                              opt.embsize,4*opt.embsize)
        self.bertmodel = BertModel(bertconfig)
        if opt.withbert:
            # Load a full checkpoint; strict=False tolerates missing and
            # unexpected keys.
            state_dict = torch.load('small_bert'+str(opt.outputname))
            self.bertmodel.load_state_dict(state_dict,strict=False)
            del state_dict
        else:
            # Otherwise only the embedding tables are loaded from disk.
            state_dict_position = torch.load('position'+str(opt.outputname))
            self.bertmodel.embeddings.position_embeddings.load_state_dict(state_dict_position)
        
            # token type embeddings are only loaded without graph input
            if not opt.graphinput:
                state_dict_token = torch.load('token_type'+str(opt.outputname))
                self.bertmodel.embeddings.token_type_embeddings.load_state_dict(state_dict_token)
                del state_dict_token
        
            state_dict_word = torch.load('word_emb'+str(opt.outputname))
            self.bertmodel.embeddings.word_embeddings.load_state_dict(state_dict_word)

            del state_dict_position, state_dict_word

        ############################################################################################
        # Zero the embedding rows of the padding label and of the NULL token.
        if opt.graphinput or opt.fcompmodel:
            self.bertmodel.embeddings.label_emb.weight[parser.n_transit-1].data.fill_(0.0)
            
        self.bertmodel.embeddings.word_embeddings.weight[parser.P_NULL].data.fill_(0.0)
        ############################################################################################
        
        ### initialization of lstm history model
        if self.fhistmodel:
            self.hist_size = opt.histsize
            # one extra action embedding row (n_classes + 1)
            self.action_emb = nn.Embedding(self.n_classes+1, self.hist_size)
            self.history = HistoryLSTM(self.hist_size,self.hist_size,self.n_layers_history)
        
            self.dtype = torch.cuda.FloatTensor if cuda.is_available() else torch.FloatTensor
            # learnable initial hidden and cell state of the history LSTM
            self.h0 = nn.Parameter(torch.rand(self.hist_size,requires_grad=True).type(self.dtype))
            self.c0 = nn.Parameter(torch.rand(self.hist_size,requires_grad=True).type(self.dtype))
            
        ## initialization of classifer
        # With the history model enabled the classifier input is widened by
        # hist_size.
        if self.fhistmodel:
            self.embed_to_hidden = nn.Linear(self.n_features+self.hist_size, self.hidden_size)
        else:
            self.embed_to_hidden = nn.Linear(self.n_features, self.hidden_size)
            
        nn.init.xavier_uniform_(self.embed_to_hidden.weight)
        self.relu = nn.LeakyReLU()
        self.dropout = nn.Dropout(self.dropout_prob)
        self.hidden_to_logits = nn.Linear(self.hidden_size, self.n_classes)
        nn.init.xavier_uniform_(self.hidden_to_logits.weight)
        
        ## initializtion of label-classifier
        # same input widening as for the action classifier above
        if self.fhistmodel:
            self.label_classifier = LabelClassifier(self.n_features+self.hist_size,
                                                    self.hidden_size_label,self.num_labels)
        else:
            self.label_classifier = LabelClassifier(self.n_features, self.hidden_size_label,self.num_labels)
    assert imgs.shape == (2, 3, 196, 196)
    assert labels.shape == (2, 4)
    torch.testing.assert_allclose(labels, torch.tensor(valid_labels))

    data = next(iter(dm.test_dataloader()))
    imgs, labels = data["input"], data["target"]
    assert imgs.shape == (2, 3, 196, 196)
    assert labels.shape == (2, 4)
    torch.testing.assert_allclose(labels, torch.tensor(test_labels))


@pytest.mark.skipif(not _IMAGE_TESTING, reason="image libraries aren't installed.")
@pytest.mark.parametrize(
    "data,from_function",
    [
        (torch.rand(3, 3, 196, 196), ImageClassificationData.from_tensors),
        (np.random.rand(3, 3, 196, 196), ImageClassificationData.from_numpy),
    ],
)
def test_from_data(data, from_function):
    """Build a data module from in-memory data through ``from_function``.

    The same three-sample ``data`` object is used for the train, val and test
    splits; only the targets differ per split.
    """
    # Loader settings shared by every split.
    loader_options = {"batch_size": 2, "num_workers": 0}
    img_data = from_function(
        train_data=data, train_targets=[0, 3, 6],
        val_data=data, val_targets=[1, 4, 7],
        test_data=data, test_targets=[2, 5, 8],
        **loader_options,
    )
def train(args, generator, discriminator):
    """Run the adversarial training loop for ``generator`` and ``discriminator``.

    Besides its parameters, the function reads several names that are not
    assigned here and must exist in an enclosing/module scope: ``g_optimizer``,
    ``d_optimizer``, ``g_running``, ``coord_handler``, ``patch_handler``,
    ``config``, ``transform``, ``code_size``, ``n_critic``,
    ``num_micro_in_macro``, ``step_D`` and ``step_G``.

    Args:
        args: Options object. The attributes read here are ``max_size``,
            ``batch``, ``batch_default``, ``path``, ``lr``, ``phase``,
            ``init_size``, ``ckpt``, ``loss`` (``'wgan-gp'`` or ``'r1'``),
            ``mixing`` and ``gen_sample``.
        generator: Generator network; ``generator.module`` is accessed, so it
            must expose a ``.module`` attribute.
        discriminator: Discriminator network; ``discriminator.module`` is
            accessed as well.

    Side effects:
        Writes checkpoints under ``checkpoint/`` and sample grids under
        ``sample_spatialR/``, and updates the tqdm progress-bar description.
    """
    # Same expression as max_step below, so the loop starts at the last step.
    step = int(math.log2(args.max_size)) - 2 #-> 1
    resolution = 4 * 2 ** step
    batch_size = args.batch.get(resolution, args.batch_default)
    dataset = MultiResolutionDataset(args.path, transform, resolution=resolution)

    loader = sample_data(
        dataset, batch_size, resolution
    )
    data_loader = iter(loader)

    # Per-resolution learning rate, defaulting to 0.001.
    adjust_lr(g_optimizer, args.lr.get(resolution, 0.001))
    adjust_lr(d_optimizer, args.lr.get(resolution, 0.001))

    pbar = tqdm(range(3000000))

    # Start in "discriminator update" mode.
    requires_grad(generator, False)
    requires_grad(discriminator, True)

    # Last recorded loss values, only used for the progress-bar message.
    disc_loss_val = 0
    gen_loss_val = 0
    grad_loss_val = 0

    alpha = 0
    used_sample = 0 #-> how many images have been used in the current phase

    max_step = int(math.log2(args.max_size)) - 2 #-> e.g. log2(1024) - 2 = 8
    final_progress = False

    for i in pbar:
        discriminator.zero_grad()

        # alpha grows linearly with the number of samples used and saturates at 1.
        alpha = min(1, 1 / args.phase * (used_sample + 1)) #-> e.g. min(1, (cur+1)/600_000) when args.phase is 600_000
        #-> once more than args.phase samples have been used, alpha stays at 1.0
        #-> which presumably means the "skip_rgb" blending is no longer applied (TODO confirm in the model code)

        if (resolution == args.init_size and args.ckpt is None) or final_progress:
            alpha = 1
        #-> also, at the initial resolution without a checkpoint there is no previous output for the skip-connection

        # A phase ends after 2 * args.phase samples: advance the step, rebuild
        # the loader, save a checkpoint and refresh the learning rates.
        if used_sample > args.phase * 2: #-> e.g. > 1_200_000 when args.phase is 600_000
            ## num_of_epoch_each_phase = args.phase * 2 / training_dataset_size
            used_sample = 0

            step += 1

            if step > max_step:
                # Clamp at the last step and stop fading alpha from now on.
                step = max_step
                final_progress = True
                ckpt_step = step + 1

            else:
                alpha = 0
                ckpt_step = step


            # NOTE(review): resolution is derived from the outer-scope `step_D`,
            # not from the local `step` that was just updated -- confirm intended.
            resolution = 4 * 2 ** step_D

            loader = sample_data(
                dataset, args.batch.get(resolution, args.batch_default), resolution
            )
            data_loader = iter(loader)

            torch.save(
                {
                    'generator': generator.module.state_dict(),
                    'discriminator': discriminator.module.state_dict(),
                    'g_optimizer': g_optimizer.state_dict(),
                    'd_optimizer': d_optimizer.state_dict(),
                    'g_running': g_running.state_dict(),
                }, r'checkpoint/train_step-{}.model'.format(ckpt_step))

            adjust_lr(g_optimizer, args.lr.get(resolution, 0.001))
            adjust_lr(d_optimizer, args.lr.get(resolution, 0.001))

        #### update discriminator
        try:
            real_image = next(data_loader)

        except (OSError, StopIteration):
            # Loader exhausted (or a read failed): restart it and retry once.
            data_loader = iter(loader)
            real_image = next(data_loader)

        used_sample += real_image.shape[0]

        b_size = real_image.size(0)
        # get sample coords (one set for the real batch, two for fake batches)
        coord_handler.batch_size = b_size
        patch_handler.batch_size = b_size
        d_macro_coord_real, g_micro_coord_real, _ = coord_handler._euclidean_sample_coord()
        d_macro_coord_fake1, g_micro_coord_fake1, _ = coord_handler._euclidean_sample_coord()
        d_macro_coord_fake2, g_micro_coord_fake2, _ = coord_handler._euclidean_sample_coord()

        # NOTE(review): g_micro_coord_real is not converted to a CUDA tensor here,
        # unlike the fake micro coordinates -- confirm the crop helper accepts it as is.
        d_macro_coord_real = torch.from_numpy(d_macro_coord_real).float().cuda()
        d_macro_coord_fake1, g_micro_coord_fake1 = torch.from_numpy(d_macro_coord_fake1).float().cuda(), torch.from_numpy(g_micro_coord_fake1).float().cuda()
        d_macro_coord_fake2, g_micro_coord_fake2 = torch.from_numpy(d_macro_coord_fake2).float().cuda(), torch.from_numpy(g_micro_coord_fake2).float().cuda()

        # Row i*b_size+j of `latent.repeat(num_micro_in_macro, 1)` is copy i of
        # sample j; `select` reorders the rows so all copies of a sample are
        # adjacent. The comprehension's `i` is local to it (Python 3) and does
        # not overwrite the loop counter.
        select = np.hstack([[i*b_size+j for i in range(num_micro_in_macro)] for j in range(b_size)])
        real_image = real_image.cuda()


        # Crop micro patches from the real images and assemble them into macro patches.
        real_macro = micros_to_macro(patch_handler.crop_micro_from_full_gpu(real_image, g_micro_coord_real[:, 1:2], g_micro_coord_real[:, 0:1]), config["data_params"]["ratio_macro_to_micro"])

        if args.loss == 'wgan-gp':
            real_predict = discriminator(real_macro, d_macro_coord_real, step=step_D, alpha=alpha)
            # Mean critic score with a small (0.001) penalty on its squared magnitude.
            real_predict = real_predict.mean() - 0.001 * (real_predict ** 2).mean()
            (-real_predict).backward()

        elif args.loss == 'r1':
            # The input needs gradients so the penalty w.r.t. real_macro can be computed.
            real_macro.requires_grad = True
            real_scores = discriminator(real_macro, d_macro_coord_real, step=step_D, alpha=alpha)
            real_predict = F.softplus(-real_scores).mean()
            real_predict.backward(retain_graph=True)

            grad_real = grad(
                outputs=real_scores.sum(), inputs=real_macro, create_graph=True
            )[0]
            # Mean squared L2 norm of the per-sample gradient, weighted by 10 / 2.
            grad_penalty = (
                grad_real.view(grad_real.size(0), -1).norm(2, dim=1) ** 2
            ).mean()
            grad_penalty = 10 / 2 * grad_penalty
            grad_penalty.backward()
            if i%10 == 0:
                grad_loss_val = grad_penalty.item()

        # Build generator inputs: latent codes of width code_size-2 with the
        # micro coordinates concatenated along dim=1.
        if args.mixing and random.random() < 0.9:
            # Mixing: each generator input is a pair of codes sharing coordinates.
            gen_in11, gen_in12, gen_in21, gen_in22 = torch.randn(
                4, b_size, code_size-2, device='cuda'
            ).chunk(4, 0)

            gen_in11 = gen_in11.squeeze(0)
            gen_in11 = torch.cat([gen_in11.repeat(num_micro_in_macro, 1)[select], g_micro_coord_fake1], dim=1)

            gen_in12 = gen_in12.squeeze(0)
            gen_in12 = torch.cat([gen_in12.repeat(num_micro_in_macro, 1)[select], g_micro_coord_fake1], dim=1)

            gen_in21 = gen_in21.squeeze(0)
            gen_in21 = torch.cat([gen_in21.repeat(num_micro_in_macro, 1)[select], g_micro_coord_fake2], dim=1)

            gen_in22 = gen_in22.squeeze(0)
            gen_in22 = torch.cat([gen_in22.repeat(num_micro_in_macro, 1)[select], g_micro_coord_fake2], dim=1)

            gen_in1 = [gen_in11, gen_in12]
            gen_in2 = [gen_in21, gen_in22]

        else:
            gen_in1, gen_in2 = torch.randn(2, b_size, code_size-2, device='cuda').chunk(
                2, 0                                  # 512
            )
            gen_in1 = gen_in1.squeeze(0)# (B, code_size-2)
            gen_in2 = gen_in2.squeeze(0)# (B, code_size-2)

            # repeat each code once per micro patch and append the coordinates
            gen_in1 = torch.cat([gen_in1.repeat(num_micro_in_macro, 1)[select], g_micro_coord_fake1], dim=1)
            gen_in2 = torch.cat([gen_in2.repeat(num_micro_in_macro, 1)[select], g_micro_coord_fake2], dim=1)


        # Fake batch for the discriminator update (uses the first input set).
        fake_image = generator(gen_in1, step=step_G, alpha=alpha)
        fake_image = micros_to_macro(fake_image, config["data_params"]["ratio_macro_to_micro"])
        fake_predict = discriminator(fake_image, d_macro_coord_fake1, step=step_D, alpha=alpha)

        if args.loss == 'wgan-gp':
            fake_predict = fake_predict.mean()
            fake_predict.backward()

            # Gradient penalty on random interpolates between real and fake.
            # NOTE(review): this mixes the full `real_image` (not `real_macro`)
            # with the macro-sized `fake_image`, and the discriminator is called
            # without the coordinate argument used everywhere else -- verify.
            eps = torch.rand(b_size, 1, 1, 1).cuda()
            x_hat = eps * real_image.data + (1 - eps) * fake_image.data
            x_hat.requires_grad = True
            hat_predict = discriminator(x_hat, step=step_D, alpha=alpha)
            grad_x_hat = grad(
                outputs=hat_predict.sum(), inputs=x_hat, create_graph=True
            )[0]
            grad_penalty = (
                (grad_x_hat.view(grad_x_hat.size(0), -1).norm(2, dim=1) - 1) ** 2
            ).mean()
            grad_penalty = 10 * grad_penalty
            grad_penalty.backward()
            if i%10 == 0:
                grad_loss_val = grad_penalty.item()
                disc_loss_val = (real_predict - fake_predict).item()

        elif args.loss == 'r1':
            fake_predict = F.softplus(fake_predict).mean()
            fake_predict.backward()
            if i%10 == 0:
                disc_loss_val = (real_predict + fake_predict).item()

        d_optimizer.step()


        #### update generator (once every n_critic discriminator updates)
        if (i + 1) % n_critic == 0:
            generator.zero_grad()

            requires_grad(generator, True)
            requires_grad(discriminator, False)

            # Uses the second input set, which the discriminator step did not see.
            fake_image = generator(gen_in2, step=step_G, alpha=alpha)
            fake_image = micros_to_macro(fake_image, config["data_params"]["ratio_macro_to_micro"])
            predict = discriminator(fake_image, d_macro_coord_fake2, step=step_D, alpha=alpha)

            if args.loss == 'wgan-gp':
                loss = -predict.mean()

            elif args.loss == 'r1':
                loss = F.softplus(-predict).mean()

            if i%10 == 0:
                gen_loss_val = loss.item()

            loss.backward()
            g_optimizer.step()
            # Fold the updated generator weights into the running copy.
            accumulate(g_running, generator.module)

            # Switch back to "discriminator update" mode.
            requires_grad(generator, False)
            requires_grad(discriminator, True)


        #### validation: every 100 iterations save a grid sampled from g_running
        if (i + 1) % 100 == 0:
            images = []

            gen_i, gen_j = args.gen_sample.get(resolution, (10, 5))

            coord_handler.batch_size = gen_i * gen_j
            _, g_micro_coord_val, _ = coord_handler._euclidean_sample_coord()
            g_micro_coord_val = torch.from_numpy(g_micro_coord_val).float().cuda()
            #print(g_micro_coord_val.shape)

            # Same reordering as in the training step, for a batch of gen_j codes.
            select = np.hstack([[i*gen_j+j for i in range(num_micro_in_macro)] for j in range(gen_j)])

            with torch.no_grad():
                for ii in range(gen_i):
                    style = torch.randn(gen_j, code_size-2).cuda().repeat(num_micro_in_macro, 1)[select]
                    #print(style.size())
                    # Slice of the validation coordinates belonging to row `ii`.
                    coords = g_micro_coord_val[ii*gen_j*num_micro_in_macro:(ii+1)*gen_j*num_micro_in_macro]
                    #print(coords.size())
                    style = torch.cat([style, coords], dim=1)

                    image = g_running(style, step=step_G, alpha=alpha).data.cpu()
                    image = micros_to_macro(image, config['data_params']['ratio_macro_to_micro'])

                    images.append(
                        image
                    )

            utils.save_image(
                torch.cat(images, 0),
                r'sample_spatialR/%06d.png'%(i+1),
                nrow=gen_i,
                normalize=True,
                range=(-1, 1),
            )

        # Every 10000 iterations save the running generator on its own.
        if (i + 1) % 10000 == 0:
            torch.save(
                g_running.state_dict(), r'checkpoint/%06d.model'%(i+1)
            )

        state_msg = (
            r'Size: {}; G: {:.3f}; D: {:.3f}; Grad: {:.3f}; Alpha: {:.5f}'.format(4 * 2 ** step, gen_loss_val, disc_loss_val, grad_loss_val, alpha)
        )

        pbar.set_description(state_msg)
Exemple #49
0
 def test_roi_align(self):
     """Run an ``ops.RoIAlign`` module through ``self.run_model`` with one ROI."""
     # Random single-channel 10x10 feature map and one five-column ROI row.
     feature_map = torch.rand(1, 1, 10, 10, dtype=torch.float32)
     roi = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
     roi_align = ops.RoIAlign((5, 5), 1, 2)
     sample_inputs = [(feature_map, roi)]
     self.run_model(roi_align, sample_inputs)
from util import timeit,get_logger
import random
from all_model import AllModel
from process_data import ProcessData,split_train_and_valid,split_train_and_test
from feat import Feat

VERBOSITY_LEVEL = 'INFO'
LOGGER = get_logger(VERBOSITY_LEVEL, __file__)

# Show up to 100 columns/rows when printing DataFrames.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

import time
# Run one small matrix product up front and log how long it takes, so the
# first-use cost of the selected device is visible in the log.
s = time.time()
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
matrix_a = torch.rand((100,100))
matrix_b = torch.rand((100,100))
# matrix_a / matrix_b are already tensors, so no torch.Tensor(...) re-wrap is needed.
torch.mm(matrix_a.to(device), matrix_b.to(device)).cpu().numpy()
LOGGER.info(f'init torch.mm:{time.time()-s}s')
SEED = 2020

#split_mode = 'stratified','stratified_cv','shuffle_split'
split_mode='stratified_cv'

offline = True
if offline:
    try:
        import prettytable as pt
    except ImportError:
        # Only a missing package should trigger the install; a bare `except:`
        # would also swallow KeyboardInterrupt/SystemExit and unrelated errors.
        os.system('pip install prettytable')
        import prettytable as pt
import time
import torch
import torch.nn as nn

# 2. Linear regression example:

# Prepare the data:
n = 1000000
x = 10 * torch.rand([n, 2]) - 5.0
w0 = torch.tensor([[2.0, -3.0]])
b0 = torch.tensor([[10.0]])
y = x @ w0.t() + b0 + torch.normal(0.0, 2.0, size=[n, 1])  # add Gaussian noise

# Move to the GPU when one is available, otherwise stay on the CPU.
# Using .to(device) instead of .cuda() keeps the script runnable without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x = x.to(device)
y = y.to(device)


# Define the model
class LinearRegression(nn.Module):
    """Affine model ``x @ w.T + b`` with a learnable weight and bias."""

    def __init__(self):
        super(LinearRegression, self).__init__()
        # Parameter shapes follow the module-level reference tensors w0 and b0:
        # the weight starts from standard-normal noise, the bias from zeros.
        weight_init = torch.randn_like(w0)
        bias_init = torch.zeros_like(b0)
        self.w = nn.Parameter(weight_init)
        self.b = nn.Parameter(bias_init)

    def forward(self, x):
        """Return ``x`` projected by the transposed weight plus the bias."""
        projected = torch.matmul(x, self.w.t())
        return projected + self.b


# Instantiate the model; it is not moved to `device` on this line.
linear = LinearRegression()
Exemple #52
0
def _stack_layer_scores(model, loader, is_in_dist, sample_mean, precision,
                        num_output, magnitude):
    """Return the scores of every feature layer for ``loader``, stacked column-wise.

    For each of the ``num_output`` layers the result of
    ``lib_generation.get_Mahalanobis_score`` is converted to float32, reshaped
    to ``(num_samples, -1)`` and concatenated along axis 1.
    """
    per_layer = []
    for layer_idx in range(num_output):
        scores = lib_generation.get_Mahalanobis_score(
            model, loader, args.num_classes, args.outf, is_in_dist,
            args.net_type, sample_mean, precision, layer_idx, magnitude)
        scores = np.asarray(scores, dtype=np.float32)
        per_layer.append(scores.reshape((scores.shape[0], -1)))
    return np.concatenate(per_layer, axis=1)


def main():
    """Compute Mahalanobis scores for in- and out-of-distribution data and save them.

    Configuration comes from the module-level ``args`` (``outf``, ``net_type``,
    ``dataset``, ``gpu``, ``batch_size``, ``dataroot``, ``num_classes``).
    ``args.outf`` is extended in place with ``<net_type>_<dataset>/`` and the
    directory is created if needed. For every noise magnitude and every
    out-of-distribution dataset one ``Mahalanobis_<magnitude>_<dataset>_<out>.npy``
    file is written into that directory.

    Raises:
        Exception: if ``args.net_type`` is not one of the supported networks.
    """
    # set the path to pre-trained model and output
    args.outf = args.outf + args.net_type + '_' + args.dataset + '/'
    # makedirs(exist_ok=True) creates missing parents and avoids the
    # check-then-create race of isdir() followed by mkdir().
    os.makedirs(args.outf, exist_ok=True)
    torch.cuda.manual_seed(0)
    torch.cuda.set_device(args.gpu)

    out_dist_list = [
        'skin_cli', 'skin_derm', 'corrupted', 'corrupted_70', 'imgnet', 'nct',
        'final_test'
    ]

    # load networks: pick the architecture and its checkpoint, then run the
    # loading steps shared by all of them once.
    if args.net_type == 'densenet_121':
        model = densenet_121.Net(models.densenet121(pretrained=False), 8)
        ckpt_path = "../checkpoints/densenet-121/checkpoint.pth"
    elif args.net_type == 'mobilenet':
        model = mobilenet.Net(models.mobilenet_v2(pretrained=False), 8)
        ckpt_path = "../checkpoints/mobilenet/checkpoint.pth"
    elif args.net_type == 'resnet_50':
        model = resnet_50.Net(models.resnet50(pretrained=False), 8)
        ckpt_path = "../checkpoints/resnet-50/checkpoint.pth"
    elif args.net_type == 'vgg_16':
        model = vgg_16.Net(models.vgg16_bn(pretrained=False), 8)
        ckpt_path = "../checkpoints/vgg-16/checkpoint.pth"
    else:
        raise Exception(f"There is no net_type={args.net_type} available.")

    ckpt = torch.load(ckpt_path)
    model.load_state_dict(ckpt['model_state_dict'])
    model.eval()
    model.cuda()
    print("Done!")

    in_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    print('load model: ' + args.net_type)

    # load dataset
    print('load target data: ', args.dataset)
    train_loader, test_loader = data_loader.getTargetDataSet(
        args.dataset, args.batch_size, in_transform, args.dataroot)

    # set information about feature extraction: push a dummy batch through
    # the model and record size(1) of every returned feature tensor.
    model.eval()
    temp_x = torch.rand(2, 3, 224, 224).cuda()
    temp_x = Variable(temp_x)
    temp_list = model.feature_list(temp_x)[1]
    num_output = len(temp_list)
    feature_list = np.empty(num_output)
    for layer_idx, out in enumerate(temp_list):
        feature_list[layer_idx] = out.size(1)

    print('get sample mean and covariance')
    sample_mean, precision = lib_generation.sample_estimator(
        model, args.num_classes, feature_list, train_loader)

    print('get Mahalanobis scores')
    m_list = [0.0, 0.01, 0.005, 0.002, 0.0014, 0.001, 0.0005]

    for magnitude in m_list:
        print('Noise: ' + str(magnitude))
        # In-distribution scores are computed once per magnitude and reused
        # for every out-of-distribution dataset.
        Mahalanobis_in = _stack_layer_scores(
            model, test_loader, True, sample_mean, precision, num_output,
            magnitude)

        for out_dist in out_dist_list:
            out_test_loader = data_loader.getNonTargetDataSet(
                out_dist, args.batch_size, in_transform, args.dataroot)
            print('Out-distribution: ' + out_dist)
            Mahalanobis_out = _stack_layer_scores(
                model, out_test_loader, False, sample_mean, precision,
                num_output, magnitude)

            Mahalanobis_data, Mahalanobis_labels = lib_generation.merge_and_generate_labels(
                Mahalanobis_out, Mahalanobis_in)
            file_name = os.path.join(
                args.outf, 'Mahalanobis_%s_%s_%s.npy' %
                (str(magnitude), args.dataset, out_dist))
            # Labels are appended as extra trailing column(s) of the saved array.
            Mahalanobis_data = np.concatenate(
                (Mahalanobis_data, Mahalanobis_labels), axis=1)
            np.save(file_name, Mahalanobis_data)
SWITCH_WORDS = False
# Speech transcript inside the corpus directory.
SPEECH_FILE = os.path.join("data", CORPUS_NAME, "Clinton_2016-07-28.txt")

# Load the checkpointed model and prepare the corpus, embedding and first input.
# NOTE(review): torch.load unpickles the file; only use checkpoints from a trusted source.
with open(MODEL_CHECKPOINT, 'rb') as f:
    model = torch.load(f)
    if USE_CUDA:
        model.cuda()
    else:
        model.cpu()

    corpus = data.Corpus(CORPUS_NAME)
    glove_embedding = glove.GloveEmbedding(corpus.vocabulary)
    ntokens = corpus.vocabulary.num_words
    # Hidden state requested for size 1 (presumably the batch size -- TODO confirm).
    hidden = model.init_hidden(1)
    # Random start token: uniform sample in [0, 1) scaled by ntokens, truncated to long.
    # NOTE(review): `volatile=True` belongs to the pre-0.4 Variable API -- confirm
    # the targeted torch version still honours it.
    input = Variable(torch.rand(1, 1).mul(ntokens).long(), volatile=True)
    if USE_CUDA:
        input.data = input.data.cuda()

words = ''

# read speech file for initialization
if SPEECH_FILE is not None:
    speech_for_gen = torch.LongTensor(30)
    with open(SPEECH_FILE, 'r', encoding="utf8") as f:
        token = 0
        for line in f:
            if token == 30:
                break
            twords = data.normalizeString(line).split() + ['EOS']
            if len(twords) > 1:
Exemple #54
0
 def __getitem__(self, index):
     """Return a normalized random image of shape ``self.shape`` and a dummy target.

     ``index`` is ignored: every call draws a fresh random image.
     """
     random_img = torch.rand(*self.shape)
     normalized = F.normalize(random_img, normalizing_mean, normalizing_std)
     return normalized, 0  # 0 is a dummy target value
    def __init__(self, dev):
        """Build the op/argument lists consumed by the autocast tests.

        Each list entry is a tuple whose first element is an op name and whose
        second element is a tuple of sample arguments; some entries carry an
        extra element (an expected dtype, a kwargs dict, a skip flag or a
        module).

        Args:
            dev: device on which every sample tensor is created.
        """
        super().__init__()
        n = 8
        # Utility arguments, created as one-element tuples
        # (so they can be combined with `+` into argument tuples below).
        pointwise0_fp16 = (torch.randn(n, dtype=torch.float16, device=dev),)
        pointwise1_fp16 = (torch.randn(n, dtype=torch.float16, device=dev),)
        pointwise2_fp16 = (torch.randn(n, dtype=torch.float16, device=dev),)
        mat0_fp16 = (torch.randn((n, n), dtype=torch.float16, device=dev),)
        mat1_fp16 = (torch.randn((n, n), dtype=torch.float16, device=dev),)
        mat2_fp16 = (torch.randn((n, n), dtype=torch.float16, device=dev),)

        # One pair of float32 tensors per dimset; indices 0/1/2 are used for
        # the 1d/2d/3d convolution entries below.
        dimsets = ((n, n, n), (n, n, n, n), (n, n, n, n, n))
        conv_args_fp32 = [(torch.randn(dimset, dtype=torch.float32, device=dev),
                           torch.randn(dimset, dtype=torch.float32, device=dev))
                          for dimset in dimsets]
        bias_fp32 = (torch.randn((n,), dtype=torch.float32, device=dev),)
        element0_fp32 = (torch.randn(1, dtype=torch.float32, device=dev),)
        pointwise0_fp32 = (torch.randn(n, dtype=torch.float32, device=dev),)
        pointwise1_fp32 = (torch.randn(n, dtype=torch.float32, device=dev),)
        mat0_fp32 = (torch.randn((n, n), dtype=torch.float32, device=dev),)
        mat1_fp32 = (torch.randn((n, n), dtype=torch.float32, device=dev),)
        mat2_fp32 = (torch.randn((n, n), dtype=torch.float32, device=dev),)
        mat3_fp32 = (torch.randn((n, n), dtype=torch.float32, device=dev),)

        # The lists below organize ops that autocast needs to test.
        # self.list_name corresponds to test_autocast_list_name in test/test_cuda.py.
        # Each op is associated with a tuple of valid arguments.
        # In addition, cudnn conv ops are not supported on ROCm and hence will
        # be skipped by passing TEST_WITH_ROCM flag to those ops in self.torch_fp16 list.

        # Some ops implement built-in type promotion.  These don't need autocasting,
        # but autocasting relies on their promotion, so we include tests to double-check.
        self.torch_expect_builtin_promote = [
            ("eq", pointwise0_fp32 + pointwise1_fp16, torch.bool),
            ("ge", pointwise0_fp32 + pointwise1_fp16, torch.bool),
            ("gt", pointwise0_fp32 + pointwise1_fp16, torch.bool),
            ("le", pointwise0_fp32 + pointwise1_fp16, torch.bool),
            ("lt", pointwise0_fp32 + pointwise1_fp16, torch.bool),
            ("ne", pointwise0_fp32 + pointwise1_fp16, torch.bool),
            ("add", pointwise0_fp32 + pointwise1_fp16, torch.float32),
            ("div", pointwise0_fp32 + pointwise1_fp16, torch.float32),
            ("mul", pointwise0_fp32 + pointwise1_fp16, torch.float32),
            ("cat", (pointwise0_fp16 + pointwise1_fp32,), torch.float32),
            ("equal", pointwise0_fp32 + pointwise1_fp16, torch.float32),
            ("stack", (pointwise0_fp16 + pointwise1_fp32,), torch.float32),
        ]
        self.methods_expect_builtin_promote = [
            ("__eq__", pointwise0_fp32 + pointwise1_fp16, torch.bool),
            ("__ge__", pointwise0_fp32 + pointwise1_fp16, torch.bool),
            ("__gt__", pointwise0_fp32 + pointwise1_fp16, torch.bool),
            ("__le__", pointwise0_fp32 + pointwise1_fp16, torch.bool),
            ("__lt__", pointwise0_fp32 + pointwise1_fp16, torch.bool),
            ("__ne__", pointwise0_fp32 + pointwise1_fp16, torch.bool),
            ("__add__", pointwise0_fp32 + pointwise1_fp16, torch.float32),
            ("__div__", pointwise0_fp32 + pointwise1_fp16, torch.float32),
            ("__mul__", pointwise0_fp32 + pointwise1_fp16, torch.float32),
        ]

        # The remaining lists organize ops that autocast treats explicitly.
        self.torch_fp16 = [
            # deprecated _convolution
            ("_convolution", conv_args_fp32[1] + bias_fp32 + ((1, 1), (0, 0), (1, 1), False,
                                                              (0, 0), 1, False, True, True)),
            # the current  _convolution
            ("_convolution", conv_args_fp32[1] + bias_fp32 + ((1, 1), (0, 0), (1, 1), False,
                                                              (0, 0), 1, False, True, True, True)),
            ("conv1d", conv_args_fp32[0]),
            ("conv2d", conv_args_fp32[1]),
            ("conv3d", conv_args_fp32[2]),
            ("conv_tbc", conv_args_fp32[0] + bias_fp32),
            ("conv_transpose1d", conv_args_fp32[0]),
            ("conv_transpose2d", conv_args_fp32[1]),
            ("conv_transpose3d", conv_args_fp32[2]),
            ("convolution", conv_args_fp32[1] + bias_fp32 + ((1, 1), (0, 0), (1, 1), False, (0, 0), 1)),
            ("cudnn_convolution", conv_args_fp32[1] + ((0, 0), (1, 1), (1, 1), 1, False, True, True), TEST_WITH_ROCM),
            ("cudnn_convolution_transpose", conv_args_fp32[1] + ((0, 0), (0, 0), (1, 1),
                                                                 (1, 1), 1, False, True, True), TEST_WITH_ROCM),
            ("prelu", pointwise0_fp32 + element0_fp32),
            ("addmm", mat1_fp32 + mat2_fp32 + mat3_fp32),
            ("addmv", pointwise0_fp32 + mat2_fp32 + pointwise1_fp32),
            ("addr", mat0_fp32 + pointwise0_fp32 + pointwise1_fp32),
            ("matmul", mat0_fp32 + mat1_fp32),
            # NOTE(review): unlike its neighbours, this entry's second element is
            # the equation string and the tensors come third -- confirm how the
            # consuming test interprets a three-element tuple here.
            ("einsum", "bkhd,bqhd->bqkh", mat0_fp32 + mat1_fp32),
            ("mm", mat0_fp32 + mat1_fp32),
            ("mv", mat0_fp32 + pointwise0_fp32),
            ("chain_matmul", mat0_fp32 + mat1_fp32 + mat2_fp32),
            ("addbmm", mat0_fp32 + (torch.randn((n, n, n), device=dev, dtype=torch.float32),
                                    torch.randn((n, n, n), device=dev, dtype=torch.float32))),
            ("baddbmm", (torch.randn((n, n, n), device=dev, dtype=torch.float32),
                         torch.randn((n, n, n), device=dev, dtype=torch.float32),
                         torch.randn((n, n, n), device=dev, dtype=torch.float32))),
            ("bmm", (torch.randn((n, n, n), device=dev, dtype=torch.float32),
                     torch.randn((n, n, n), device=dev, dtype=torch.float32))),
            # _thnn_fused_lstm_cell and _thnn_fused_gru_cell are not Python-exposed as far as I can tell.
            # ("_thnn_fused_lstm_cell", mat0_fp32 + mat1_fp32 + mat2_fp32 + pointwise0_fp32 + pointwise1_fp32),
            # ("_thnn_fused_gru_cell", mat0_fp32 + mat1_fp32 + mat2_fp32 + pointwise0_fp32 + pointwise1_fp32),
            ("lstm_cell", self._rnn_cell_args(n, num_chunks=4, is_lstm=True, dev=dev, dtype=torch.float32)),
            ("gru_cell", self._rnn_cell_args(n, num_chunks=3, is_lstm=False, dev=dev, dtype=torch.float32)),
            ("rnn_tanh_cell", self._rnn_cell_args(n, num_chunks=1, is_lstm=False, dev=dev, dtype=torch.float32)),
            ("rnn_relu_cell", self._rnn_cell_args(n, num_chunks=1, is_lstm=False, dev=dev, dtype=torch.float32)),
        ]
        # Inputs are clamped where the op would otherwise receive values
        # outside the range chosen for it here.
        self.torch_fp32 = [
            ("acos", (pointwise0_fp16[0].clamp(-.9, 0.9),)),
            ("asin", (pointwise0_fp16[0].clamp(-.9, 0.9),)),
            ("cosh", pointwise0_fp16),
            ("erfinv", (pointwise0_fp16[0].clamp(-.9, .9),)),
            ("exp", pointwise0_fp16),
            ("expm1", pointwise0_fp16),
            ("log", (pointwise0_fp16[0].clamp(0.1, 100.0),)),
            ("log10", (pointwise0_fp16[0].clamp(0.1, 100.0),)),
            ("log2", (pointwise0_fp16[0].clamp(0.1, 100.0),)),
            ("log1p", (pointwise0_fp16[0].clamp(-0.9, 100.0),)),
            ("reciprocal", pointwise0_fp16),
            ("rsqrt", (pointwise0_fp16[0].clamp(0.0, 100.0),)),
            ("sinh", pointwise0_fp16),
            ("tan", (pointwise0_fp16[0].clamp(-3.1 / 2, 3.1 / 2),)),
            ("pow", ((pointwise0_fp16[0] + 1.).clamp(0.0, 100.0),) + pointwise1_fp16),
            ("pow", ((pointwise0_fp16[0] + 1.).clamp(0.0, 100.0),) + (1.7,)),
            # ("pow", (1.7,) + pointwise0_fp16), # This variant has a backend, but is not documented in the API.
            ("softmax", pointwise0_fp16 + (0,)),
            ("log_softmax", pointwise0_fp16 + (0,)),
            ("layer_norm", pointwise0_fp16 + ((pointwise0_fp16[0].numel(),),)),
            ("group_norm", mat0_fp16 + (1,)),
            ("norm", pointwise0_fp16),
            ("norm", pointwise0_fp16, {"dim": 0}),
            # these need magma
            # ("norm", mat0_fp16, {"p": "nuc"}),
            # ("norm", mat0_fp16, {"p": "nuc", "dim": 0}),
            ("norm", pointwise0_fp16, {"p": 1}),
            ("norm", pointwise0_fp16, {"p": 1, "dim": 0}),
            ("cosine_similarity", mat0_fp16 + mat1_fp16),
            ("poisson_nll_loss", mat0_fp16 + mat1_fp16 + (True, False, 1.e-8, torch.nn._reduction.get_enum('mean'))),
            ("cosine_embedding_loss", (torch.tensor([[1, 2, 3]], device=dev, dtype=torch.float16),
                                       torch.tensor([[1, 3, 4]], device=dev, dtype=torch.float16),
                                       torch.tensor([1], device=dev, dtype=torch.int))),
            ("hinge_embedding_loss", mat0_fp16 + (torch.ones(n, device=dev, dtype=torch.int),)),
            ("kl_div", mat0_fp16 + (torch.rand((n, n), device=dev, dtype=torch.float16),)),
            ("margin_ranking_loss", mat0_fp16 + mat1_fp16 + (torch.ones((n,), device=dev, dtype=torch.float16),)),
            ("triplet_margin_loss", mat0_fp16 + mat1_fp16 + mat2_fp16),
            ("binary_cross_entropy_with_logits", mat0_fp16 + (torch.rand((n, n), device=dev, dtype=torch.float16),)),
            ("cumprod", pointwise0_fp16 + (0,)),
            ("cumsum", pointwise0_fp16 + (0,)),
            ("dist", pointwise0_fp16 + pointwise1_fp16),
            ("pdist", mat0_fp16),
            ("cdist", mat0_fp16 + mat1_fp16),
            ("prod", pointwise0_fp16),
            ("prod", pointwise0_fp16 + (0,)),
            ("renorm", mat0_fp16 + (2, 0, 1.0)),
            ("sum", pointwise0_fp16),
            ("sum", mat0_fp16 + (1,)),
            ("logsumexp", mat0_fp16 + (1,)),
        ]
        # Every entry below mixes float16 and float32 tensor arguments.
        self.torch_need_autocast_promote = [
            ("addcdiv", pointwise0_fp32 + pointwise1_fp16 + (pointwise2_fp16[0].clamp(0.1, 100),)),
            ("addcmul", pointwise0_fp32 + pointwise1_fp16 + pointwise2_fp16),
            ("atan2", pointwise0_fp32 + (pointwise1_fp16[0].clamp(0.1, 100),)),
            ("bilinear", (torch.randn((1, 2), dtype=torch.float16, device=dev),
                          torch.randn((1, 2), dtype=torch.float32, device=dev),
                          torch.randn((1, 2, 2), dtype=torch.float16, device=dev),
                          torch.randn((1,), dtype=torch.float32, device=dev))),
            ("cross", (torch.randn(3, dtype=torch.float32, device=dev),
                       torch.randn(3, dtype=torch.float16, device=dev))),
            ("dot", pointwise0_fp16 + pointwise1_fp32),
            ("grid_sampler", (torch.randn((2, 3, 33, 22), dtype=torch.float16, device=dev),
                              torch.randn((2, 22, 11, 2), dtype=torch.float32, device=dev),
                              0, 0, False)),
            ("index_put", pointwise0_fp32 + ((torch.tensor([1], device=dev, dtype=torch.long),),
                                             torch.randn(1, device=dev, dtype=torch.float16))),
            ("index_put", pointwise0_fp16 + ((torch.tensor([1], device=dev, dtype=torch.long),),
                                             torch.randn(1, device=dev, dtype=torch.float32))),
            ("tensordot", (torch.randn((2, 2, 2), dtype=torch.float32, device=dev),
                           torch.randn((2, 2, 2), dtype=torch.float16, device=dev))),
            ("scatter_add", (torch.zeros(2, 2, 2, dtype=torch.float32, device=dev),
                             0,
                             torch.randint(0, 2, (2, 2, 2), device=dev),
                             torch.randn((2, 2, 2), dtype=torch.float16, device=dev))),
            ("scatter_add", (torch.zeros(2, 2, 2, dtype=torch.float16, device=dev),
                             0,
                             torch.randint(0, 2, (2, 2, 2), device=dev),
                             torch.randn((2, 2, 2), dtype=torch.float32, device=dev))),
        ]
        self.nn_fp16 = [
            ("linear", mat0_fp32 + mat1_fp32 + mat2_fp32),
        ]
        self.nn_fp32 = [
            ("softplus", pointwise0_fp16),
            ("nll_loss", (torch.rand((n, n), device=dev, dtype=torch.float),
                          torch.zeros((n,), device=dev, dtype=torch.long))),
            ("nll_loss2d", (torch.rand((n, n, n, n), device=dev, dtype=torch.half),
                            torch.zeros((n, n, n), device=dev, dtype=torch.long))),
            ("l1_loss", mat0_fp16 + mat1_fp16),
            ("smooth_l1_loss", mat0_fp16 + mat1_fp16),
            ("mse_loss", mat0_fp16 + mat1_fp16),
            ("multilabel_margin_loss", mat0_fp16 + (torch.ones((n, n), device=dev, dtype=torch.long),)),
            ("soft_margin_loss", mat0_fp16 + (torch.ones((n, n), device=dev, dtype=torch.long),)),
            ("multi_margin_loss", mat0_fp16 + (torch.ones((n,), device=dev, dtype=torch.long),)),
        ]
        self.linalg_fp16 = [
            ("linalg_multi_dot", (mat0_fp32 + mat1_fp32 + mat2_fp32,)),
        ]
        self.methods_fp16 = [
            ("__matmul__", mat0_fp32 + mat1_fp32)
        ]
        self.methods_fp32 = [
            ("__pow__", (torch.rand(n, device=dev, dtype=torch.float16), 1.5)),
        ]
        # Third element is the module that provides the op (torch._C._nn).
        self.banned = [
            ("binary_cross_entropy", (torch.rand((n, n), device=dev, dtype=torch.float32),
                                      torch.rand((n, n), device=dev, dtype=torch.float32)), torch._C._nn),
        ]
Exemple #56
0
        self.fc = nn.Linear(512, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
    def forward(self, x):
        """Apply the four stages, average pooling, flattening and the final ``fc``.

        The pooled activations are reshaped to ``(x.size(0), -1)`` before being
        passed to ``self.fc``; the output of ``self.fc`` is returned.
        """
        # Run the stages in order, feeding each one the previous output.
        for stage in (self.stage1, self.stage2, self.stage3, self.stage4):
            x = stage(x)

        pooled = self.avgpool(x)
        flattened = pooled.view(pooled.size(0), -1)
        return self.fc(flattened)


if __name__ == '__main__':
    # Smoke test: push a random batch through the network, then multiply the
    # output by a random 300x164 matrix and print both resulting sizes.
    net = se_resnet(300).cuda()
    batch = torch.rand(4, 3, 80, 80).cuda()   # other sizes tried: 416 320     800 608
    logits = net(batch)
    attributes = torch.rand(300, 164).cuda()
    projected = logits.mm(attributes)
    for tensor in (logits, projected):
        print(tensor.size())
Exemple #57
0
if __name__ == '__main__':
    import torch

    # Use the GPU when available, otherwise fall back to the CPU.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Configuration of the 3D network under test; an optional
    # residual_type='project' setting is left disabled.
    net_options = dict(
        initial_out_channels_power=4,
        layers_per_residual_block=2,
        residual_blocks_per_dilation=3,
        dilations=3,
    )
    net = HighRes3DNet(1, 2, **net_options)
    net.to(device)
    # The network is left in training mode (no .eval() call).

    print(net.num_parameters)
    print(net.get_receptive_field_world(spacing=2))

    # Random single-channel volume; 97 x 115 x 97 is the size used for 2 mm.
    # Other sizes tried: (64, 76, 64) for 3 mm and (80, 80, 80).
    volume = torch.rand(1, 1, 97, 115, 97, device=device)
    prediction = net(volume)
    print(prediction.shape)
Exemple #58
0
def conv_lstm_test():
    """Smoke-test ConvLSTM on one random input and print the resulting shapes.

    Builds a three-layer ConvLSTM (hidden sizes 32/64/128, 3x3 kernels),
    feeds it a random tensor of shape (1, 30, 1, 128, 128) and prints, in
    order: the model, the shape of the first element of the first returned
    layer output, the number of entries in the first returned state, and the
    shapes of that state's first two entries.
    """
    # torch.autograd.Variable is a deprecated no-op wrapper since PyTorch 0.4,
    # so a plain tensor is used; the name also avoids shadowing builtin input().
    # NOTE(review): with batch_first=True this is presumably
    # (batch, time, channels, height, width) -- confirm against ConvLSTM.
    frames = torch.rand(1, 30, 1, 128, 128)
    model = ConvLSTM(input_dim=1,
                     hidden_dim=[32, 64, 128],
                     kernel_size=(3, 3),
                     num_layers=3,
                     batch_first=True,
                     bias=True,
                     return_all_layers=False)

    print(model)

    layer_output, last_state = model(frames)

    first_layer_output = layer_output[0]
    # The first returned state holds (at least) two tensors, taken here as
    # the hidden and cell state.
    h, c = last_state[0][0], last_state[0][1]

    print(first_layer_output[0].shape)
    print(len(last_state[0]))
    print(h.shape)
    print(c.shape)


if __name__ == "__main__":
    # Smoke test: run one random input through EEGNet and print the output
    # shape. Use the GPU when available and fall back to the CPU otherwise,
    # so the script does not crash on machines without CUDA.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # torch.autograd.Variable is a deprecated no-op wrapper since PyTorch 0.4;
    # a plain tensor behaves the same. `sample` avoids shadowing builtin input().
    sample = torch.rand(1, 30, 1, 40, 128).to(device)

    model = EEGNet().to(device)

    output = model(sample)

    print(output.shape)
Exemple #59
0
        h = torch.stack(h).permute(1, 0, 2)
        h_reshape = h.contiguous().view(batch_size * time_step,
                                        self.hidden_dim)
        if self.dropout > 0.0:
            h_reshape = self.nn_dropout(h_reshape)
        output = self.nn_output(h_reshape)
        output = self.sigmoid(output)
        output = output.contiguous().view(batch_size, time_step,
                                          self.output_dim)
        return output, inputse_att


if __name__ == '__main__':
    # Smoke test: build AdaCare from the parsed command-line arguments, run one
    # random batch through it, and print the two values returned by profile().
    parser = argparse.ArgumentParser()
    args = train.parse_arguments(parser)
    print('Constructing model ... ')
    device = torch.device("cuda:0" if torch.cuda.is_available() else 'cpu')
    print("available device: {}".format(device))
    # torch.rand already returns a tensor; passing it back through
    # torch.tensor() makes a needless copy and emits a UserWarning, so the
    # dtype cast and device move are done with .to() instead.
    batch_x = torch.rand(128, 400, 76).to(device=device, dtype=torch.float32)

    model = AdaCare(args.rnn_dim, args.kernel_size, args.kernel_num,
                    args.input_dim, args.output_dim, args.dropout_rate,
                    args.r_visit, args.r_conv, args.activation_func,
                    device).to(device)
    # The model's forward takes the device as a second argument.
    cur_output, _ = model(batch_x, device)
    flops, params = profile(model, inputs=(batch_x, device))
    print('flops: ', flops, ' params: ', params)
    print('!!!!!!!')
    """
    def __init__(self, inp=10, out=16, kernel_size=3):
        """Create the module's single 2D convolution.

        Args:
            inp: first positional argument to ``nn.Conv2d`` (input channels).
            out: second positional argument to ``nn.Conv2d`` (output channels).
            kernel_size: kernel size forwarded to ``nn.Conv2d``.
        """
        super(TestConv2d, self).__init__()
        # Stride and bias are fixed; only the kernel size is configurable.
        conv_options = dict(stride=1, kernel_size=kernel_size, bias=True)
        self.conv2d = nn.Conv2d(inp, out, **conv_options)

    def forward(self, x):
        """Apply the wrapped ``conv2d`` layer to ``x`` and return its output."""
        return self.conv2d(x)


#model = TestConv2d()
# Smoke test: push one random batch through a DeepLabV3 model on the GPU.
# Random tensor of shape (1, 3, 473, 473), moved to the GPU (requires CUDA).
# NOTE(review): the name `input` shadows the Python builtin of the same name.
input = torch.rand(1, 3, 473, 473).cuda()
# Build DeepLabV3 with the settings below and move it to the GPU.
# NOTE(review): pretrained=True presumably makes the constructor load
# pretrained weights -- confirm against the DeepLabV3 definition.
model = DeepLabV3(layers=50,
                  dropout=0.1,
                  classes=21,
                  zoom_factor=8,
                  pretrained=True).cuda()
# Switch to evaluation mode before the forward pass, then print the model.
model.eval()
print(model)
# Single forward pass; the result is kept in `output` but not used further
# within the visible part of the script.
output = model(input)

#model = DeepLabV3(layers=101, classes=21, zoom_factor=8, pretrained=False)

#input_np = np.random.uniform(0, 1, (1, 3, 313, 313))
#input_var = Variable(torch.FloatTensor(input_np))

#k_model = pytorch_to_keras(model, input_var, [(3, 313, 313,)], verbose=True)