def test_resnet_resnet101(self):
        with _test_eager_guard():
            model = resnet101(pretrained=False)
            egr_data = paddle.to_tensor(self.data)
            egr_data.stop_gradient = False
            egr_out = model(egr_data)
            egr_preds = paddle.argmax(egr_out, axis=1)
            egr_label_onehot = paddle.nn.functional.one_hot(
                paddle.to_tensor(egr_preds), num_classes=egr_out.shape[1])
            egr_target = paddle.sum(egr_out * egr_label_onehot, axis=1)

            egr_g = paddle.grad(outputs=egr_target, inputs=egr_out)[0]
            egr_g_numpy = egr_g.numpy()
            self.assertEqual(list(egr_g_numpy.shape), list(egr_out.shape))

        model = resnet101(pretrained=False)
        data = paddle.to_tensor(self.data)
        data.stop_gradient = False
        out = model(data)
        preds = paddle.argmax(out, axis=1)
        label_onehot = paddle.nn.functional.one_hot(paddle.to_tensor(preds),
                                                    num_classes=out.shape[1])
        target = paddle.sum(out * label_onehot, axis=1)

        g = paddle.grad(outputs=target, inputs=out)[0]
        g_numpy = g.numpy()
        self.assertEqual(list(g_numpy.shape), list(out.shape))

        self.assertTrue(np.array_equal(egr_out, out))
        self.assertTrue(np.array_equal(egr_g_numpy, g_numpy))
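A standalone sketch of the pattern exercised above (the toy logits below stand in for the resnet101 output): with target = sum(out * onehot(argmax(out))), the gradient returned by paddle.grad is exactly the one-hot selection mask, which is why its shape must match out.

import numpy as np
import paddle

# Toy logits standing in for the model output.
out = paddle.to_tensor([[0.1, 2.0, -1.0],
                        [3.0, 0.5, 0.2]], stop_gradient=False)
onehot = paddle.nn.functional.one_hot(paddle.argmax(out, axis=1),
                                      num_classes=out.shape[1])
target = paddle.sum(out * onehot, axis=1)
g = paddle.grad(outputs=target, inputs=out)[0]
# d(sum_i target_i) / d(out_ij) = onehot_ij
assert np.array_equal(g.numpy(), onehot.numpy())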
Example 2
 def jacobian(self, coordinates):
     new_coordinates = self.warp_coordinates(coordinates)
     # Paddle cannot use new_coordinates[..., 0] here, so index explicitly
     assert len(new_coordinates.shape) == 3
     grad_x = paddle.grad(new_coordinates[:, :, 0].sum(),
                          coordinates,
                          create_graph=True)
     grad_y = paddle.grad(new_coordinates[:, :, 1].sum(),
                          coordinates,
                          create_graph=True)
     jacobian = paddle.concat(
         [grad_x[0].unsqueeze(-2), grad_y[0].unsqueeze(-2)], axis=-2)
     return jacobian
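A minimal check of the construction above, using a hypothetical affine warp in place of warp_coordinates: for new = coords · Aᵀ, the stacked per-component gradients recover A at every coordinate.

import paddle

A = paddle.to_tensor([[2.0, 0.0],
                      [1.0, 3.0]])
coords = paddle.rand([1, 5, 2])
coords.stop_gradient = False
warped = paddle.matmul(coords, A, transpose_y=True)   # hypothetical warp_coordinates
grad_x = paddle.grad(warped[:, :, 0].sum(), coords, create_graph=True)[0]
grad_y = paddle.grad(warped[:, :, 1].sum(), coords, create_graph=True)[0]
jacobian = paddle.concat([grad_x.unsqueeze(-2), grad_y.unsqueeze(-2)], axis=-2)
# every jacobian[0, i] equals A
print(jacobian[0, 0].numpy())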
Example 3
    def finetunning(self, x_spt, y_spt, x_qry, y_qry):
        # assert len(x_spt.shape) == 4

        query_size = x_qry.shape[0]
        correct_list = [0 for _ in range(self.update_step_test + 1)]

        new_net = deepcopy(self.net)
        y_hat = new_net(x_spt)
        loss = F.cross_entropy(y_hat, y_spt)
        grad = paddle.grad(loss, new_net.parameters())
        fast_weights = list(
            map(lambda p: p[1] - self.base_lr * p[0],
                zip(grad, new_net.parameters())))

        # Evaluate on the query set and compute accuracy.
        # This step uses the parameters from before the inner-loop update.
        with paddle.no_grad():
            y_hat = new_net(x_qry,
                            params=new_net.parameters(),
                            bn_training=True)
            pred_qry = F.softmax(y_hat, axis=1).argmax(axis=1)  # size = (75)
            correct = paddle.equal(pred_qry, y_qry).numpy().sum().item()
            correct_list[0] += correct

        # Evaluate on the query set with the updated fast weights.
        with paddle.no_grad():
            y_hat = new_net(x_qry, params=fast_weights, bn_training=True)
            pred_qry = F.softmax(y_hat, axis=1).argmax(axis=1)  # size = (75)
            correct = paddle.equal(pred_qry, y_qry).numpy().sum().item()
            correct_list[1] += correct

        for k in range(1, self.update_step_test):
            y_hat = new_net(x_spt, params=fast_weights, bn_training=True)
            loss = F.cross_entropy(y_hat, y_spt)
            grad = paddle.grad(loss, fast_weights)
            fast_weights = list(
                map(lambda p: p[1] - self.base_lr * p[0],
                    zip(grad, fast_weights)))

            y_hat = new_net(x_qry, fast_weights, bn_training=True)

            with paddle.no_grad():
                pred_qry = F.softmax(y_hat, axis=1).argmax(axis=1)
                correct = paddle.equal(pred_qry, y_qry).numpy().sum().item()
                correct_list[k + 1] += correct

        del new_net
        accs = np.array(correct_list) / query_size
        return accs
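A toy sketch of the inner-loop update used above (the linear stand-in for new_net, the data shapes, and base_lr are assumptions, not the MAML model): paddle.grad returns the gradients directly, and the fast weights are plain tensors, so the copied network's original parameters stay untouched.

import paddle
import paddle.nn.functional as F

net = paddle.nn.Linear(4, 3)        # stand-in for new_net
x_spt = paddle.rand([8, 4])
y_spt = paddle.randint(0, 3, [8])
base_lr = 0.1                       # assumed inner-loop step size

loss = F.cross_entropy(net(x_spt), y_spt)
grad = paddle.grad(loss, net.parameters())
fast_weights = [p - base_lr * g for g, p in zip(grad, net.parameters())]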
Example 4
def _kl_expfamily_expfamily(p, q):
    """Compute kl-divergence using `Bregman divergences <https://www.lix.polytechnique.fr/~nielsen/EntropyEF-ICIP2010.pdf>`_
    """
    if not type(p) == type(q):
        raise NotImplementedError

    p_natural_params = []
    for param in p._natural_parameters:
        param = param.detach()
        param.stop_gradient = False
        p_natural_params.append(param)

    q_natural_params = q._natural_parameters

    p_log_norm = p._log_normalizer(*p_natural_params)

    try:
        if _non_static_mode():
            p_grads = paddle.grad(p_log_norm,
                                  p_natural_params,
                                  create_graph=True)
        else:
            p_grads = paddle.static.gradients(p_log_norm, p_natural_params)
    except RuntimeError as e:
        raise TypeError(
            "Cann't compute kl_divergence({cls_p}, {cls_q}) use bregman divergence. Please register_kl({cls_p}, {cls_q})."
            .format(cls_p=type(p).__name__, cls_q=type(q).__name__)) from e

    kl = q._log_normalizer(*q_natural_params) - p_log_norm
    for p_param, q_param, p_grad in zip(p_natural_params, q_natural_params,
                                        p_grads):
        term = (q_param - p_param) * p_grad
        kl -= _sum_rightmost(term, len(q.event_shape))

    return kl
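For reference, the Bregman identity the loop above implements, in LaTeX notation (A is the log-normalizer _log_normalizer and \eta the natural parameters; paddle.grad or paddle.static.gradients supplies \nabla A(\eta_p)):

    \mathrm{KL}(p \,\|\, q) = A(\eta_q) - A(\eta_p) - \langle \eta_q - \eta_p,\ \nabla A(\eta_p) \rangle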
Example 5
    def entropy(self):
        """caculate entropy use `bregman divergence` 
        https://www.lix.polytechnique.fr/~nielsen/EntropyEF-ICIP2010.pdf
        """
        entropy_value = -self._mean_carrier_measure

        natural_parameters = []
        for parameter in self._natural_parameters:
            parameter = parameter.detach()
            parameter.stop_gradient = False
            natural_parameters.append(parameter)

        log_norm = self._log_normalizer(*natural_parameters)

        if in_dygraph_mode():
            grads = paddle.grad(
                log_norm.sum(), natural_parameters, create_graph=True)
        else:
            grads = paddle.static.gradients(log_norm.sum(), natural_parameters)

        entropy_value += log_norm
        for p, g in zip(natural_parameters, grads):
            entropy_value -= p * g

        return entropy_value
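In the same notation, and assuming self._mean_carrier_measure is \mathbb{E}_p[\log h(X)] for the base measure h, the method above computes:

    H(p) = A(\eta) - \langle \eta,\ \nabla A(\eta) \rangle - \mathbb{E}_p[\log h(X)]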
Example 6
    def test_hook_in_double_grad(self):
        def double_print_hook(grad):
            grad = grad * 2
            print(grad)
            return grad

        x = paddle.ones(shape=[1], dtype='float32')
        x.stop_gradient = False

        # The hook only takes effect when gradients are
        # accumulated by backward(); the gradient of x
        # returned by paddle.grad below is not modified by it.
        x.register_hook(double_print_hook)

        y = x * x

        # Since y = x * x, dx = 2 * x
        dx = paddle.grad(outputs=[y],
                         inputs=[x],
                         create_graph=True,
                         retain_graph=True)[0]

        z = y + dx
        self.assertTrue(x.grad is None)

        # With create_graph=True, dx itself is differentiable.
        # Since z = x * x + dx = x * x + 2 * x,
        # dz/dx = 2 * x + 2 = 4.0 at x = 1,
        # which the hook doubles to 8.0 in x.grad.

        z.backward()
        self.assertTrue(np.array_equal(x.grad.numpy(), np.array([8.])))
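A smaller sketch, separate from the double-grad test above, of what register_hook does on its own: the hook receives the gradient flowing into the tensor during backward() and its return value replaces it.

import numpy as np
import paddle

x = paddle.ones(shape=[1], dtype='float32')
x.stop_gradient = False
x.register_hook(lambda grad: grad * 2)   # double every gradient accumulated into x.grad

y = 3 * x
y.backward()
# dy/dx is 3; the hook doubles the accumulated value to 6.
assert np.array_equal(x.grad.numpy(), np.array([6.], dtype='float32'))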
Example 7
    def test_create_graph_false(self):
        def func(x):
            return paddle.matmul(x * x, self.weight)[:, 0:1]

        numerical_hessian = _compute_numerical_batch_hessian(
            func, self.x, self.numerical_delta, self.np_dtype)
        self.x.stop_gradient = False
        hessian = paddle.autograd.batch_hessian(func, self.x)
        assert hessian.stop_gradient == True
        assert np.allclose(hessian.numpy(), numerical_hessian, self.rtol,
                           self.atol)
        try:
            paddle.grad(hessian, self.x)
        except RuntimeError as e:
            error_msg = cpt.get_exception_message(e)
            assert error_msg.find("has no gradient") > 0
Example 8
 def run_gelu_op(approximate):
     with dg.guard():
         x = paddle.to_tensor(x_np)
         x.stop_gradient = False
         y = F.gelu(x, approximate=approximate)
         x_grad = paddle.grad([y], [x], [paddle.to_tensor(y_g_np)])[0]
         return y.numpy(), x_grad.numpy()
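The third argument passed to paddle.grad above weights the output gradients. A minimal sketch (toy tensors, unrelated to gelu) of grad_outputs as the vector in a vector-Jacobian product:

import numpy as np
import paddle

x = paddle.to_tensor([1.0, 2.0, 3.0], stop_gradient=False)
y = x * x                               # elementwise, so dy_i/dx_i = 2 * x_i
v = paddle.to_tensor([1.0, 0.0, 10.0])  # weight on each output gradient
x_grad = paddle.grad([y], [x], [v])[0]  # v * 2x
assert np.allclose(x_grad.numpy(), [2.0, 0.0, 60.0])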
Example 9
    def test_check_grad(self):
        paddle.disable_static(place=self.place)
        shape = (4, 5)
        x_np = np.random.uniform(-1, 1, shape).astype(np.float64)
        x_np[0, :] = np.nan
        x_np[1, :3] = np.nan
        x_np[2, 3:] = np.nan
        x_np_sorted = np.sort(x_np)
        nan_counts = np.count_nonzero(np.isnan(x_np).astype(np.int32), axis=1)
        np_grad = np.zeros((shape))
        for i in range(shape[0]):
            valid_cnts = shape[1] - nan_counts[i]
            if valid_cnts == 0:
                continue

            mid = int(valid_cnts / 2)
            targets = [x_np_sorted[i, mid]]
            is_odd = valid_cnts % 2
            if not is_odd and mid > 0:
                targets.append(x_np_sorted[i, mid - 1])
            for j in range(shape[1]):
                if x_np[i, j] in targets:
                    np_grad[i, j] = 1 if is_odd else 0.5

        x_tensor = paddle.to_tensor(x_np, stop_gradient=False)
        y = paddle.nanmedian(x_tensor, axis=1, keepdim=True)
        dx = paddle.grad(y, x_tensor)[0].numpy()
        self.assertTrue(np.allclose(np_grad, dx, equal_nan=True))
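A one-row sketch of the 0.5 case asserted above (toy values, no NaNs): with an even number of valid entries, nanmedian averages the two middle values, so each is expected to receive half of the upstream gradient.

import numpy as np
import paddle

x = paddle.to_tensor([4.0, 1.0, 3.0, 2.0], stop_gradient=False)
y = paddle.nanmedian(x)        # (2 + 3) / 2 = 2.5
dx = paddle.grad(y, x)[0]
assert np.allclose(dx.numpy(), [0.0, 0.0, 0.5, 0.5])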
Example 10
 def adapt_gradient_descent(self,
                            model,
                            lr,
                            loss,
                            approximate=True,
                            memo=None):
      # Copied from paddlefsl.utils.gradient_descent.
     # Maps original data_ptr to the cloned tensor.
     # Useful when a model uses parameters from another model.
     memo = set() if memo is None else set(memo)
     # Do gradient descent on parameters
     gradients = []
     if len(model.layers.parameters()) != 0:
         gradients = paddle.grad(loss,
                                 model.layers.parameters(),
                                 retain_graph=not approximate,
                                 create_graph=not approximate,
                                 allow_unused=True)
     update_values = [
         -lr * grad if grad is not None else None for grad in gradients
     ]
     for param, update in zip(model.layers.parameters(), update_values):
         if update is not None:
             param_ptr = id(param)
             if param_ptr in memo:
                 param.set_value(param.add(update))
Example 11
def get_eager_triple_grad(func,
                          x_init=None,
                          dy_init=None,
                          place=None,
                          return_mid_result=False):
    """
    Get triple Grad result of dygraph.

    Args:
        func: A wrapped dygraph function whose logic is equivalent to the static program.
        x_init (numpy.array|list[numpy.array]|None): the init value for input x.
        dy_init (numpy.array|list[numpy.array]|None): the init value for gradient of output.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        return_mid_result (bool): If set True, the intermediate results are also returned.
    Returns:
        A list of numpy arrays that stores the third derivative results calculated by dygraph.
    """
    dd_y, dd_x = get_eager_double_grad(func,
                                       x_init,
                                       dy_init,
                                       place,
                                       return_mid_result=True)

    # calculate the third derivative
    dddys = []
    for dd_yi in dd_y:
        dd_yi.stop_gradient = False
        dddy = paddle.ones(shape=dd_yi.shape, dtype=dd_yi.dtype)
        dddy.stop_gradient = False
        dddys.append(dddy)
    ddd_inputs = paddle.grad(outputs=dd_y, inputs=dd_x, grad_outputs=dddys)
    return [ddd_input.numpy() for ddd_input in ddd_inputs]
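A compact sketch of the nesting this helper relies on (a scalar toy input, not the wrapped func): every paddle.grad call made with create_graph=True returns a differentiable result, so derivatives can be stacked up to third order.

import paddle

x = paddle.to_tensor(2.0, stop_gradient=False)
y = x ** 4
d1 = paddle.grad(y, x, create_graph=True)[0]    # 4 * x**3  -> 32
d2 = paddle.grad(d1, x, create_graph=True)[0]   # 12 * x**2 -> 48
d3 = paddle.grad(d2, x)[0]                      # 24 * x    -> 48
print(float(d1), float(d2), float(d3))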
Example 12
 def test_vjp_i1o1_no_create_graph(self):
     test_cases = [
         [reduce, 'A'],  #noqa
         [reduce_dim, 'A'],  #noqa
     ]  #noqa
     for f, inputs in test_cases:
         vjp, grad = self.gen_test_pairs(f, inputs)
         vjp_result, grad_result = vjp(), grad()
         self.check_results(grad_result, vjp_result)
Example 13
 def test_vjp_nested_no_create_graph(self):
     x = self.gen_input('a')
     test_cases = [
         [nested(x), 'a'],  #noqa
     ]
     for f, inputs in test_cases:
         vjp, grad = self.gen_test_pairs(f, inputs)
         vjp_result, grad_result = vjp(), grad()
         self.check_results(grad_result, vjp_result)
Example 14
 def forward(self, x):
     x.stop_gradient = False
     tmp = x + x
     for i in range(10):
         tmp = self.linear(tmp)
     out = tmp
     dx = paddle.grad(
         [out], [x], None, create_graph=True, allow_unused=False)[0]
     return dx
Example 15
 def test_vjp_i2o2_omitting_v_no_create_graph(self):
     test_cases = [
         [o2, ['A', 'A']],  #noqa
     ]  #noqa
     for f, inputs in test_cases:
         inputs = self.gen_inputs(inputs)
         vjp, grad = self.gen_test_pairs(f, inputs)
         vjp_result, grad_result = vjp(), grad()
         self.check_results(grad_result, vjp_result)
Example 16
 def test_vjp_i2o1_no_create_graph(self):
     test_cases = [
         [matmul, ['A', 'B']],  #noqa
         [mul, ['b', 'c']],  #noqa
     ]  #noqa
     for f, inputs in test_cases:
         vjp, grad = self.gen_test_pairs(f, inputs)
         vjp_result, grad_result = vjp(), grad()
         self.check_results(grad_result, vjp_result)
Example 17
    def test_create_graph_false(self):
        def func(x, y):
            return x * y

        numerical_jacobian = _compute_numerical_batch_jacobian(
            func, [self.x, self.y], self.numerical_delta, self.np_dtype)
        self.x.stop_gradient = False
        self.y.stop_gradient = False
        jacobian = paddle.autograd.batch_jacobian(func, [self.x, self.y])
        for j in range(len(jacobian)):
            assert jacobian[j].stop_gradient == True
            assert np.allclose(jacobian[j].numpy(), numerical_jacobian[0][j],
                               self.rtol, self.atol)
        try:
            paddle.grad(jacobian[0], [self.x, self.y])
        except RuntimeError as e:
            error_msg = cpt.get_exception_message(e)
            assert error_msg.find("has no gradient") > 0
Example 18
    def test_sample_reparameterized(self):
        mean = paddle.ones([2, 3])
        logstd = paddle.ones([2, 3])
        mean.stop_gradient = False
        logstd.stop_gradient = False
        norm_rep = Normal(mean=mean, logstd=logstd)
        samples = norm_rep.sample()
        mean_grads, logstd_grads = paddle.grad(outputs=[samples], inputs=[mean, logstd],
                                               allow_unused=True)
        self.assertTrue(mean_grads is not None)
        self.assertTrue(logstd_grads is not None)

        norm_no_rep = Normal(mean=mean, logstd=logstd, is_reparameterized=False)
        samples = norm_no_rep.sample()
        mean_grads, logstd_grads = paddle.grad(outputs=[samples],
                                               inputs=[mean, logstd],
                                               allow_unused=True)

        self.assertEqual(mean_grads, None)
        self.assertEqual(logstd_grads, None)
Example 19
 def grad_test():
     nonlocal v
     xs = self.gen_inputs(inputs)
     if v is not None:
         v = self.gen_inputs(v)
     outputs = func(*xs)
     if v is not None:
         inputs_grad = grad(
             outputs,
             xs,
             v,
             create_graph=create_graph,
             allow_unused=allow_unused)
     else:
         inputs_grad = grad(
             outputs,
             xs,
             create_graph=create_graph,
             allow_unused=allow_unused)
     return outputs, inputs_grad
Example 20
 def predict_fn(data, labels):
     if isinstance(data, tuple):
         probs = self.paddle_model(*data)
     else:
         probs = self.paddle_model(data)
     labels_onehot = paddle.nn.functional.one_hot(
         paddle.to_tensor(labels), num_classes=probs.shape[1])
     target = paddle.sum(probs * labels_onehot, axis=1)
     gradients = paddle.grad(outputs=[target],
                             inputs=[self._embedding])[0]
     return gradients.numpy(), probs.numpy(), self._embedding.numpy()
Example 21
 def test_checkout_grad(self):
     place = core.CUDAPlace(0)
     if core.is_float16_supported(place):
         with fluid.dygraph.guard():
             x_np = np.random.random((10, 10)).astype(self.dtype)
             x = paddle.to_tensor(x_np)
             x.stop_gradient = False
             y = fluid.layers.mean(x)
             dx = paddle.grad(y, x)[0].numpy()
             dx_expected = self.dtype(1.0 / np.prod(x_np.shape)) * np.ones(
                 x_np.shape).astype(self.dtype)
             self.assertTrue(np.array_equal(dx, dx_expected))
Example 22
    def test_create_graph_false(self):
        def func(x):
            return paddle.sum(F.sigmoid(x))

        numerical_func_output = func(self.x).numpy()
        numerical_vhp = _compute_numerical_vhp(func, self.x, self.vx,
                                               self.numerical_delta,
                                               self.np_dtype)

        self.x.stop_gradient = False
        func_output, vhp = paddle.autograd.vhp(func, self.x, self.vx)
        assert np.allclose(func_output.numpy(), numerical_func_output,
                           self.rtol, self.atol)
        assert vhp[0].stop_gradient == True
        assert np.allclose(vhp[0].numpy(), numerical_vhp[0], self.rtol,
                           self.atol)
        try:
            paddle.grad(vhp, self.x)
        except RuntimeError as e:
            error_msg = cpt.get_exception_message(e)
            assert error_msg.find("has no gradient") > 0
Example 23
def r1_reg(d_out, x_in):
    # zero-centered gradient penalty for real images
    batch_size = x_in.shape[0]
    grad_dout = paddle.grad(outputs=d_out.sum(),
                            inputs=x_in,
                            create_graph=True,
                            retain_graph=True,
                            only_inputs=True)[0]
    grad_dout2 = grad_dout.pow(2)
    assert (grad_dout2.shape == x_in.shape)
    reg = 0.5 * paddle.reshape(grad_dout2, (batch_size, -1)).sum(1).mean(0)
    return reg
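A hypothetical usage sketch of r1_reg (the toy discriminator, image size, and batch size are stand-ins, not the original model): x_in must have stop_gradient = False so the penalty can be differentiated back into the discriminator parameters.

import paddle
import paddle.nn as nn

disc = nn.Sequential(nn.Flatten(), nn.Linear(3 * 8 * 8, 1))   # toy discriminator
x_real = paddle.rand([4, 3, 8, 8])
x_real.stop_gradient = False
d_out = disc(x_real)
reg = r1_reg(d_out, x_real)    # scalar penalty, added to the discriminator loss
print(float(reg))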
Example 24
 def predict_fn(data, labels):
     data = paddle.to_tensor(data)
     data.stop_gradient = False
     out = self.paddle_model(data)
     out = paddle.nn.functional.softmax(out, axis=1)
     preds = paddle.argmax(out, axis=1)
     if labels is None:
         labels = preds.numpy()
     labels_onehot = paddle.nn.functional.one_hot(
         paddle.to_tensor(labels), num_classes=out.shape[1])
     target = paddle.sum(out * labels_onehot, axis=1)
     gradients = paddle.grad(outputs=[target], inputs=[data])[0]
     return gradients.numpy(), labels
Example 25
 def generate_gradients(self, targets, inputs):
     if not isinstance(targets, list):
         if len(self._ones_like_targets) == 0:
             ones_like_targets = paddle.ones_like(targets)
             self._ones_like_targets.append(ones_like_targets)
         else:
             ones_like_targets = self._ones_like_targets[0]
     else:
         ones_like_targets = None
     gradients = paddle.grad(outputs=targets,
                             inputs=inputs,
                             grad_outputs=ones_like_targets)
     return gradients
Example 26
    def check_resnet(self):
        data = np.random.rand(1, 3, 224, 224).astype(np.float32)
        data = paddle.to_tensor(data)
        data.stop_gradient = False
        out = self.model(data)
        preds = paddle.argmax(out, axis=1)
        label_onehot = paddle.nn.functional.one_hot(paddle.to_tensor(preds),
                                                    num_classes=out.shape[1])
        target = paddle.sum(out * label_onehot, axis=1)

        g = paddle.grad(outputs=target, inputs=out)[0]
        g_numpy = g.numpy()
        self.assertEqual(list(g_numpy.shape), list(out.shape))
Example 27
    def test_create_graph_true(self):
        def func(x):
            return paddle.sum(F.sigmoid(x))

        numerical_hessian = _compute_numerical_hessian(func, self.x,
                                                       self.numerical_delta,
                                                       self.np_dtype)
        self.x.stop_gradient = False
        hessian = paddle.autograd.hessian(func, self.x, create_graph=True)
        assert hessian.stop_gradient == False
        assert np.allclose(hessian.numpy(), numerical_hessian[0][0], self.rtol,
                           self.atol)
        triple_grad = paddle.grad(hessian, self.x)
        assert triple_grad is not None
Example 28
    def test_create_graph_true(self):
        def func(x):
            return paddle.matmul(x * x, self.weight)[:, 0:1]

        numerical_hessian = _compute_numerical_batch_hessian(
            func, self.x, self.numerical_delta, self.np_dtype)
        self.x.stop_gradient = False
        hessian = paddle.autograd.batch_hessian(func,
                                                self.x,
                                                create_graph=True)
        assert hessian.stop_gradient == False
        assert np.allclose(hessian.numpy(), numerical_hessian, self.rtol,
                           self.atol)
        triple_grad = paddle.grad(hessian, self.x)
        assert triple_grad is not None
Example 29
def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True):
    paddle.set_device(device)

    t = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=False)

    out = func(t) if use_func else paddle.nn.functional.relu(t)
    out.stop_gradient = False

    dx = paddle.grad(
        outputs=[out], inputs=[t], create_graph=True, retain_graph=True)

    dx[0].backward()

    assert dx[0].grad is not None
    return dx[0].numpy(), dx[0].grad.numpy()
Example 30
 def grad(self,
          outputs,
          inputs,
          grad_outputs=None,
          no_grad_vars=None,
          retain_graph=None,
          create_graph=False,
          allow_unused=False):
     return paddle.grad(outputs=outputs,
                        inputs=inputs,
                        grad_outputs=grad_outputs,
                        no_grad_vars=no_grad_vars,
                        retain_graph=retain_graph,
                        create_graph=create_graph,
                        allow_unused=allow_unused)