Example 1
def test_assign_static1():
    """
    default,input=tensor,type=int,bool
    """
    paddle.enable_static()
    for t in types1:
        np.random.seed(seed)
        main_program = fluid.Program()
        startup_program = fluid.Program()
        input = np.arange(6).reshape([6]).astype(t)
        feed = {"input": input}
        with fluid.unique_name.guard():
            with fluid.program_guard(main_program=main_program,
                                     startup_program=startup_program):
                input1 = paddle.static.data(name="input",
                                            shape=input.shape,
                                            dtype=t)
                input1.stop_gradient = False
                output = paddle.fluid.layers.assign(input1)
                loss = paddle.mean(output)
                g = fluid.gradients(loss, input1)
                exe = fluid.Executor()
                exe.run(startup_program)
                out, g = exe.run(main_program,
                                 feed=feed,
                                 fetch_list=[output, g])
                compare(out, input)
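Because assign is an identity mapping, the gradient of paddle.mean(output) with respect to each of the six input elements is 1/6.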
Example 2
    def gradient_penalty(self, f, real, fake, cfg=None, name=None):
        def _interpolate(a, b):
            shape = [a.shape[0]]
            alpha = fluid.layers.uniform_random_batch_size_like(
                input=a, shape=shape, min=0.0, max=1.0)
            a.stop_gradient = True
            b.stop_gradient = True
            inner1 = fluid.layers.elementwise_mul(a, alpha, axis=0)
            inner2 = fluid.layers.elementwise_mul(b, (1.0 - alpha), axis=0)
            inner1.stop_gradient = True
            inner2.stop_gradient = True
            inner = inner1 + inner2
            return inner

        x = _interpolate(real, fake)
        pred, _ = f(x, cfg, name=name)
        if isinstance(pred, tuple):
            pred = pred[0]
        vars = []
        for var in fluid.default_main_program().list_vars():
            if fluid.io.is_parameter(var) and var.name.startswith('d_'):
                vars.append(var.name)
        grad = fluid.gradients(pred, x, no_grad_set=vars)[0]
        grad_shape = grad.shape
        grad = fluid.layers.reshape(
            grad, [-1, grad_shape[1] * grad_shape[2] * grad_shape[3]])
        epsilon = 1e-16
        norm = fluid.layers.sqrt(
            fluid.layers.reduce_sum(
                fluid.layers.square(grad), dim=1) + epsilon)
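        # WGAN-GP gradient penalty: E[(||grad||_2 - 1)^2]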
        gp = fluid.layers.reduce_mean(fluid.layers.square(norm - 1.0))
        return gp
Example 3
def test_softmax_with_cross_entropy_static1():
    """
    default
    """
    paddle.enable_static()
    for place in places:
        for t in types:
            x = np.arange(8).reshape([1, 8]).astype(t)  # logits need rank 2 to match the [1, 1] label
            label = np.array([[1]], dtype='int64')
            feed = {'x': x, 'label': label}
            main_program = fluid.Program()
            startup_program = fluid.Program()
            with fluid.program_guard(main_program=main_program,
                                     startup_program=startup_program):
                logits1 = paddle.static.data(name="x", shape=x.shape, dtype=t)
                label1 = paddle.static.data(name="label",
                                            shape=label.shape,
                                            dtype='int64')
                logits1.stop_gradient = False
                output = fluid.layers.softmax_with_cross_entropy(
                    logits=logits1, label=label1)
                loss = paddle.mean(output)
                g = fluid.gradients(loss, logits1)
                exe = fluid.Executor(place)
                exe.run(startup_program)
                out, g = exe.run(main_program,
                                 feed=feed,
                                 fetch_list=[output, g])
                assert np.allclose(out, [[6.45833969]],
                                   atol=0.005,
                                   rtol=0.05,
                                   equal_nan=True)
Example 4
    def _paddle_prepare(self, predict_fn=None):
        if predict_fn is None:
            startup_prog = fluid.Program()
            main_program = fluid.Program()
            with fluid.program_guard(main_program, startup_prog):
                with fluid.unique_name.guard():
                    data_op = fluid.data(name='data',
                                         shape=[None],
                                         dtype='int64',
                                         lod_level=1)
                    label_op = fluid.data(name='label',
                                          shape=[None, 1],
                                          dtype='int64')
                    alpha_op = fluid.layers.data(name='alpha',
                                                 shape=[None, 1],
                                                 dtype='double')

                    emb, probs = self.paddle_model(data_op, alpha_op,
                                                   self.noise_amount)

                    for op in main_program.global_block().ops:
                        if op.type == 'batch_norm':
                            op._set_attr('use_global_stats', True)
                        elif op.type == 'dropout':
                            op._set_attr('dropout_prob', 0.0)

                    class_num = probs.shape[-1]
                    one_hot = fluid.layers.one_hot(label_op, class_num)
                    one_hot = fluid.layers.elementwise_mul(probs, one_hot)
                    target_category_loss = fluid.layers.reduce_sum(one_hot,
                                                                   dim=1)

                    p_g_list = fluid.backward.append_backward(
                        target_category_loss)
                    gradients_map = fluid.gradients(one_hot, emb)[0]

            if self.use_cuda:
                gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
                place = fluid.CUDAPlace(gpu_id)
            else:
                place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            fluid.io.load_persistables(exe, self.trained_model_path,
                                       main_program)

            def predict_fn(data, label, alpha):
                gradients, out, embedding = exe.run(
                    main_program,
                    feed={
                        'data': data,
                        'label': label,
                        'alpha': alpha
                    },
                    fetch_list=[gradients_map, probs, emb],
                    return_numpy=False)
                return gradients, out, embedding

        self.predict_fn = predict_fn
        self.paddle_prepared = True
Example 5
    def _paddle_prepare(self, predict_fn=None):
        if predict_fn is None:
            import paddle.fluid as fluid
            startup_prog = fluid.Program()
            main_program = fluid.Program()
            with fluid.program_guard(main_program, startup_prog):
                with fluid.unique_name.guard():
                    data_op = fluid.data(name='data',
                                         shape=[None] + self.model_input_shape,
                                         dtype=self.data_type)
                    label_op = fluid.data(name='label',
                                          shape=[None, 1],
                                          dtype='int64')
                    x_noise = fluid.data(name='noise',
                                         shape=[None] + self.model_input_shape,
                                         dtype='float32')

                    x_plus_noise = data_op + x_noise
                    probs = self.paddle_model(x_plus_noise)

                    for op in main_program.global_block().ops:
                        if op.type == 'batch_norm':
                            op._set_attr('use_global_stats', True)
                        elif op.type == 'dropout':
                            op._set_attr('dropout_prob', 0.0)

                    class_num = probs.shape[-1]
                    one_hot = fluid.layers.one_hot(label_op, class_num)
                    one_hot = fluid.layers.elementwise_mul(probs, one_hot)
                    target_category_loss = fluid.layers.reduce_sum(one_hot,
                                                                   dim=1)

                    p_g_list = fluid.backward.append_backward(
                        target_category_loss)
                    gradients_map = fluid.gradients(one_hot, x_plus_noise)[0]

            if self.use_cuda:
                gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
                place = fluid.CUDAPlace(gpu_id)
            else:
                place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            fluid.io.load_persistables(exe, self.trained_model_path,
                                       main_program)

            def predict_fn(data, labels, noise=0.0):
                if isinstance(noise, (float, int)):
                    noise = np.ones_like(data) * noise
                gradients, out = exe.run(main_program,
                                         feed={
                                             'data': data,
                                             'label': labels,
                                             'noise': noise
                                         },
                                         fetch_list=[gradients_map, probs])
                return gradients, out

        self.predict_fn = predict_fn
        self.paddle_prepared = True
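This is the SmoothGrad-style setup: noise is added to the input and gradients are taken with respect to x_plus_noise, so repeated calls with different noise samples can be averaged into a smoothed attribution map.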
Example 6
    def test_error(self):
        x = fluid.data(name='x', shape=[None, 2, 8, 8], dtype='float32')
        x.stop_gradient = False
        conv = fluid.layers.conv2d(x, 4, 1, bias_attr=False)
        y = fluid.layers.relu(conv)

        with self.assertRaises(TypeError):
            x_grad = fluid.gradients(y.name, x)

        with self.assertRaises(TypeError):
            x_grad = fluid.gradients(y, x.name)

        with self.assertRaises(TypeError):
            x_grad = fluid.gradients([y], [x], target_gradients=x.name)

        with self.assertRaises(TypeError):
            x_grad = fluid.gradients([y], x, no_grad_set=conv)
Example 7
def get_static_triple_grad(x,
                           y,
                           x_init=None,
                           dy_init=None,
                           place=None,
                           program=None):
    """
    Get Triple Grad result of static graph.

    Args:
        x (Variable|list[Variable]): input variables to the program.
        y (Variable|list[Variable]): output variables to the program.
        x_init (numpy.array|list[numpy.array]|None): the init value for input x.
        dy_init (numpy.array|list[numpy.array]|None): the init value for output y.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        program (Program|None): a Program with forward pass.
            If None, use fluid.default_main_program().
    Returns:
        A list of numpy array that stores third derivative result calculated by static graph.
    """
    if program is None:
        program = fluid.default_main_program()
    scope = fluid.executor.global_scope()
    y_grads = []
    for i in six.moves.xrange(len(y)):
        yi = y[i]
        dyi_name = _append_grad_suffix_(yi.name)
        np_type = dtype_to_np_dtype(yi.dtype)
        dy = program.global_block().create_var(name=dyi_name,
                                               shape=yi.shape,
                                               dtype=np_type,
                                               persistable=True)
        dy.stop_gradient = False
        set_var_in_scope(scope, place, dyi_name, dy_init[i])
        y_grads.append(dy)

    # append first order grads
    dx = fluid.gradients(y, x, y_grads)

    # y_grads are the input of first-order backward,
    # so, they are also the input of second-order backward.
    x += y_grads
    x_init += dy_init
    y = dx

    x_grads_grads_init = []
    for dxi in dx:
        np_type = dtype_to_np_dtype(dxi.dtype)
        value = np.ones(dxi.shape, dtype=np_type)
        x_grads_grads_init.append(value)

    return get_static_double_grad(x,
                                  y,
                                  x_init,
                                  dy_init=x_grads_grads_init,
                                  place=place,
                                  program=program)
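The third-order result is obtained by chaining: the first-order grads dx become the new targets y, all-ones arrays become their initial output gradients, and get_static_double_grad appends the remaining two backward passes.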
Example 8
    def test2(self):
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            x = fluid.layers.create_parameter(
                name='x',
                shape=[1],
                dtype='float32',
                default_initializer=fluid.initializer.Constant(1))
            y = x * x
            dx1, = fluid.gradients(y, x)
            z = dx1 * dx1 + y * y
            dx2, = fluid.gradients(z, x)
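            # With x = 1: y = x^2 gives dx1 = 2x = 2, and
            # z = (2x)^2 + (x^2)^2 = 4x^2 + x^4, so
            # dz/dx = 8x + 4x^3 = 12 at x = 1.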

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup)
        out, = exe.run(main, fetch_list=[dx2])
        self.assertEqual(12, out[0])
Example 9
def calc_gradients(outputs, inputs, no_grad_set):
    if fluid.in_dygraph_mode():
        return fluid.dygraph.grad(outputs=outputs,
                                  inputs=inputs,
                                  no_grad_vars=no_grad_set,
                                  create_graph=True)
    else:
        return fluid.gradients(targets=outputs,
                               inputs=inputs,
                               no_grad_set=no_grad_set)
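
A minimal sketch of calling this helper in static-graph mode; the names w and y are illustrative, not from the original source:

    w = fluid.data(name='w', shape=[None, 1], dtype='float32')
    w.stop_gradient = False
    y = w * w
    # in static mode this falls through to fluid.gradients
    dw, = calc_gradients(outputs=[y], inputs=[w], no_grad_set=None)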
Example 10
    def gradient_penalty(self, f, real, fake=None, cfg=None, name=None):
        def _interpolate(a, b=None):
            a_shape = fluid.layers.shape(a)
            if b is None:
                if cfg.enable_ce:
                    beta = fluid.layers.uniform_random(shape=a_shape,
                                                       min=0.0,
                                                       max=1.0,
                                                       seed=1)
                else:
                    beta = fluid.layers.uniform_random(shape=a_shape,
                                                       min=0.0,
                                                       max=1.0)

                mean = fluid.layers.reduce_mean(a,
                                                dim=list(range(len(a.shape))))
                input_sub_mean = fluid.layers.elementwise_sub(a, mean, axis=0)
                var = fluid.layers.reduce_mean(
                    fluid.layers.square(input_sub_mean),
                    dim=list(range(len(a.shape))))
                b = beta * fluid.layers.sqrt(var) * 0.5 + a
            shape = [a.shape[0]]
            if cfg.enable_ce:
                alpha = fluid.layers.uniform_random(shape=a_shape[0],
                                                    min=0.0,
                                                    max=1.0,
                                                    seed=1)
            else:
                alpha = fluid.layers.uniform_random(shape=a_shape[0],
                                                    min=0.0,
                                                    max=1.0)

            inner = fluid.layers.elementwise_mul((b - a), alpha, axis=0) + a
            return inner

        x = _interpolate(real, fake)

        pred, _ = f(x, cfg=cfg, name=name)
        if isinstance(pred, tuple):
            pred = pred[0]
        vars = []
        for var in fluid.default_main_program().list_vars():
            if fluid.io.is_parameter(var) and var.name.startswith(
                    "discriminator"):
                vars.append(var.name)
        grad = fluid.gradients(pred, x, no_grad_set=vars)[0]
        grad_shape = grad.shape
        grad = fluid.layers.reshape(
            grad, [-1, grad_shape[1] * grad_shape[2] * grad_shape[3]])
        epsilon = 1e-16
        norm = fluid.layers.sqrt(
            fluid.layers.reduce_sum(fluid.layers.square(grad), dim=1) +
            epsilon)
        gp = fluid.layers.reduce_mean(fluid.layers.square(norm - 1.0))
        return gp
Example 11
    def test1(self):
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            net = lambda x: x * x
            x = fluid.layers.create_parameter(
                name='x',
                shape=[1],
                dtype='float32',
                default_initializer=fluid.initializer.Constant(3))
            grad1, = fluid.gradients(net(x), x)  # 2x = 6
            z = net(x - grad1)
            grad2, = fluid.gradients(z, x)  # gradients( (x - 2x)^2) = 2x = 6

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup)
        out = exe.run(main, fetch_list=[grad1.name, grad2.name])
        self.assertEqual(6, out[0][0])
        self.assertEqual(6, out[1][0])
Example 12
    def test_prune(self):
        x = fluid.data(name='x', shape=[3], dtype='float32')
        x.stop_gradient = False
        x1, x2, x3 = fluid.layers.split(x, dim=0, num_or_sections=3)
        y = x1 * 2
        x1_grad = fluid.gradients(y, x)

        exe = fluid.Executor(fluid.CPUPlace())
        main = fluid.default_main_program()
        exe.run(fluid.default_startup_program())
        out = exe.run(main,
                      feed={'x': np.ones([3]).astype('float32')},
                      fetch_list=[x1_grad])
        self.assertTrue(np.array_equal(out[0], [2., 0., 0.]))
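Only x1 contributes to y, so backward is pruned for the other two slices and the gradient with respect to x comes back as [2., 0., 0.].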
Example 13
    def _construct_graph(self):
        train_program = fluid.Program()
        start_program = fluid.Program()
        with fluid.program_guard(train_program, start_program):
            self.x_ph = TensorList([
                fluid.layers.data('x_{}'.format(idx),
                                  v.shape,
                                  append_batch_size=False,
                                  stop_gradient=False)
                for idx, v in enumerate(self.x)
            ])

            # problem forward
            self.f0 = self.problem(self.x_ph)
            self.loss = self.problem.ip_output(self.f0, self.f0)
            # problem backward
            self.grad = TensorList(fluid.gradients(self.loss, self.x_ph))

        place = fluid.CUDAPlace(0)
        self.exe = fluid.Executor(place)
        self.exe.run(program=fluid.default_startup_program())
        self.compiled_prog = fluid.compiler.CompiledProgram(train_program)
Example 14
from paddle import fluid
import paddle.fluid.transpiler.details.program_utils as pu

# net = lambda x : x * x

x = fluid.layers.create_parameter(shape=[1], dtype='float32', default_initializer=fluid.initializer.Constant(2))

y = fluid.layers.elementwise_mul(x, x)
grad1 = fluid.gradients(y, x)[0] # 2x = 4

# pu.program_to_code(fluid.default_main_program(), skip_op_callstack=True)

z = fluid.layers.elementwise_sub(x, grad1)
y2 = fluid.layers.elementwise_mul(z, z)
grad2 = fluid.gradients(y2, x)[0] # gradients( (x - 2x)^2) = 2x = 4

pu.program_to_code(fluid.default_main_program(), skip_op_callstack=True)
                                                                            
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

out_np = exe.run(fluid.default_main_program(), fetch_list=[grad1, grad2])
print(out_np)
Example 15
    def _static_forward(self, res, data=None, **kwargs):
        """
        _static_forward
        """
        if self.__layertype == "func":

            paddle.seed(self.seed)
            main_program = fluid.Program()
            startup_program = fluid.Program()
            params = copy.deepcopy(kwargs)
            with fluid.unique_name.guard():
                with fluid.program_guard(main_program=main_program,
                                         startup_program=startup_program):
                    # Note: value passing is not factored into a separate helper, since there is only one self.kwargs
                    xyz = []
                    for k, v in kwargs.items():
                        if isinstance(v, (np.generic, np.ndarray)):
                            # entries in no_grad_var do not need dtype conversion
                            if self.no_grad_var is not None and k in self.no_grad_var:
                                kwargs[k] = v
                            else:
                                kwargs[k] = v.astype(self.dtype)
                    for k, v in params.items():
                        if isinstance(v, (np.generic, np.ndarray)):
                            # entries in no_grad_var do not need dtype conversion
                            if self.no_grad_var is not None and k in self.no_grad_var:
                                params[k] = fluid.data(name=k,
                                                       shape=v.shape,
                                                       dtype=v.dtype)
                            else:
                                params[k] = fluid.data(name=k,
                                                       shape=v.shape,
                                                       dtype=self.dtype)
                            xyz.append(k)
                            # enable compute gradient
                            params[k].stop_gradient = False
                    output = self.func(**params)
                    if self.enable_backward:
                        loss = paddle.mean(output)
                        grad_var = {}
                        for k in xyz:
                            grad_var[k] = fluid.gradients(loss, params[k])
                        exe = fluid.Executor(self.place)
                        exe.run(startup_program)
                        # print(list(grad_var.values()))
                        # print([output] + list(grad_var.values()))
                        res = exe.run(main_program,
                                      feed=kwargs,
                                      fetch_list=[output] +
                                      list(grad_var.values()),
                                      return_numpy=True)
                        # combine grad
                        grad = dict(zip(xyz, res[1:]))
                        return res[0], grad
                    else:
                        exe = fluid.Executor(self.place)
                        exe.run(startup_program)
                        # print(list(grad_var.values()))
                        # print([output] + list(grad_var.values()))
                        res = exe.run(main_program,
                                      feed=kwargs,
                                      fetch_list=[output],
                                      return_numpy=True)
                        return res[0]
        elif self.__layertype == "class":
            main_program = fluid.Program()
            startup_program = fluid.Program()
            main_program.random_seed = self.seed
            startup_program.random_seed = self.seed
            params = copy.deepcopy(kwargs)
            with fluid.unique_name.guard():
                with fluid.program_guard(main_program=main_program,
                                         startup_program=startup_program):
                    # Note: value passing is not factored into a separate helper, since there is only one self.kwargs
                    for k, v in kwargs.items():
                        if isinstance(v, (np.generic, np.ndarray)):
                            # entries in no_grad_var do not need dtype conversion
                            if self.no_grad_var is not None and k in self.no_grad_var:
                                kwargs[k] = v
                            else:
                                kwargs[k] = v.astype(self.dtype)
                    for k, v in params.items():
                        if isinstance(v, (np.generic, np.ndarray)):
                            # entries in no_grad_var do not need dtype conversion
                            if self.no_grad_var is not None and k in self.no_grad_var:
                                params[k] = fluid.data(name=k,
                                                       shape=v.shape,
                                                       dtype=v.dtype)
                            else:
                                params[k] = fluid.data(name=k,
                                                       shape=v.shape,
                                                       dtype=self.dtype)
                            # enable compute gradient
                            params[k].stop_gradient = False
                    if data is not None:
                        data = data.astype(self.dtype)
                        self.data = fluid.data(name="data",
                                               shape=data.shape,
                                               dtype=self.dtype)
                        self.data.stop_gradient = False
                    data = dict({"data": data}, **kwargs)
                    obj = self.func(**params)
                    output = obj(self.data)
                    if self.enable_backward:
                        loss = paddle.mean(output)
                        g = fluid.gradients(loss, self.data)
                        exe = fluid.Executor(self.place)
                        exe.run(startup_program)
                        res = exe.run(main_program,
                                      feed=data,
                                      fetch_list=[output, g],
                                      return_numpy=True)
                        grad = {"data": res[1]}
                        return res[0], grad
                    else:
                        exe = fluid.Executor(self.place)
                        exe.run(startup_program)
                        res = exe.run(main_program,
                                      feed=data,
                                      fetch_list=[output],
                                      return_numpy=True)
                        return res[0]
Example 16
def compute_unrolled_step(image_train, label_train, image_val, label_val,
                          data_prog, startup_prog, lr, args):

    fetch = []
    unrolled_model_prog = data_prog.clone()
    with fluid.program_guard(unrolled_model_prog, startup_prog):
        # construct model graph
        train_logits, train_loss = model(image_train,
                                         label_train,
                                         args.init_channels,
                                         args.class_num,
                                         args.layers,
                                         name="model")
        # construct unrolled model graph
        logits, unrolled_train_loss = model(image_train,
                                            label_train,
                                            args.init_channels,
                                            args.class_num,
                                            args.layers,
                                            name="unrolled_model")

        all_params = unrolled_model_prog.global_block().all_parameters()
        model_var = utility.get_parameters(all_params, 'model')[1]
        unrolled_model_var = utility.get_parameters(all_params,
                                                    'unrolled_model')[1]

        # copy model_var to unrolled_model_var
        for m_var, um_var in zip(model_var, unrolled_model_var):
            fluid.layers.assign(m_var, um_var)

        unrolled_optimizer = fluid.optimizer.MomentumOptimizer(
            lr,
            args.momentum,
            regularization=fluid.regularizer.L2DecayRegularizer(
                args.weight_decay))
        unrolled_optimizer.minimize(
            unrolled_train_loss,
            parameter_list=[v.name for v in unrolled_model_var])
        fetch.append(unrolled_train_loss)
    logger.info("get unrolled_model")

    arch_optim_prog = data_prog.clone()
    with fluid.program_guard(arch_optim_prog, startup_prog):
        train_logits, train_loss = model(image_train,
                                         label_train,
                                         args.init_channels,
                                         args.class_num,
                                         args.layers,
                                         name="model")
        logits, unrolled_valid_loss = model(image_val,
                                            label_val,
                                            args.init_channels,
                                            args.class_num,
                                            args.layers,
                                            name="unrolled_model")

        all_params = arch_optim_prog.global_block().all_parameters()
        model_var = utility.get_parameters(all_params, 'model')[1]
        unrolled_model_var = utility.get_parameters(all_params,
                                                    'unrolled_model')[1]
        arch_var = utility.get_parameters(all_params, 'arch')[1]
        # get grad of unrolled_valid_loss
        valid_grads = fluid.gradients(unrolled_valid_loss, unrolled_model_var)
        eps = 1e-2 * fluid.layers.rsqrt(
            fluid.layers.sums([
                fluid.layers.reduce_sum(fluid.layers.square(valid_grad))
                for valid_grad in valid_grads
            ]))
        model_params_grads = list(zip(model_var, valid_grads))

        for param, grad in model_params_grads:
            param = fluid.layers.elementwise_add(
                param, fluid.layers.elementwise_mul(grad, eps))
        logger.info("get w+")

        logits, train_loss = model(image_train,
                                   label_train,
                                   args.init_channels,
                                   args.class_num,
                                   args.layers,
                                   name="model")
        train_grads_pos = fluid.gradients(train_loss, arch_var)
        grads_names = [v.name for v in train_grads_pos]
        for name in grads_names:
            arch_optim_prog.global_block()._rename_var(name, name + '_pos')
        logger.info("get train_gards_pos")

        # w- = w - eps * dw
        for param, grad in model_params_grads:
            param = fluid.layers.elementwise_add(
                param, fluid.layers.elementwise_mul(grad, eps * -2))
        logger.info("get w-")

        logits, train_loss = model(image_train,
                                   label_train,
                                   args.init_channels,
                                   args.class_num,
                                   args.layers,
                                   name="model")
        train_grads_neg = fluid.gradients(train_loss, arch_var)
        for name in grads_names:
            arch_optim_prog.global_block()._rename_var(name, name + '_neg')
        logger.info("get train_gards_neg")

        # recover w
        for param, grad in model_params_grads:
            param = fluid.layers.elementwise_add(
                param, fluid.layers.elementwise_mul(grad, eps))
        logger.info("get w")

        leader_opt = fluid.optimizer.Adam(
            args.arch_learning_rate,
            0.5,
            0.999,
            regularization=fluid.regularizer.L2DecayRegularizer(
                args.arch_weight_decay))
        arch_params_grads = leader_opt.backward(
            unrolled_valid_loss, parameter_list=[v.name for v in arch_var])

        grads_p = [
            arch_optim_prog.global_block().var(name + '_pos')
            for name in grads_names
        ]
        grads_n = [
            arch_optim_prog.global_block().var(name + '_neg')
            for name in grads_names
        ]

        for i, (var, grad) in enumerate(arch_params_grads):
            arch_params_grads[i] = (var, grad - ((grads_p[i] - grads_n[i]) /
                                                 (eps * 2)) * lr)
        leader_opt.apply_gradients(arch_params_grads)
        logger.info("update alpha")
        fetch.append(unrolled_valid_loss)
        arch_progs_list = [unrolled_model_prog, arch_optim_prog]
    return arch_progs_list, fetch
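This reproduces the second-order DARTS update: the architecture gradient is corrected by a finite-difference Hessian-vector product, (grads_pos - grads_neg) / (2 * eps), scaled by the learning rate.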
Example 17
def get_static_double_grad(x,
                           y,
                           x_init=None,
                           dy_init=None,
                           place=None,
                           program=None):
    """
    Get Double Grad result of static graph.

    Args:
        x (Variable|list[Variable]): input variables to the program.
        y (Variable|list[Variable]): output variables to the program.
        x_init (numpy.array|list[numpy.array]|None): the init value for input x.
        dy_init (numpy.array|list[numpy.array]|None): the init value for output y.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        program (Program|None): a Program with forward pass.
            If None, use fluid.default_main_program().
    Returns:
        A list of numpy array that stores second derivative result calculated by static graph.
    """

    if program is None:
        program = fluid.default_main_program()
    scope = fluid.executor.global_scope()
    y_grads = []
    for i in six.moves.xrange(len(y)):
        yi = y[i]
        dyi_name = _append_grad_suffix_(yi.name)
        np_type = dtype_to_np_dtype(yi.dtype)
        dy = program.global_block().create_var(name=dyi_name,
                                               shape=yi.shape,
                                               dtype=np_type,
                                               persistable=True)
        dy.stop_gradient = False
        set_var_in_scope(scope, place, dyi_name, dy_init[i])
        y_grads.append(dy)

    # append first order grads
    dx = fluid.gradients(y, x, y_grads)

    # y_grads are the input of first-order backward,
    # so, they are also the input of second-order backward.
    x += y_grads
    x_init += dy_init

    # filter out None entries in dx, since dX/dY may be None in some kernels
    filted_dx = [dxi for dxi in dx if dxi is not None]
    y = filted_dx

    # check input arguments
    x = _as_list(x)
    y = _as_list(y)

    for v in x:
        v.stop_gradient = False
        v.persistable = True
    if place is None:
        place = fluid.CPUPlace()
    if program is None:
        program = fluid.default_main_program()

    # init variables via the startup program
    scope = fluid.executor.global_scope()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    x_init = _as_list(x_init)
    # init inputs if x_init is not None
    if x_init:
        if len(x_init) != len(x):
            raise ValueError('len(x_init) (=%d) is not the same'
                             ' as len(x) (= %d)' % (len(x_init), len(x)))
        # init variable in main program
        for var, arr in zip(x, x_init):
            assert var.shape == arr.shape
        feeds = {k.name: v for k, v in zip(x, x_init)}
        exe.run(program, feed=feeds, scope=scope)

    dys = []
    for yi in y:
        np_type = dtype_to_np_dtype(yi.dtype)
        dy_name = _append_grad_suffix_(yi.name)
        # create dy Variable in Program
        dy = program.global_block().create_var(name=dy_name,
                                               shape=yi.shape,
                                               dtype=np_type,
                                               persistable=True)
        # init dy tensor in scope
        value = np.ones(yi.shape, dtype=np_type)
        dy_t = set_var_in_scope(scope, place, dy_name, value)
        dys.append(dy)

    # append second order backward
    ddx = fluid.gradients(y, x, dys)
    exe = fluid.Executor(place)

    # filter out None entries in ddx, since dX/dY may be None in some kernels;
    # only fetch the non-None entries in exe.run
    filted = [(i, dxi) for i, dxi in enumerate(ddx) if dxi is not None]
    filted_idx, filted_ddx = zip(*filted)
    ddx_res = exe.run(program, scope=scope, fetch_list=filted_ddx)

    return ddx_res
Example 18
def triple_grad_check(x,
                      y,
                      x_init=None,
                      y_grads=None,
                      x_grads_grads=None,
                      place=None,
                      program=None,
                      eps=1e-6,
                      atol=1e-5,
                      rtol=1e-3,
                      raise_exception=True):
    """
    Check triple gradients. This function will append backward to the
    program before third order gradient check.

    Args:
        x (Variable|list[Variable]): input variables to the program.
        y (Variable|list[Variable]): output variables to the program.
        x_init (numpy.array|list[numpy.array]|None): the init value for input x.
        y_grads (numpy.array|list[numpy.array]|None): the gradients with respect to y.
        x_grads_grads (numpy.array|list[numpy.array]|None): the gradients with respect to your input.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        program (Program|None): a Program with forward pass.
            If None, use fluid.default_main_program().
        eps (float): perturbation for finite differences.
        atol (float): absolute tolerance.
        rtol (float): relative tolerance.
        raise_exception (bool): whether to raise an exception if
            the check fails. Default is True.
    Returns:
        True if all differences satisfy numpy.allclose condition.
    """
    # check input arguments
    x = _as_list(x)
    for v in x:
        v.stop_gradient = False
        v.persistable = True
    y = _as_list(y)

    if program is None:
        program = fluid.default_main_program()

    # scope is needed whether or not y_grads is given
    scope = fluid.executor.global_scope()
    if y_grads is None:
        y_grads = []
        y_grads_init = []
        for yi in y:
            dyi_name = _append_grad_suffix_(yi.name)
            np_type = dtype_to_np_dtype(yi.dtype)
            dy = program.global_block().create_var(name=dyi_name,
                                                   shape=yi.shape,
                                                   dtype=np_type,
                                                   persistable=True)
            dy.stop_gradient = False
            v = np.random.random(size=yi.shape).astype(np_type)
            set_var_in_scope(scope, place, dyi_name, v)
            y_grads.append(dy)
            y_grads_init.append(v)
    else:
        y_grads = _as_list(y_grads)
        y_grads_init = [
            var_to_np_array_in_scope(scope, place, v.name) for v in y_grads
        ]

    # append first order grads
    target_grads = fluid.gradients(y, x, y_grads)

    if x_grads_grads is None:
        x_grads_grads = []
        x_grads_grads_init = []
        for dxi in target_grads:
            ddxi_name = _append_grad_suffix_(dxi.name)
            np_type = dtype_to_np_dtype(dxi.dtype)
            ddx = program.global_block().create_var(name=ddxi_name,
                                                    shape=dxi.shape,
                                                    dtype=np_type,
                                                    persistable=True)
            ddx.stop_gradient = False
            v = np.random.random(size=dxi.shape).astype(np_type)
            set_var_in_scope(scope, place, ddxi_name, v)
            x_grads_grads.append(ddx)
            x_grads_grads_init.append(v)
    else:
        x_grads_grads = _as_list(x_grads_grads)
        x_grads_grads_init = [
            var_to_np_array_in_scope(scope, place, v.name)
            for v in x_grads_grads
        ]
    x += y_grads
    x_init = _as_list(x_init)
    x_init += y_grads_init

    # append second order grads
    target_grads_grads = fluid.gradients(target_grads, x, x_grads_grads)

    # filter out None entries in target_grads_grads, since dY/dX may be None in some kernels
    filted = [(i, dyi) for i, dyi in enumerate(target_grads_grads)
              if dyi is not None]
    filted_idx, filted_target_grads_grads = zip(*filted)

    x += x_grads_grads
    x_init += x_grads_grads_init

    # x <=> [x, dout, ddx]
    grad_check(x=x,
               y=filted_target_grads_grads,
               x_init=x_init,
               place=place,
               program=program,
               eps=eps,
               atol=atol,
               rtol=rtol)
Example 19
def double_grad_check(x,
                      y,
                      x_init=None,
                      y_grads=None,
                      place=None,
                      program=None,
                      eps=1e-6,
                      atol=1e-5,
                      rtol=1e-3,
                      raise_exception=True):
    """
    Check gradients of gradients. This function will append backward to the
    program before second order gradient check.

    Args:
        x (Variable|list[Variable]): input variables to the program.
        y (Variable|list[Variable]): output variables to the program.
        x_init (numpy.array|list[numpy.array]|None): the init value for input x.
        y_grads (numpy.array|list[numpy.array]|None): the gradients with respect to y.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        program (Program|None): a Program with forward pass.
            If None, use fluid.default_main_program().
        eps (float): perturbation for finite differences.
        atol (float): absolute tolerance.
        rtol (float): relative tolerance.
        raise_exception (bool): whether to raise an exception if
            the check fails. Default is True.
    Returns:
        True if all differences satisfy numpy.allclose condition.
    """
    # check input arguments
    x = _as_list(x)
    for v in x:
        v.stop_gradient = False
        v.persistable = True
    y = _as_list(y)

    if program is None:
        program = fluid.default_main_program()

    # scope is needed whether or not y_grads is given
    scope = fluid.executor.global_scope()
    if y_grads is None:
        y_grads = []
        y_grads_init = []
        for yi in y:
            dyi_name = _append_grad_suffix_(yi.name)
            np_type = dtype_to_np_dtype(yi.dtype)
            dy = program.global_block().create_var(name=dyi_name,
                                                   shape=yi.shape,
                                                   dtype=np_type,
                                                   persistable=True)
            dy.stop_gradient = False
            v = np.random.random(size=yi.shape).astype(np_type)
            set_var_in_scope(scope, place, dyi_name, v)
            y_grads.append(dy)
            y_grads_init.append(v)
    else:
        y_grads = _as_list(y_grads)
        y_grads_init = [
            var_to_np_array_in_scope(scope, place, v.name) for v in y_grads
        ]

    # append first order grads
    target_grads = fluid.gradients(y, x, y_grads)

    # y_grads are the input of first-order backward,
    # so, they are also the input of second-order backward.
    x += y_grads
    x_init = _as_list(x_init)
    x_init += y_grads_init

    grad_check(x, target_grads, x_init, place, program, eps, atol, rtol)
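
A minimal sketch of invoking double_grad_check on a square op; the names x and y are assumptions for illustration (numpy is assumed imported as np):

    x = fluid.layers.create_parameter(name='x', shape=[2], dtype='float64')
    y = fluid.layers.elementwise_mul(x, x)
    double_grad_check([x], y,
                      x_init=[np.random.random(2).astype('float64')],
                      place=fluid.CPUPlace())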
Example 20
import numpy as np
import paddle.fluid as fluid

# v_data (the vertex array) is defined earlier in the original script,
# which is truncated here.
print('v_shape', v_data.shape)
t_data = np.array([[0, 1, 2]], dtype=np.int32)
print('t_shape', t_data.shape)

vertices = fluid.layers.data(name="vertices", shape=[-1, 4], dtype="float32")
triangles = fluid.layers.data(name="triangles", shape=[-1, 3], dtype="int32")
barycentric_coordinates, triangle_ids, z_buffer = fluid.layers.rasterize_triangles(
    vertices, triangles, image_height=4, image_width=4)

#gradient
#grd_value = np.random.random((4, 4, 3)).astype('float32')
grd_value = 1000 * np.ones((4,4,3)).astype('float32')
grd_var = fluid.layers.data(name='grd_var', shape=(4, 4, 3), append_batch_size=False, dtype='float32')
vertices.stop_gradient = False
grd = fluid.gradients([barycentric_coordinates], vertices, target_gradients=grd_var)


place = fluid.CPUPlace()
exe = fluid.Executor(place=place)
exe.run(fluid.default_startup_program())

fetch_list = [barycentric_coordinates.name, triangle_ids.name, z_buffer.name, grd[0].name]

#profiler.start_profiler('All')
bc, ti, zb, grd_out = exe.run(feed={'vertices': v_data, 'triangles': t_data}, fetch_list=fetch_list)
#profiler.stop_profiler('total', '/tmp/profile')

#np.save('res_paddle/bary_coor_paddle.npy', np.array(bc))
#np.save('res_paddle/tri_ids_paddle.npy', np.array(ti))
#np.save('res_paddle/z_b_paddle.npy', np.array(zb))
Example 21
    def _construct_graph(self):
        train_program = fluid.Program()
        start_program = fluid.Program()
        with fluid.program_guard(train_program, start_program):
            scope = 'first/'
            self.x_ph = TensorList([
                fluid.layers.data('{}x_{}'.format(scope, idx),
                                  v.shape,
                                  append_batch_size=False,
                                  stop_gradient=False)
                for idx, v in enumerate(self.x)
            ])
            self.p_ph = TensorList([
                fluid.layers.data('{}p_{}'.format(scope, idx),
                                  v.shape,
                                  append_batch_size=False,
                                  stop_gradient=False)
                for idx, v in enumerate(self.x)
            ])

            # problem forward
            self.f0 = self.problem(self.x_ph, scope)

            self.g = self.f0.apply(static_clone)

            # Get df/dx^t @ f0
            self.dfdxt_g = TensorList(
                fluid.gradients(self.f0, self.x_ph, self.g))

            # For computing A
            tmp = [a * b for a, b in zip(self.dfdxt_g, self.p_ph)]
            self.dfdx_x = TensorList(fluid.gradients(tmp, self.g))
            # self.dfdx_x = TensorList(fluid.gradients(self.dfdxt_g, self.g, self.p_ph))

        train_program2 = fluid.Program()
        start_program2 = fluid.Program()
        with fluid.program_guard(train_program2, start_program2):
            scope = 'second/'
            self.x_ph_2 = TensorList([
                fluid.layers.data('{}x_{}'.format(scope, idx),
                                  v.shape,
                                  append_batch_size=False,
                                  stop_gradient=False)
                for idx, v in enumerate(self.x)
            ])
            self.dfdx_x_ph = TensorList([
                fluid.layers.data('{}dfdx_x_{}'.format(scope, idx),
                                  v.shape,
                                  append_batch_size=False,
                                  stop_gradient=False)
                for idx, v in enumerate(self.g)
            ])

            self.f0_2 = self.problem(self.x_ph_2, scope)
            self.dfdx_dfdx = TensorList(
                fluid.gradients(self.f0_2 * self.dfdx_x_ph, self.x_ph_2))

        place = fluid.CUDAPlace(0)
        self.exe = fluid.Executor(place)
        self.exe.run(program=fluid.default_startup_program())
        self.compiled_prog = fluid.compiler.CompiledProgram(train_program)

        place2 = fluid.CUDAPlace(0)
        self.exe2 = fluid.Executor(place2)
        self.exe2.run(program=fluid.default_startup_program())
        self.compiled_prog2 = fluid.compiler.CompiledProgram(train_program2)
Example 22
    lr = 0.1

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            inputs = P.data(name='input_1', shape=[-1, 3, 28, 28], append_batch_size=False, dtype='float32')
            conv01_out_tensor = fluid.layers.conv2d(input=inputs, num_filters=7, filter_size=1, stride=1, padding=0,
                                                    param_attr=ParamAttr(name="conv01_weights"),
                                                    bias_attr=ParamAttr(name="conv01_bias"))
            conv02_out_tensor = fluid.layers.conv2d(input=conv01_out_tensor, num_filters=8, filter_size=4, stride=2, padding=1,
                                                    param_attr=ParamAttr(name="conv02_weights"),
                                                    bias_attr=ParamAttr(name="conv02_bias"))

            grad = fluid.gradients(conv02_out_tensor, conv01_out_tensor, no_grad_set=None)[0]


            # build the loss function
            y_true = P.data(name='y_true', shape=[-1, 8, 14, 14], append_batch_size=False, dtype='float32')
            # First square the element-wise difference; either the P.pow() op or Python's ** operator works.
            mseloss = P.pow(y_true - conv02_out_tensor, 2)
            mseloss = P.reduce_mean(mseloss)  # then take the mean, giving the MSE loss

            # optimizer: SGD
            optimizer = fluid.optimizer.SGD(learning_rate=lr)
            optimizer.minimize(mseloss)


    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
Example 23
    def _paddle_prepare(self, predict_fn=None):
        if predict_fn is None:
            import paddle.fluid as fluid
            startup_prog = fluid.Program()
            main_program = fluid.Program()
            with fluid.program_guard(main_program, startup_prog):
                with fluid.unique_name.guard():

                    image_op = fluid.data(name='image',
                                          shape=[1] + self.model_input_shape,
                                          dtype='float32')
                    label_op = fluid.layers.data(name='label',
                                                 shape=[1],
                                                 dtype='int64')

                    probs = self.paddle_model(image_op)
                    if isinstance(probs, tuple):
                        probs = probs[0]

                    # manually switch the model to test mode
                    for op in main_program.global_block().ops:
                        if op.type == 'batch_norm':
                            op._set_attr('use_global_stats', True)
                        elif op.type == 'dropout':
                            op._set_attr('dropout_prob', 0.0)

                    # fetch the target layer
                    trainable_vars = list(main_program.list_vars())
                    for v in trainable_vars:
                        if v.name == self.target_layer_name:
                            conv = v

                    class_num = probs.shape[-1]
                    one_hot = fluid.layers.one_hot(label_op, class_num)
                    one_hot = fluid.layers.elementwise_mul(probs, one_hot)
                    target_category_loss = fluid.layers.reduce_sum(one_hot)
                    # target_category_loss = - fluid.layers.cross_entropy(probs, label_op)[0]

                    # add back-propagation
                    p_g_list = fluid.backward.append_backward(
                        target_category_loss)
                    # calculate the gradients w.r.t. the target layer
                    gradients_map = fluid.gradients(target_category_loss,
                                                    conv)[0]

            if self.use_cuda:
                gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
                place = fluid.CUDAPlace(gpu_id)
            else:
                place = fluid.CPUPlace()
            exe = fluid.Executor(place)

            fluid.io.load_persistables(exe, self.trained_model_path,
                                       main_program)

            def predict_fn(data):
                # if label is None, let it be the most likely label
                if self.label is None:
                    out = exe.run(main_program,
                                  feed={
                                      'image': data,
                                      'label': np.array([[0]])
                                  },
                                  fetch_list=[probs])

                    self.label = np.argmax(out[0][0])

                feature_map, gradients = exe.run(
                    main_program,
                    feed={
                        'image': data,
                        'label': np.array([[self.label]])
                    },
                    fetch_list=[conv, gradients_map])
                return feature_map, gradients

        self.predict_fn = predict_fn
        self.paddle_prepared = True
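This is the standard Grad-CAM preparation: the executor fetches both the target convolutional feature map and the gradients of the target-class score with respect to that layer, which can then be combined into a class activation map.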
Example 24
def create_model(args,
                 pyreader_name,
                 ernie_config,
                 is_prediction=False,
                 task_name=""):
    pyreader = fluid.layers.py_reader(capacity=50,
                                      shapes=[[-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, 1], [-1, 1]],
                                      dtypes=[
                                          'int64', 'int64', 'int64', 'int64',
                                          'float32', 'int64', 'int64'
                                      ],
                                      lod_levels=[0, 0, 0, 0, 0, 0, 0],
                                      name=task_name + "_" + pyreader_name,
                                      use_double_buffer=True)

    (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels,
     qids) = fluid.layers.read_file(pyreader)

    def _model(is_noise=False):
        ernie = ErnieModel(src_ids=src_ids,
                           position_ids=pos_ids,
                           sentence_ids=sent_ids,
                           task_ids=task_ids,
                           input_mask=input_mask,
                           config=ernie_config,
                           is_noise=is_noise)

        cls_feats = ernie.get_pooled_output()
        if not is_noise:
            cls_feats = fluid.layers.dropout(
                x=cls_feats,
                dropout_prob=0.1,
                dropout_implementation="upscale_in_train")
        logits = fluid.layers.fc(
            input=cls_feats,
            size=args.num_labels,
            param_attr=fluid.ParamAttr(
                name=task_name + "_cls_out_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name=task_name + "_cls_out_b",
                initializer=fluid.initializer.Constant(0.)))
        """
        if is_prediction:
            probs = fluid.layers.softmax(logits)
            feed_targets_name = [
                src_ids.name, sent_ids.name, pos_ids.name, input_mask.name
            ]
            if ernie_version == "2.0":
                feed_targets_name += [task_ids.name]
            return pyreader, probs, feed_targets_name
        """

        num_seqs = fluid.layers.create_tensor(dtype='int64')
        # softmax cross-entropy loss
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=labels, return_softmax=True)
        loss = fluid.layers.mean(x=ce_loss)
        accuracy = fluid.layers.accuracy(input=probs,
                                         label=labels,
                                         total=num_seqs)
        graph_vars = {
            "loss": loss,
            "probs": probs,
            "accuracy": accuracy,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids
        }
        return graph_vars

    if not is_prediction:
        graph_vars = _model(is_noise=True)
        old_loss = graph_vars["loss"]
        token_emb = fluid.default_main_program().global_block().var(
            "word_embedding")
        # print(token_emb)
        token_emb.stop_gradient = False
        token_gradient = fluid.gradients(old_loss, token_emb)[0]
        token_gradient.stop_gradient = False
        epsilon = 1e-8
        norm = (fluid.layers.sqrt(
            fluid.layers.reduce_sum(fluid.layers.square(token_gradient)) +
            epsilon))
        gp = (0.01 * token_gradient) / norm
        gp.stop_gradient = True
        fluid.layers.assign(token_emb + gp, token_emb)
        graph_vars = _model()
        fluid.layers.assign(token_emb - gp, token_emb)
    else:
        graph_vars = _model()

    return pyreader, graph_vars
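This implements FGM-style adversarial training: the loss gradient with respect to the word embedding is normalized, scaled by 0.01, added onto the embedding for a second forward pass, and then subtracted again to restore the original weights.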
Example 25
import numpy
import paddle.fluid as fluid

# define the operation
w = fluid.data(name='w', shape=[None, 1], dtype='float32')
w.stop_gradient = False
loss = w * w
grad = fluid.gradients([loss], w)
# define the executor
cpu = fluid.core.CPUPlace()
exe = fluid.Executor(cpu)
exe.run(fluid.default_startup_program())
# prepare data
x = numpy.ones((1, 1))
x = x.astype('float32')
# run the computation
outs = exe.run(feed={'w': x}, fetch_list=[loss, grad])
print('loss: {}, grad: {}'.format(outs[0][0], outs[1][0]))
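With w = 1, loss = w * w = 1 and d(loss)/dw = 2w = 2, so the script prints loss 1.0 and gradient 2.0.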
Example 26
def _compute_analytical_jacobian(program, x, y, place, scope):
    """Computes the analytical Jacobian for dy/dx.

    Args:
        program (Program): a Program with forward pass.
        x (Variable|list[Variable]): a Variable or a list of Variables.
        y (Variable): the target variable.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        scope (Scope): the scope used to run program.

    Returns:
        A list of 2-D numpy arrays. The list length is len(x).
        Each 2-D numpy array represents the Jacobian for dy/dx_i,
        with "x_size" rows and "dy_size" columns, where "x_size" is
        the number of elements in x_i and "dy_size" is the number
        of elements in y.
    """
    if not isinstance(y, fluid.framework.Variable):
        raise TypeError('y is not Variable')

    dy_name = _append_grad_suffix_(y.name)

    np_type = dtype_to_np_dtype(y.dtype)
    # create dy Variable in Program
    dy = program.global_block().create_var(name=dy_name,
                                           shape=y.shape,
                                           dtype=np_type,
                                           persistable=True)
    # append backward
    dx = fluid.gradients(y, x, dy)

    # init dy tensor in scope
    value = np.zeros(y.shape, dtype=np_type)
    dy_t = set_var_in_scope(scope, place, dy_name, value)

    exe = fluid.Executor(place)

    y_size = _product(y.shape)

    x = _as_list(x)
    jacobian = make_jacobian(x, y_size, np_type)

    # dx entries may be None when the kernel does not define a gradient
    # for some input, so record their indices and fetch only the
    # non-None gradients in exe.run.
    filtered = [(i, dxi) for i, dxi in enumerate(dx) if dxi is not None]
    filtered_idx, filtered_dx = zip(*filtered)

    for i in six.moves.xrange(y_size):
        # Seed the i-th element of dy with 1 (one-hot), run the backward
        # program, and fill the i-th column of each Jacobian.
        _set_item(dy_t, i, 1, np_type)

        dx_res = exe.run(program, scope=scope, fetch_list=filtered_dx)

        for j in six.moves.xrange(len(filtered_dx)):
            dx_idx = filtered_idx[j]
            if dx_res[j] is not None:
                jacobian[dx_idx][:, i] = dx_res[j].flatten()
            else:
                jacobian[dx_idx][:, i] = np.zeros(dx[dx_idx].shape,
                                                  dtype=np_type).flatten()

        # Reset the seed so the next pass isolates a different column.
        _set_item(dy_t, i, 0, np_type)

    return jacobian
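
The core trick here is the one-hot seeding of dy: setting a single element of the output gradient to 1 and running the backward program yields one Jacobian column per pass. A self-contained NumPy illustration, with a hypothetical vjp function standing in for what fluid.gradients(y, x, dy) computes:

import numpy as np

def vjp(x, dy):
    # Vector-Jacobian product for f(x) = x ** 2 elementwise; this plays
    # the role of running the backward program with a given dy.
    return 2.0 * x * dy

x = np.array([1.0, 2.0, 3.0])
y_size = x.size  # f is elementwise, so y has as many elements as x
jacobian = np.zeros((x.size, y_size))

for i in range(y_size):
    dy = np.zeros(y_size)
    dy[i] = 1.0                  # seed the i-th output element
    jacobian[:, i] = vjp(x, dy)  # one backward pass fills one column

print(jacobian)  # diag([2., 4., 6.]) for the elementwise square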
Example No. 27
import os
import time

import numpy
from PIL import Image

import paddle.fluid as fluid


def dlg_attack(args, feature, label, network, exe, origin_grad):
    """
    The implementation of DLG attack.
    :param args: the parameters for dlg attack
    :param feature: the variable of feature
    :param label: the variable of label
    :param network: the network of model which is trained
    :param exe: the same executor with normal training procedure
    :param origin_grad: the original gradients of model params
    generated by target data, i.e., the data which is being attacked.
    :return:
    """
    main_program = fluid.Program()
    # use a new program
    with fluid.program_guard(main_program):
        # the dummy feature, which aims to imitate the target data
        dummy_x = fluid.data(name="dummy_x",
                             shape=list(feature.shape),
                             dtype=feature.dtype)
        # allow dummy_x to be updated
        dummy_x.stop_gradient = False

        # the dummy_label
        dummy_y = fluid.data(name="dummy_y",
                             shape=list(label.shape),
                             dtype=label.dtype)
        # allow dummy_y to be updated
        dummy_y.stop_gradient = False

        # use the model network of training
        _, dummy_loss = network(dummy_x, dummy_y)

        # get gradients of the trainable parameters
        all_params = main_program.global_block().all_parameters()
        grad_params = [param for param in all_params if param.trainable]
        dummy_grads = fluid.gradients(dummy_loss, grad_params)

        # original gradients
        origin_grad_vars = []
        for g_id, origin_g in enumerate(origin_grad):
            grad_name = "origin_g_" + str(g_id)
            grad_shape = origin_g.shape
            grad = fluid.data(name=grad_name,
                              shape=grad_shape,
                              dtype=origin_g.dtype)
            origin_grad_vars.append(grad)

        # the target loss of optimization, i.e., the difference
        # between gradients of model parameters generated respectively
        # by target data and dummy data
        diff_loss = 0.0
        for orig_g, dum_g in zip(origin_grad_vars, dummy_grads):
            cur_loss = fluid.layers.square_error_cost(orig_g, dum_g)
            cur_loss = fluid.layers.reduce_mean(cur_loss)
            diff_loss += cur_loss

        mean_diff_loss = fluid.layers.mean(diff_loss)

        # gradient of the matching loss with respect to dummy_x
        grad_of_x = fluid.gradients(mean_diff_loss, dummy_x)

    dummy_feature_shape = [1 if d == -1 else d for d in list(feature.shape)]
    dummy_label_shape = [1 if d == -1 else d for d in list(label.shape)]

    # Generate random dummy data. float32 and int64 are used here for the
    # feature and label variables respectively; adjust the dtypes to match
    # other scenarios.
    dummy_feature = numpy.random.normal(
        0, 1, size=dummy_feature_shape).astype("float32")
    dummy_label = numpy.zeros(shape=dummy_label_shape).astype("int64")

    feed_dict = {}
    # add original gradients into feed_dict
    for idx, orig_g in enumerate(origin_grad):
        key = "origin_g_" + str(idx)
        feed_dict[key] = orig_g

    # record the attack start time
    start = time.time()

    for iteration in range(args.iterations):
        feed_dict["dummy_x"] = dummy_feature
        feed_dict["dummy_y"] = dummy_label

        result = exe.run(main_program,
                         feed=feed_dict,
                         fetch_list=[mean_diff_loss] + grad_of_x)
        grad_diff_loss, feature_grad = result[0][0], result[1:]

        # update dummy_x with its gradient
        feature_grad = numpy.array(feature_grad).reshape(dummy_feature_shape)
        dummy_feature = numpy.add(dummy_feature,
                                  args.learning_rate * feature_grad)
        dummy_feature = numpy.array(dummy_feature)

        # the shape of target image
        img_shape = dummy_feature_shape[-2:]

        # save attack results every 100 iterations
        if iteration % 100 == 0:
            print("Attack Iteration {}: grad_diff_loss = {}".format(
                iteration, grad_diff_loss))
            if not os.path.exists(args.result_dir):
                os.makedirs(args.result_dir)
            img = Image.fromarray(
                (dummy_feature * 255).reshape(img_shape).astype(numpy.uint8))
            img.save(args.result_dir + "/result_{}.png".format(iteration))

    end = time.time()
    print("Attack cost time in seconds: {}".format(end - start))
    # exit once the attack finishes
    exit("Attack finished.")
Example No. 28
out8 = fluid.layers.softmax(out_logit8)

fluid.io.load_persistables(exe, pretrained_model, main_program=main_programs)
print('pretrained model loaded')
init_prog(main_programs)
eval_program = main_programs.clone(for_test=True)

label = fluid.layers.data(name="label", shape=[1], dtype='int64')
y = fluid.layers.data(name="y", shape=[8], dtype='int64')
# Mask-weighted average of the eight model logits: y acts as a per-model
# weight (typically a 0/1 selector), and out_logit1 is cropped to the
# shared 121 classes.
out_logits = (out_logit1[:, :121] * y[0] + out_logit2 * y[1] +
              out_logit3 * y[2] + out_logit4 * y[3] + out_logit5 * y[4] +
              out_logit6 * y[5] + out_logit7 * y[6] + out_logit8 * y[7]) / (
                  y[0] + y[1] + y[2] + y[3] + y[4] + y[5] + y[6] + y[7])
out = fluid.layers.softmax(out_logits)
loss = fluid.layers.cross_entropy(input=out, label=label)
gradients = fluid.gradients(targets=loss, inputs=[input_layer])[0]
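
The ensemble expression above is a weighted average of the per-model logits, with y supplying the weights. A small NumPy sketch of the same reduction (toy shapes, illustrative names):

import numpy as np

logits = [np.random.randn(121) for _ in range(8)]  # one logit vector per model
y = np.array([1, 0, 1, 1, 0, 0, 1, 0])             # per-model weights / selector
avg_logits = sum(l * w for l, w in zip(logits, y)) / y.sum()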


def inference(img):
    result1, result2, result3, result4, result5, result6, result7, result8 = exe.run(
        eval_program,
        fetch_list=[out1, out2, out3, out4, out5, out6, out7, out8],
        feed={'image': img})
    result1 = result1[0, :121]
    pred1 = np.argmax(result1)
    result2 = result2[0]
    pred2 = np.argmax(result2)
    result3 = result3[0]
    pred3 = np.argmax(result3)
    result4 = result4[0]
    pred4 = np.argmax(result4)