Example #1
def test_calc_gradient(self):
    # build a small graph, mean(x @ y), in the default program
    x = layers.create_parameter(dtype="float32", shape=[5, 10])
    y = layers.create_parameter(dtype="float32", shape=[10, 8])
    mul_out = layers.mul(x=x, y=y)
    mean_out = layers.mean(mul_out)
    # ask for gradients of mean_out w.r.t. an intermediate and an input
    a = calc_gradient(mean_out, mul_out)   # d(mean_out)/d(mul_out)
    b = calc_gradient(mean_out, x)         # d(mean_out)/d(x)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    exe.run(fluid.default_main_program(), feed={}, fetch_list=[a, b])
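Since mean_out averages a 5x8 matrix, every entry of d(mean_out)/d(mul_out) is analytically 1/40. A minimal sketch of checking the fetched result against that value (assumes numpy is imported as np; this check is not part of the original test):

outs = exe.run(fluid.default_main_program(), feed={}, fetch_list=[a, b])
# outs[0] is d(mean_out)/d(mul_out), shape (5, 8)
np.testing.assert_allclose(outs[0],
                           np.full([5, 8], 1.0 / 40.0, dtype="float32"),
                           rtol=1e-6)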
Example #2
def test_calc_gradient(self):
    # same graph as Example #1, but built inside explicit main/startup
    # programs via program_guard instead of the global defaults
    main = fluid.Program()
    startup = fluid.Program()
    with fluid.program_guard(main, startup):
        x = layers.create_parameter(dtype="float32", shape=[5, 10])
        y = layers.create_parameter(dtype="float32", shape=[10, 8])
        mul_out = layers.mul(x=x, y=y)
        mean_out = layers.mean(mul_out)
        a = calc_gradient(mean_out, mul_out)
        b = calc_gradient(mean_out, x)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup)
    exe.run(main, feed={}, fetch_list=[a, b])
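The gradient with respect to x can also be verified by hand: since mean_out = mean(x @ y), d(mean_out)/dx[i, j] = sum_k y[j, k] / 40 for every row i. A sketch of that check (assumes numpy is imported as np; not part of the original test):

grad_x, y_val = exe.run(main, feed={}, fetch_list=[b, y])
expected = np.tile(y_val.sum(axis=1) / 40.0, (5, 1))   # shape (5, 10)
np.testing.assert_allclose(grad_x, expected, rtol=1e-5)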
Example #3
# total loss: content term + per-layer style terms + total-variation term
loss = content_weight * content_loss(base_image_features, combination_features)
feature_layers = [
    'block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1',
    'block5_conv1'
]

for layer_name in feature_layers:
    layer_features = outputs_dict[layer_name]
    style_reference_features = layer_features[1]
    combination_features = layer_features[2]
    sl = style_loss(style_reference_features, combination_features)
    loss += (style_weight / len(feature_layers)) * sl
loss += total_variation_weight * total_variation_loss(combination_data)

# get the gradients of the generated image wrt the loss
grads = calc_gradient(loss, combination_data)

fetch = [loss.name]
# calc_gradient returns a single Variable or a list, depending on
# how many inputs were given, so handle both cases
if isinstance(grads, (list, tuple)):
    fetch.append(grads[0].name)
else:
    fetch.append(grads.name)

# SGD with a zero learning rate is used only to append backward ops;
# it does not actually update any parameter
optimizer = fluid.optimizer.SGD(0.0)
optimizer.backward(loss=loss)

test_program = fluid.default_main_program()

exe = fluid.Executor(fluid.CUDAPlace(1))
exe.run(fluid.default_startup_program())
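The snippet stops after the startup run. A hedged sketch of the optimization loop that would consume `fetch`; the image shape, initial image, step size, and iteration count are illustrative assumptions, not from the source (assumes numpy is imported as np):

combination_np = np.random.uniform(-0.5, 0.5,
                                   size=(1, 3, 224, 224)).astype("float32")
for step in range(100):
    loss_v, grad_v = exe.run(test_program,
                             feed={combination_data.name: combination_np},
                             fetch_list=fetch)
    combination_np -= 1.0 * grad_v   # plain gradient descent on the image
    print(step, float(loss_v))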
Example #4
def double_grad_check(x,
                      y,
                      x_init=None,
                      y_grads=None,
                      place=None,
                      program=None,
                      eps=1e-6,
                      atol=1e-5,
                      rtol=1e-3,
                      raise_exception=True):
    """
    Check gradients of gradients. This function will append backward to the
    program before second order gradient check.

    Args:
        x (Variable|list[Variable]): input variables to the program.
        y (Variable|list[Variable]): output variables to the program.
        x_init (numpy.array|list[numpy.array]|None): the init value for input x.
        y_grads (numpy.array|list[numpy.array]|None): the gradients with respect to y.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        program (Program|None): a Program with forward pass.
            If None, use fluid.default_main_program().
        eps (float): perturbation for finite differences.
        atol (float): absolute tolerance.
        rtol (float): relative tolerance.
        raise_exception (bool): whether to raise an exception if
            the check fails. Default is True.
    Returns:
        True if all differences satisfy numpy.allclose condition.
    """
    # check input arguments
    x = _as_list(x)
    for v in x:
        v.stop_gradient = False
        v.persistable = True
    y = _as_list(y)

    if program is None:
        program = fluid.default_main_program()

    # `scope` is needed by both branches below; the original fetched it
    # only inside the first branch, leaving the else-branch a NameError
    scope = fluid.executor.global_scope()

    if y_grads is None:
        y_grads = []
        y_grads_init = []
        for yi in y:
            dyi_name = _append_grad_suffix_(yi.name)
            np_type = dtype_to_np_dtype(yi.dtype)
            dy = program.global_block().create_var(name=dyi_name,
                                                   shape=yi.shape,
                                                   dtype=np_type,
                                                   persistable=True)
            dy.stop_gradient = False
            v = np.random.random(size=yi.shape).astype(np_type)
            set_var_in_scope(scope, place, dyi_name, v)
            y_grads.append(dy)
            y_grads_init.append(v)
    else:
        y_grads = _as_list(y_grads)
        y_grads_init = [
            var_to_np_array_in_scope(scope, place, v.name) for v in y_grads
        ]

    # append first order grads
    target_grads = calc_gradient(y, x, y_grads)

    # y_grads are the input of first-order backward,
    # so, they are also the input of second-order backward.
    x += y_grads
    x_init = _as_list(x_init)
    x_init += y_grads_init

    grad_check(x, target_grads, x_init, place, program, eps, atol, rtol)
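A hedged usage sketch in the style of Paddle's own gradient-checker tests: verifying second-order gradients of square(x) in a fresh Program. The shape and dtype here are illustrative, not from the source:

import numpy as np

prog = fluid.Program()
with fluid.program_guard(prog):
    x = fluid.layers.data('x', shape=[2, 3], append_batch_size=False,
                          dtype='float64')
    x.persistable = True
    y = fluid.layers.square(x)
    x_arr = np.random.uniform(-1, 1, [2, 3]).astype('float64')
    double_grad_check([x], y, x_init=x_arr,
                      place=fluid.CPUPlace(), program=prog)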
Example #5
def _compute_analytical_jacobian(program, x, y, place, scope):
    """Computes the analytical Jacobian for dy/dx.

    Args:
        program (Program): a Program with forward pass.
        x (Variable|list[Variable]): a variable or list of variable
        y (Variable): the target variable.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        scope (Scope): the scope used to run program.

    Returns:
        A list of 2-D numpy array. The list length is len(x).
        Each 2-D numpy array represents the Jacobian for dy/dx_i.
        It has "xi_size" rows and "dy_size" columns
        where "x_size" is the number of elements in x_i and
        "dy_size" is the number of elements in y.
    """
    if not isinstance(y, fluid.framework.Variable):
        raise TypeError('y is not Variable')

    dy_name = _append_grad_suffix_(y.name)

    np_type = dtype_to_np_dtype(y.dtype)
    # create dy Variable in Program
    dy = program.global_block().create_var(name=dy_name,
                                           shape=y.shape,
                                           dtype=np_type,
                                           persistable=True)
    # append backward
    dx = calc_gradient(y, x, dy)

    # init dy tensor in scope
    value = np.zeros(y.shape, dtype=np_type)
    dy_t = set_var_in_scope(scope, place, dy_name, value)

    exe = fluid.Executor(place)

    y_size = _product(y.shape)

    x = _as_list(x)
    jacobian = make_jacobian(x, y_size, np_type)

    # dx may contain None entries where the kernel defines no gradient,
    # so fetch only the non-None gradients in exe.run
    filtered = [(i, dxi) for i, dxi in enumerate(dx) if dxi is not None]
    filtered_idx, filtered_dx = zip(*filtered)

    for i in six.moves.xrange(y_size):
        # set dy to the i-th basis vector, so fetching dx yields one
        # column of the Jacobian per run
        _set_item(dy_t, i, 1, np_type)

        dx_res = exe.run(program, scope=scope, fetch_list=filtered_dx)

        for j in six.moves.xrange(len(filtered_dx)):
            dx_idx = filtered_idx[j]
            if dx_res[j] is not None:
                jacobian[dx_idx][:, i] = dx_res[j].flatten()
            else:
                jacobian[dx_idx][:, i] = np.zeros(dx[dx_idx].shape,
                                                  dtype=np_type).flatten()

        _set_item(dy_t, i, 0, np_type)

    return jacobian
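For contrast, a simplified finite-difference sketch of the numerical Jacobian that an analytic Jacobian like the one above is typically checked against. This is an illustrative stand-in that reuses helper names from this file, not the real _compute_numerical_jacobian:

def _numerical_jacobian_sketch(program, x, y, place, scope, eps=1e-6):
    exe = fluid.Executor(place)
    np_type = dtype_to_np_dtype(x.dtype)
    x_t = scope.var(x.name).get_tensor()
    x_val = np.array(x_t).flatten()
    jac = np.zeros((x_val.size, _product(y.shape)), dtype=np_type)
    for i in range(x_val.size):
        orig = x_val[i]
        # central difference: (y(x + eps) - y(x - eps)) / (2 * eps)
        x_val[i] = orig + eps
        x_t.set(x_val.reshape(x.shape), place)
        y_pos = exe.run(program, scope=scope, fetch_list=[y])[0].flatten()
        x_val[i] = orig - eps
        x_t.set(x_val.reshape(x.shape), place)
        y_neg = exe.run(program, scope=scope, fetch_list=[y])[0].flatten()
        x_val[i] = orig
        x_t.set(x_val.reshape(x.shape), place)
        jac[i, :] = (y_pos - y_neg) / (2.0 * eps)
    return jac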
Example #6
    def net(self, input, class_dim=1000):
        x = self.inception_stem(input)

        # capture intermediate activations (x1..x4) for the loss below
        for i in range(4):
            x = self.inceptionA(x, name=str(i + 1))
            if i == 3:
                x1 = x
        x = self.reductionA(x1)

        # per-branch weights for the four activation-loss terms
        x1_coeff = fluid.layers.fill_constant(shape=[1], dtype='float32', value=1.0)
        x2_coeff = fluid.layers.fill_constant(shape=[1], dtype='float32', value=2.0)
        x3_coeff = fluid.layers.fill_constant(shape=[1], dtype='float32', value=0.5)
        x4_coeff = fluid.layers.fill_constant(shape=[1], dtype='float32', value=0.2)

        # small epsilon to guard the gradient normalization below
        ep = fluid.layers.fill_constant(shape=[1], dtype='float32', value=1e-10)

        for i in range(7):
            x = self.inceptionB(x, name=str(i + 1))
            if i == 0:
                x2 = x
            if i == 4:
                x3 = x
        x = self.reductionB(x)

        for i in range(3):
            x = self.inceptionC(x, name=str(i + 1))
            if i == 0:
                x4 = x

        pool = fluid.layers.pool2d(
            input=x, pool_size=8, pool_type='avg', global_pooling=True)

        # loss: weighted mean squared activation of each captured feature
        # map, cropping a 2-pixel border and normalizing by element count
        scaling = fluid.layers.reduce_prod(fluid.layers.cast(fluid.layers.shape(x1), 'float32'))
        loss = x1_coeff * fluid.layers.reduce_sum(fluid.layers.square(x1[:, :, 2: -2, 2: -2])) / scaling

        scaling = fluid.layers.reduce_prod(fluid.layers.cast(fluid.layers.shape(x2), 'float32'))
        loss += x2_coeff * fluid.layers.reduce_sum(fluid.layers.square(x2[:, :, 2: -2, 2: -2])) / scaling

        scaling = fluid.layers.reduce_prod(fluid.layers.cast(fluid.layers.shape(x3), 'float32'))
        loss += x3_coeff * fluid.layers.reduce_sum(fluid.layers.square(x3[:, :, 2: -2, 2: -2])) / scaling

        scaling = fluid.layers.reduce_prod(fluid.layers.cast(fluid.layers.shape(x4), 'float32'))
        loss += x4_coeff * fluid.layers.reduce_sum(fluid.layers.square(x4[:, :, 2: -2, 2: -2])) / scaling

        # grad: d(loss)/d(input), normalized by its mean absolute value
        grad = calc_gradient(loss, input)
        grad = grad / fluid.layers.elementwise_max(
            fluid.layers.reduce_mean(fluid.layers.abs(grad)), ep)

        drop = fluid.layers.dropout(x=pool, dropout_prob=0.2)

        stdv = 1.0 / math.sqrt(drop.shape[1] * 1.0)
        out = fluid.layers.fc(
            input=drop,
            size=class_dim,
            param_attr=ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv),
                name="final_fc_weights"),
            bias_attr=ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv),
                name="final_fc_offset"))
        return loss, grad, out
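A hedged sketch of how the returned (loss, grad) pair might drive a DeepDream-style ascent on the input image. The `model` instance, the 299x299 input shape, the step size, and the iteration count are all illustrative assumptions, not from the source (assumes numpy is imported as np):

image = fluid.layers.data(name='image', shape=[3, 299, 299], dtype='float32')
image.stop_gradient = False   # so calc_gradient can reach the input
loss, grad, _ = model.net(image)   # `model` = an instance of this class

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

img = np.random.uniform(-0.5, 0.5, (1, 3, 299, 299)).astype('float32')
for _ in range(20):
    loss_v, grad_v = exe.run(fluid.default_main_program(),
                             feed={'image': img},
                             fetch_list=[loss, grad])
    img += 0.1 * grad_v   # ascend: amplify the activations behind the loss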