Python _append_grad_suffix_の例、paddle.fluid.backward._append_grad_suffix_ Pythonの例

コード例 #1

0

ファイルを表示

ファイル: gradient_checker.py プロジェクト: sandyhouse/Paddle

def get_static_triple_grad(x,
                           y,
                           x_init=None,
                           dy_init=None,
                           place=None,
                           program=None):
    """
    Compute the triple-grad result on a static graph.

    A gradient variable is created for every entry of ``y`` and filled with
    the matching entry of ``dy_init``. First-order gradients of ``y`` w.r.t.
    ``x`` are appended to the program, and the remaining work is delegated to
    ``get_static_double_grad`` with those first-order gradients as outputs.

    Note: ``x`` and ``x_init`` are extended with ``+=``, so list arguments
    passed by the caller are modified in place.

    Args:
        x (Variable|list[Variable]): input variables to the program.
        y (Variable|list[Variable]): output variables to the program.
        x_init (numpy.array|list[numpy.array]|None): the init value for input x.
        dy_init (numpy.array|list[numpy.array]|None): the init value for output y.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        program (Program|None): a Program with forward pass.
            If None, use fluid.default_main_program().
    Returns:
        The value returned by ``get_static_double_grad`` for the extended
        inputs (per the original documentation, a list of numpy arrays holding
        the third derivative result).
    """
    if program is None:
        program = fluid.default_main_program()
    global_scope = fluid.executor.global_scope()

    # One persistable gradient variable per output, seeded from dy_init.
    seed_vars = []
    for idx, out_var in enumerate(y):
        grad_name = _append_grad_suffix_(out_var.name)
        grad_dtype = dtype_to_np_dtype(out_var.dtype)
        seed = program.global_block().create_var(name=grad_name,
                                                 shape=out_var.shape,
                                                 dtype=grad_dtype,
                                                 persistable=True)
        seed.stop_gradient = False
        set_var_in_scope(global_scope, place, grad_name, dy_init[idx])
        seed_vars.append(seed)

    # Append the first-order backward pass.
    first_order = fluid.gradients(y, x, seed_vars)

    # The seed gradients feed the first-order backward pass, so they are
    # inputs of the higher-order passes as well.
    x += seed_vars
    x_init += dy_init

    # Every first-order gradient is seeded with ones for the next pass.
    ones_init = [
        np.ones(grad.shape, dtype=dtype_to_np_dtype(grad.dtype))
        for grad in first_order
    ]

    return get_static_double_grad(x,
                                  first_order,
                                  x_init,
                                  dy_init=ones_init,
                                  place=place,
                                  program=program)

コード例 #2

0

ファイルを表示

ファイル: backward.py プロジェクト: zpcalan/PaddleFL

def _create_loss_op_desc_(loss):
    """
    Build the op description that fills the gradient of ``loss``.

    The description is for a ``fill_constant`` op with no inputs. Its single
    output is named after ``loss`` with the gradient suffix appended, its
    shape is ``[2, 1]`` and its value is ``mdu.mpc_one_share``. The op role
    attribute is the bitwise OR of the Backward and Loss roles.

    Args:
        loss: the loss variable; only its ``name`` and ``dtype`` are read.
    Returns:
        The object returned by ``backward._create_op_desc_``.
    """
    grad_output_name = backward._append_grad_suffix_(loss.name)

    maker = core.op_proto_and_checker_maker
    role_attr_name = maker.kOpRoleAttrName()
    role_value = int(maker.OpRole.Backward) | int(maker.OpRole.Loss)

    attrs = {
        "shape": [2, 1],
        "value": mdu.mpc_one_share,
        "dtype": loss.dtype,
        "force_cpu": False,
        role_attr_name: role_value,
    }
    outputs = {"Out": [grad_output_name]}
    return backward._create_op_desc_("fill_constant", {}, outputs, attrs)

コード例 #3

0

ファイルを表示

def _get_stop_gradients(program, no_grad_set):
    """
    Collect the gradient names of variables that should not get gradients.

    Variables from ``program.list_vars()`` are scanned in order and the scan
    stops at the first variable whose name contains "@GRAD". Each variable
    seen before that point with ``stop_gradient`` set contributes its
    gradient-suffixed name. The names derived from ``no_grad_set`` are then
    added as well.

    Args:
        program: the program whose variables are scanned.
        no_grad_set: extra entries to exclude, or None. When not None it is
            converted with ``_get_no_grad_set_name`` first.
    Returns:
        set: gradient-suffixed variable names.
    """
    extra_names = (set() if no_grad_set is None else
                   _get_no_grad_set_name(no_grad_set))

    stopped = set()
    for variable in program.list_vars():
        assert isinstance(variable, Variable)
        if "@GRAD" in variable.name:
            # Nothing after the first gradient variable is inspected.
            break
        if variable.stop_gradient:
            stopped.add(_append_grad_suffix_(variable.name))

    stopped.update([_append_grad_suffix_(name) for name in extra_names])
    return stopped

コード例 #4

0

ファイルを表示

def _create_loss_op_desc_(loss):
    """
    Build the op description that fills the gradient of ``loss``.

    The fill shape and value default to ``[2, 1]`` and
    ``mdu.aby3_one_share``. If the "mpc_protocol_index" variable in the
    global scope maps to ``MpcProtocols.PRIVC``, they become ``[1]`` and
    ``mdu.privc_one_share`` instead.

    Args:
        loss: the loss variable; only its ``name`` and ``dtype`` are read.
    Returns:
        The object returned by ``backward._create_op_desc_`` for a
        ``fill_constant`` op whose role is Backward | Loss.
    """
    fill_shape = [2, 1]
    fill_value = mdu.aby3_one_share

    protocol_var = fluid.global_scope().find_var("mpc_protocol_index")
    protocol_index = np.array(protocol_var.get_tensor())
    if MpcProtocols(protocol_index) is MpcProtocols.PRIVC:
        fill_shape = [1]
        fill_value = mdu.privc_one_share

    grad_output_name = backward._append_grad_suffix_(loss.name)

    maker = core.op_proto_and_checker_maker
    role_attr_name = maker.kOpRoleAttrName()
    role_value = int(maker.OpRole.Backward) | int(maker.OpRole.Loss)

    attrs = {
        "shape": fill_shape,
        "value": fill_value,
        "dtype": loss.dtype,
        "force_cpu": False,
        role_attr_name: role_value,
    }
    return backward._create_op_desc_("fill_constant", {},
                                     {"Out": [grad_output_name]}, attrs)

コード例 #5

0

ファイルを表示

ファイル: backward.py プロジェクト: zpcalan/PaddleFL

def append_backward(loss,
                    parameter_list=None,
                    no_grad_set=None,
                    callbacks=None,
                    checkpoints=None):
    """
    This function appends backward part to main_program.
    A complete neural network training is made up of forward and backward
    propagation. However, when we configure a network, we only need to
    specify its forward part. This function uses the chain rule to automatically
    generate the backward part according to the forward part.
    In most cases, users do not need to invoke this function manually.
    It will be automatically invoked by the optimizer's `minimize` function.
    Parameters:
        loss( :ref:`api_guide_Variable_en` ): The loss variable of the network.
        parameter_list(list of str, optional): Names of parameters that need
                                           to be updated by optimizers.
                                           If it is None, all parameters
                                           will be updated.
                                           Default: None.
        no_grad_set(set of str, optional): Variable names in the :ref:`api_guide_Block_en` 0 whose gradients
                               should be ignored. All variables with
                               `stop_gradient=True` from all blocks will
                               be automatically added into this set.
                               If this parameter is not None, the names in this set will be added to the default set.
                               Default: None.
        callbacks(list of callable object, optional): List of callback functions.
                                               The callbacks are used for
                                               doing some custom jobs during
                                               backward part building. All
                                               callable objects in it will
                                               be invoked once each time a
                                               new gradient operator is added
                                               into the program. The callable
                                               object must have two input
                                               parameters: 'block' and 'context'.
                                               The 'block' is the :ref:`api_guide_Block_en` which
                                               the new gradient operator will
                                               be added to. The 'context' is a
                                               map, whose keys are gradient
                                               variable names and values are
                                               corresponding original :ref:`api_guide_Variable_en` .
                                               In addition to this, the 'context'
                                               has another special key-value pair:
                                               the key is string '__current_op_desc__'
                                               and the value is the op_desc of the
                                               gradient operator who has just
                                               triggered the callable object.
                                               Default: None.
        checkpoints: Accepted for interface compatibility; it is not read
                     anywhere in this function body.
    Returns:
        list of tuple ( :ref:`api_guide_Variable_en` , :ref:`api_guide_Variable_en` ): Pairs of parameter and its corresponding gradients.
        The key is the parameter and the value is gradient variable.
    Raises:
        AssertionError: If `loss` is not an instance of Variable, or if
            `callbacks` is given but is neither a list nor a tuple.
        ValueError: If a gradient variable recorded for a parameter is missing
            from its block, or no producing op can be found for a gradient.
    Examples:
        .. code-block:: python
            import paddle.fluid as fluid
            x = fluid.data(name='x', shape=[None, 13], dtype='float32')
            y = fluid.data(name='y', shape=[None, 1], dtype='float32')
            y_predict = fluid.layers.fc(input=x, size=1, act=None)
            loss = fluid.layers.square_error_cost(input=y_predict, label=y)
            avg_loss = fluid.layers.mean(loss)
            param_grad_list = fluid.backward.append_backward(loss=avg_loss)
            p_g_list1 = fluid.backward.append_backward(loss=avg_loss)  # len(p_g_list1) == 2
            p_g_list2 = fluid.backward.append_backward(loss=avg_loss, parameter_list=[p_g_list1[0][0].name])  # len(p_g_list2) == 1
            p_g_list3 = fluid.backward.append_backward(loss=avg_loss, no_grad_set=set([p_g_list1[0][0].name]))  # len(p_g_list3) == 1
            p_g_list4 = fluid.backward.append_backward(loss=avg_loss, parameter_list=[p_g_list1[0][0].name], no_grad_set=set([p_g_list1[0][0].name]))  # len(p_g_list4) == 0
    """

    assert isinstance(loss, framework.Variable)

    if loss.op is None:
        # the loss is from a cloned program. Find loss op manually.
        backward._find_loss_op_(loss)

    loss.op._set_attr(
        core.op_proto_and_checker_maker.kOpRoleAttrName(),
        int(core.op_proto_and_checker_maker.OpRole.Forward)
        | int(core.op_proto_and_checker_maker.OpRole.Loss))

    if callbacks is not None:
        # The original evaluated isinstance() and discarded the result, so
        # the check never had any effect. Assert, as is done for `loss`.
        assert isinstance(callbacks, (list, tuple)), \
            "callbacks must be a list or tuple of callable objects"

    program = loss.block.program
    program._appending_grad_times += 1

    if no_grad_set is None:
        no_grad_set = set()
    # Work on a copy so the caller's set is never modified.
    no_grad_set = copy.copy(no_grad_set)
    no_grad_dict = backward._get_stop_gradients_(program)
    no_grad_dict[0].update(
        list(map(backward._append_grad_suffix_, no_grad_set)))

    grad_info_map = dict()
    root_block = program.block(0)

    # Number of ops before any backward op is appended; used below to tell
    # the forward part of the block from the newly added part.
    fwd_op_num = root_block.desc.op_size()
    current_block_idx = program.current_block_idx
    grad_to_var = dict()

    op_desc = _create_loss_op_desc_(loss)
    root_block.desc.append_op().copy_from(op_desc)

    block_no_grad_set = set(map(backward._strip_grad_suffix_, no_grad_dict[0]))
    op_path = backward._find_op_path_(root_block, [loss], [],
                                      block_no_grad_set)
    no_grad_vars = backward._find_no_grad_vars(root_block, op_path, [loss],
                                               block_no_grad_set)
    block_no_grad_set.update(no_grad_vars)
    no_grad_dict[0].update(
        list(map(backward._append_grad_suffix_, block_no_grad_set)))

    input_grad_names_set = None
    # For double backward, input_grad_names is used for filter
    # some non-used gradients op.
    if program._appending_grad_times > 1:
        input_grad_names_set = {backward._append_grad_suffix_(loss.name)}

    backward._append_backward_ops_(root_block,
                                   op_path,
                                   root_block,
                                   no_grad_dict,
                                   grad_to_var,
                                   callbacks,
                                   input_grad_names_set=input_grad_names_set)

    # Because calc_gradient may be called multiple times,
    # we need rename the internal gradient variables so that they have
    # different names.
    backward._rename_grad_(root_block, fwd_op_num, grad_to_var, {})

    backward._append_backward_vars_(root_block, fwd_op_num, grad_to_var,
                                    grad_info_map)

    # Restore the block index captured before the backward ops were built.
    program.current_block_idx = current_block_idx
    program._sync_with_cpp()

    if parameter_list is not None:
        parameters = parameter_list
    else:
        params = list(filter(is_mpc_parameter, program.list_vars()))
        parameters = [param.name for param in params if param.trainable]

    params_and_grads = []
    for param in parameters:
        # Use the same key for the membership test and the lookup; the
        # original tested the text form but indexed with the raw value.
        param_key = cpt.to_text(param)
        if param_key not in grad_info_map:
            continue
        grad_info = grad_info_map[param_key]
        grad_block = grad_info[1]
        if not grad_block.has_var(grad_info[0]):
            raise ValueError(
                "grad block[{0}] did not have grad var {1}".format(
                    grad_info[1], grad_info[0]))
        # Get the param var from the global block
        param_var = program.global_block().var(param)
        grad_var = grad_block.var(grad_info[0])
        if loss.block.has_var(grad_info[0]):
            params_and_grads.append((param_var, grad_var))
        else:
            params_and_grads.append((param_var, None))

    op_role_var_attr_name = core.op_proto_and_checker_maker.kOpRoleVarAttrName(
    )
    for p, g in params_and_grads:
        if g is None:
            continue
        # Scan from the end so the last op that writes the gradient is the
        # one recorded as its producer.
        for op in reversed(program.global_block().ops):
            assert isinstance(op, framework.Operator)
            if g.name in op.output_arg_names:
                g.op = op
                break

        if g.op is None:
            raise ValueError("Unexpected branch")
        # Put this (param, grad) pair ahead of any pairs already on the op.
        attr_val = [p.name, g.name]
        if g.op.has_attr(op_role_var_attr_name):
            attr_val.extend(g.op.attr(op_role_var_attr_name))
        g.op._set_attr(op_role_var_attr_name, attr_val)

    return params_and_grads

コード例 #6

0

ファイルを表示

ファイル: gradient_checker.py プロジェクト: sandyhouse/Paddle

def get_static_double_grad(x,
                           y,
                           x_init=None,
                           dy_init=None,
                           place=None,
                           program=None):
    """
    Compute the double-grad result on a static graph.

    First-order gradients of ``y`` w.r.t. ``x`` are appended with seeds taken
    from ``dy_init``. The non-None first-order gradients become the new
    outputs, a second backward pass seeded with ones is appended, and the
    program is run to fetch the non-None second-order gradients.

    Note: ``x`` and ``x_init`` are extended with ``+=``, so list arguments
    passed by the caller are modified in place.

    Args:
        x (Variable|list[Variable]): input variables to the program.
        y (Variable|list[Variable]): output variables to the program.
        x_init (numpy.array|list[numpy.array]|None): the init value for input x.
        dy_init (numpy.array|list[numpy.array]|None): the init value for output y.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        program (Program|None): a Program with forward pass.
            If None, use fluid.default_main_program().
    Returns:
        The result of ``exe.run`` when fetching the non-None second-order
        gradients (per the original documentation, a list of numpy arrays).
    Raises:
        ValueError: if ``x_init`` is non-empty and its length differs from
            the length of ``x``.
    """
    if program is None:
        program = fluid.default_main_program()
    scope = fluid.executor.global_scope()

    # One persistable gradient variable per output, seeded from dy_init.
    seed_vars = []
    for idx, out_var in enumerate(y):
        grad_name = _append_grad_suffix_(out_var.name)
        grad_dtype = dtype_to_np_dtype(out_var.dtype)
        seed = program.global_block().create_var(name=grad_name,
                                                 shape=out_var.shape,
                                                 dtype=grad_dtype,
                                                 persistable=True)
        seed.stop_gradient = False
        set_var_in_scope(scope, place, grad_name, dy_init[idx])
        seed_vars.append(seed)

    # Append the first-order backward pass.
    first_order = fluid.gradients(y, x, seed_vars)

    # The seed gradients feed the first-order backward pass, so they are
    # inputs of the second-order pass as well.
    x += seed_vars
    x_init += dy_init

    # Gradients may be None; only the real ones become the new outputs.
    y = [grad for grad in first_order if grad is not None]

    # Normalize the arguments to lists.
    x = _as_list(x)
    y = _as_list(y)

    for in_var in x:
        in_var.stop_gradient = False
        in_var.persistable = True
    if place is None:
        place = fluid.CPUPlace()

    # Run the startup program to initialize variables.
    scope = fluid.executor.global_scope()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    x_init = _as_list(x_init)
    # Feed the initial input values when any were given.
    if x_init:
        if len(x_init) != len(x):
            raise ValueError('len(x_init) (=%d) is not the same'
                             ' as len(x) (= %d)' % (len(x_init), len(x)))
        for in_var, init_arr in zip(x, x_init):
            assert in_var.shape == init_arr.shape
        feed_map = {in_var.name: init_arr
                    for in_var, init_arr in zip(x, x_init)}
        exe.run(program, feed=feed_map, scope=scope)

    # Create a ones-filled gradient variable for each new output.
    second_seeds = []
    for out_var in y:
        grad_dtype = dtype_to_np_dtype(out_var.dtype)
        grad_name = _append_grad_suffix_(out_var.name)
        seed = program.global_block().create_var(name=grad_name,
                                                 shape=out_var.shape,
                                                 dtype=grad_dtype,
                                                 persistable=True)
        ones = np.ones(out_var.shape, dtype=grad_dtype)
        set_var_in_scope(scope, place, grad_name, ones)
        second_seeds.append(seed)

    # Append the second-order backward pass.
    second_order = fluid.gradients(y, x, second_seeds)
    exe = fluid.Executor(place)

    # Gradients may be None; fetch only the ones that exist.
    kept = [(pos, grad) for pos, grad in enumerate(second_order)
            if grad is not None]
    _, fetch_vars = zip(*kept)
    return exe.run(program, scope=scope, fetch_list=fetch_vars)

コード例 #7

0

ファイルを表示

ファイル: gradient_checker.py プロジェクト: sandyhouse/Paddle

def triple_grad_check(x,
                      y,
                      x_init=None,
                      y_grads=None,
                      x_grads_grads=None,
                      place=None,
                      program=None,
                      eps=1e-6,
                      atol=1e-5,
                      rtol=1e-3,
                      raise_exception=True):
    """
    Check triple gradients. This function will append backward to the
    program before third order gradient check.

    Args:
        x (Variable|list[Variable]): input variables to the program.
        y (Variable|list[Variable]): output variables to the program.
        x_init (numpy.array|list[numpy.array]|None): the init value for input x.
        y_grads (numpy.array|list[numpy.array]|None): the gradients with respect to y.
            When None, a gradient variable filled with random values is
            created for every output.
        x_grads_grads (numpy.array|list[numpy.array]|None): the gradients with respect to your input.
            When None, a variable filled with random values is created for
            every first-order gradient.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        program (Program|None): a Program with forward pass.
            If None, use fluid.default_main_program().
        eps (float): perturbation for finite differences.
        atol (float): absolute tolerance.
        rtol (float): relative tolerance.
        raise_exception (bool): accepted for interface compatibility; this
            function body does not read it or forward it to ``grad_check``.
    Returns:
        The value returned by ``grad_check`` for the assembled inputs
        (per the original documentation, True if all differences satisfy
        the numpy.allclose condition).
    """
    # check input arguments
    x = _as_list(x)
    for v in x:
        v.stop_gradient = False
        v.persistable = True
    y = _as_list(y)

    if program is None:
        program = fluid.default_main_program()

    # Bind the scope once up front. The original bound it only inside the
    # "is None" branches, so supplying y_grads / x_grads_grads reached the
    # else branches with `scope` unbound.
    scope = fluid.executor.global_scope()

    if y_grads is None:
        y_grads = []
        y_grads_init = []
        for yi in y:
            dyi_name = _append_grad_suffix_(yi.name)
            np_type = dtype_to_np_dtype(yi.dtype)
            dy = program.global_block().create_var(name=dyi_name,
                                                   shape=yi.shape,
                                                   dtype=np_type,
                                                   persistable=True)
            dy.stop_gradient = False
            v = np.random.random(size=yi.shape).astype(np_type)
            set_var_in_scope(scope, place, dyi_name, v)
            y_grads.append(dy)
            y_grads_init.append(v)
    else:
        y_grads = _as_list(y_grads)
        y_grads_init = [
            var_to_np_array_in_scope(scope, place, v.name) for v in y_grads
        ]

    # append first order grads
    target_grads = fluid.gradients(y, x, y_grads)

    if x_grads_grads is None:
        x_grads_grads = []
        x_grads_grads_init = []
        for dxi in target_grads:
            ddxi_name = _append_grad_suffix_(dxi.name)
            np_type = dtype_to_np_dtype(dxi.dtype)
            ddx = program.global_block().create_var(name=ddxi_name,
                                                    shape=dxi.shape,
                                                    dtype=np_type,
                                                    persistable=True)
            ddx.stop_gradient = False
            v = np.random.random(size=dxi.shape).astype(np_type)
            set_var_in_scope(scope, place, ddxi_name, v)
            x_grads_grads.append(ddx)
            x_grads_grads_init.append(v)
    else:
        x_grads_grads = _as_list(x_grads_grads)
        x_grads_grads_init = [
            var_to_np_array_in_scope(scope, place, v.name)
            for v in x_grads_grads
        ]

    # y_grads feed the first-order backward pass, so they are inputs of the
    # second-order pass as well.
    x += y_grads
    x_init = _as_list(x_init)
    x_init += y_grads_init

    # append second order grads
    target_grads_grads = fluid.gradients(target_grads, x, x_grads_grads)

    # filter None in target_grads_grads for Dy/Dx may be None in kernel
    filted = [(i, dyi) for i, dyi in enumerate(target_grads_grads)
              if dyi is not None]
    filted_idx, filted_target_grads_grads = zip(*filted)

    x += x_grads_grads
    x_init += x_grads_grads_init

    # x <=> [x, dout, ddx]
    # Hand the check result back to the caller, as the docstring promises;
    # the original discarded it.
    return grad_check(x=x,
                      y=filted_target_grads_grads,
                      x_init=x_init,
                      place=place,
                      program=program,
                      eps=eps,
                      atol=atol,
                      rtol=rtol)

コード例 #8

0

ファイルを表示

ファイル: gradient_checker.py プロジェクト: sandyhouse/Paddle

def double_grad_check(x,
                      y,
                      x_init=None,
                      y_grads=None,
                      place=None,
                      program=None,
                      eps=1e-6,
                      atol=1e-5,
                      rtol=1e-3,
                      raise_exception=True):
    """
    Check gradients of gradients. This function will append backward to the
    program before second order gradient check.

    Args:
        x (Variable|list[Variable]): input variables to the program.
        y (Variable|list[Variable]): output variables to the program.
        x_init (numpy.array|list[numpy.array]|None): the init value for input x.
        y_grads (numpy.array|list[numpy.array]|None): the gradients with respect to y.
            When None, a gradient variable filled with random values is
            created for every output.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        program (Program|None): a Program with forward pass.
            If None, use fluid.default_main_program().
        eps (float): perturbation for finite differences.
        atol (float): absolute tolerance.
        rtol (float): relative tolerance.
        raise_exception (bool): accepted for interface compatibility; this
            function body does not read it or forward it to ``grad_check``.
    Returns:
        The value returned by ``grad_check`` for the assembled inputs
        (per the original documentation, True if all differences satisfy
        the numpy.allclose condition).
    """
    # check input arguments
    x = _as_list(x)
    for v in x:
        v.stop_gradient = False
        v.persistable = True
    y = _as_list(y)

    if program is None:
        program = fluid.default_main_program()

    # Bind the scope once up front. The original bound it only inside the
    # "y_grads is None" branch, so supplying y_grads reached the else branch
    # with `scope` unbound.
    scope = fluid.executor.global_scope()

    if y_grads is None:
        y_grads = []
        y_grads_init = []
        for yi in y:
            dyi_name = _append_grad_suffix_(yi.name)
            np_type = dtype_to_np_dtype(yi.dtype)
            dy = program.global_block().create_var(name=dyi_name,
                                                   shape=yi.shape,
                                                   dtype=np_type,
                                                   persistable=True)
            dy.stop_gradient = False
            v = np.random.random(size=yi.shape).astype(np_type)
            set_var_in_scope(scope, place, dyi_name, v)
            y_grads.append(dy)
            y_grads_init.append(v)
    else:
        y_grads = _as_list(y_grads)
        y_grads_init = [
            var_to_np_array_in_scope(scope, place, v.name) for v in y_grads
        ]

    # append first order grads
    target_grads = fluid.gradients(y, x, y_grads)

    # y_grads are the input of first-order backward,
    # so, they are also the input of second-order backward.
    x += y_grads
    x_init = _as_list(x_init)
    x_init += y_grads_init

    # Hand the check result back to the caller, as the docstring promises;
    # the original discarded it.
    return grad_check(x, target_grads, x_init, place, program, eps, atol,
                      rtol)

コード例 #9

0

ファイルを表示

ファイル: gradient_checker.py プロジェクト: sandyhouse/Paddle

def _compute_analytical_jacobian(program, x, y, place, scope):
    """Compute the analytical Jacobian of ``y`` with respect to ``x``.

    A gradient variable for ``y`` is created and zero-filled. For each
    element index of ``y`` in turn, that element is set to 1, the program is
    run to fetch the gradients w.r.t. ``x``, one Jacobian column is filled
    from the flattened results, and the element is reset to 0.

    Args:
        program (Program): a Program with forward pass.
        x (Variable|list[Variable]): a variable or list of variable
        y (Variable): the target variable.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        scope (Scope): the scope used to run program.

    Returns:
        The structure built by ``make_jacobian``, with column ``i`` of each
        entry filled from the gradient fetched for element ``i`` of ``y``.
        Per the original documentation this is a list of 2-D numpy arrays,
        one per input, with one row per input element and one column per
        output element.

    Raises:
        TypeError: if ``y`` is not a ``fluid.framework.Variable``.
    """
    if not isinstance(y, fluid.framework.Variable):
        raise TypeError('y is not Variable')

    grad_name = _append_grad_suffix_(y.name)
    grad_dtype = dtype_to_np_dtype(y.dtype)

    # Declare the output-gradient variable in the program.
    grad_var = program.global_block().create_var(name=grad_name,
                                                 shape=y.shape,
                                                 dtype=grad_dtype,
                                                 persistable=True)
    # Append the backward pass.
    input_grads = fluid.gradients(y, x, grad_var)

    # Start from an all-zero output gradient in the scope.
    zeros = np.zeros(y.shape, dtype=grad_dtype)
    grad_tensor = set_var_in_scope(scope, place, grad_name, zeros)

    exe = fluid.Executor(place)

    out_size = _product(y.shape)

    x = _as_list(x)
    jacobian = make_jacobian(x, out_size, grad_dtype)

    # Gradients may be None; fetch only the ones that exist and remember
    # which input each of them belongs to.
    kept = [(pos, grad) for pos, grad in enumerate(input_grads)
            if grad is not None]
    kept_idx, kept_grads = zip(*kept)

    for col in range(out_size):
        # One-hot seed: only element `col` of the output gradient is 1.
        _set_item(grad_tensor, col, 1, grad_dtype)

        fetched = exe.run(program, scope=scope, fetch_list=kept_grads)

        for pos, src_idx in enumerate(kept_idx):
            result = fetched[pos]
            if result is None:
                column = np.zeros(input_grads[src_idx].shape,
                                  dtype=grad_dtype).flatten()
            else:
                column = result.flatten()
            jacobian[src_idx][:, col] = column

        # Reset the seed before moving to the next element.
        _set_item(grad_tensor, col, 0, grad_dtype)

    return jacobian

コード例 #10

0

ファイルを表示

    def build_net(self, cond_i):
        """
        Build a small network with a conditional branch and minimize it.

        The network built by the code below is:
            sum_xy = param_x + param_y
            sub_yz = param_y - param_z
            if cond_i > 1.0:
                cond_yz = param_y + param_z
                sum_cond = cond_yz + param_z
            else:
                sum_cond = param_y + param_z
            sum_all = sum(sum_xy, sub_yz, sum_cond)
            mean_out = mean(sum_all)
            optimizer.minimize(mean_out)

        Several extra ops whose results are never used are also created (an
        fc layer and an elementwise_mul inside each branch).

        Returns:
            list[str]: the parameter names followed by their gradient-suffixed
            names. "param_y" is left out when ``self.y_no_grad`` is truthy.
        """

        def make_param(name, init_value):
            # float32 parameter initialized from a numpy array.
            return fluid.layers.create_parameter(
                dtype="float32",
                shape=self.shape,
                attr=fluid.ParamAttr(learning_rate=self.param_lr, name=name),
                default_initializer=fluid.initializer.NumpyArrayInitializer(
                    init_value))

        param_x = make_param("param_x", self.x)
        param_y = make_param("param_y", self.y)
        param_z = make_param("param_z", self.z)

        sum_xy = fluid.layers.elementwise_add(param_x, param_y, name='sum_xy')
        sub_yz = fluid.layers.elementwise_sub(param_y, param_z, name='sub_yz')
        # Result intentionally unused.
        fluid.layers.fc(param_x, size=1, name='fc_useless')

        def cond_true():
            partial_sum = fluid.layers.elementwise_add(param_y,
                                                       param_z,
                                                       name='sum_cond_yz')
            # param_y will not be updated
            param_y.stop_gradient = self.y_no_grad
            branch_out = fluid.layers.elementwise_add(partial_sum,
                                                      param_z,
                                                      name='sum_cond_true')
            # Result intentionally unused.
            fluid.layers.elementwise_mul(param_x, param_y)
            return branch_out

        def cond_false():
            branch_out = fluid.layers.elementwise_add(param_y,
                                                      param_z,
                                                      name='sum_cond_false')
            # Result intentionally unused.
            fluid.layers.elementwise_mul(param_z, param_z)
            return branch_out

        cond_var = fluid.layers.assign(np.array([cond_i], dtype='float32'))
        sum_cond = fluid.layers.cond(cond_var > 1.0, cond_true, cond_false)
        sum_all = fluid.layers.sum([sum_xy, sub_yz, sum_cond])
        mean_out = fluid.layers.mean(sum_all)
        self.optimizer.minimize(mean_out)

        if self.y_no_grad:
            param_names = ["param_x", "param_z"]
        else:
            param_names = ["param_x", "param_y", "param_z"]
        grad_names = [_append_grad_suffix_(name) for name in param_names]
        return param_names + grad_names