    def backward_impl(self, inputs, outputs, prop_down, accum):
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        # Inputs
        x0 = inputs[0].data
        dy = inputs[1].data
        # Outputs
        dx0 = outputs[0].data
        # Grads of inputs
        g_x0 = inputs[0].grad
        g_dy = inputs[1].grad
        # Grads of outputs
        g_dx0 = outputs[0].grad

        # Compute
        # Forward: y = val ** x0; first-order backward: dx0 = dy * val**x0 * log(val)
        val = self.forward_func.info.args["val"]
        if prop_down[0] or prop_down[1]:
            cv = F.constant(val, x0.shape)
            if not nn.get_auto_forward():
                cv.forward()
            log_v = F.log(cv.data)
        ## w.r.t. x0
        if prop_down[0]:
            if accum[0]:
                g_x0 += g_dx0 * dy * F.r_pow_scalar(x0, val) * log_v**2.0
            else:
                g_x0.copy_from(g_dx0 * dy * F.r_pow_scalar(x0, val) *
                               log_v**2.0)
        ## w.r.t. dy
        if prop_down[1]:
            if accum[1]:
                g_dy += g_dx0 * F.r_pow_scalar(x0, val) * log_v
            else:
                g_dy.copy_from(g_dx0 * F.r_pow_scalar(x0, val) * log_v)
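
A minimal sketch (not part of the source) of how a double backward like the one above gets exercised: nn.grad builds the first-order gradient graph for y = val ** x, and calling backward on that graph invokes the backward_impl of the corresponding backward function. Shapes and values below are illustrative.

import numpy as np
import nnabla as nn
import nnabla.functions as F

x = nn.Variable.from_numpy_array(np.random.rand(2, 3) + 0.5).apply(need_grad=True)
y = F.r_pow_scalar(x, 2.0)          # forward: y = 2.0 ** x
gx = nn.grad([y], [x])[0]           # first order: 2**x * log(2)
loss = F.sum(gx)
loss.forward()
x.grad.zero()
loss.backward()                     # second order: x.grad ~= 2**x * log(2)**2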

    def backward_impl(self, inputs, outputs, prop_down, accum):
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        # Args
        axis = self.forward_func.info.args["axis"]
        # Inputs
        x0 = inputs[0].data  # logits
        t0 = inputs[1].data  # labels
        dz = inputs[2].data  # grad_input
        # Outputs
        dx0 = outputs[0].data
        dt0 = outputs[1].data
        # Grads of inputs
        g_x0 = inputs[0].grad
        g_t0 = inputs[1].grad
        g_dz = inputs[2].grad
        # Grads of outputs
        g_dx0 = outputs[0].grad
        g_dt0 = outputs[1].grad

        # Computation
        ## w.r.t. x0
        if prop_down[0]:
            # gradient is the backward of softmax with (g_dx0 * dz) as incoming gradient
            si = nn.Variable(x0.shape).apply(data=x0, need_grad=True)
            si.grad.fill(0.0)
            so = F.softmax(si, axis)
            if not nn.get_auto_forward():
                so.forward()
            so.backward(g_dx0 * dz, clear_buffer=False)
            g_x0_ = si.grad
            if accum[0]:
                g_x0 += g_x0_
            else:
                g_x0.copy_from(g_x0_)

        ## w.r.t. t0 is not required

        ## w.r.t. dz
        if prop_down[2]:
            # An unstable implementation, since it divides by dz:
            ## g_dz_ = g_dx0 * dx0 / dz
            ## g_dz_ = F.sum(g_dz_, axis)

            # dz.shape is a tuple, so compare with () to fall back to (1,) for a scalar dz
            shape = dz.shape if dz.shape != () else (1,)
            si = nn.Variable(x0.shape).apply(data=x0, need_grad=True)
            ti = nn.Variable(t0.shape).apply(data=t0)
            o = nn.Variable(shape)
            o.grad.fill(1.0)
            self.forward_func.backward([si, ti], [o], [False, False])

            # Sum g_dx0_i * (y_hat_i - y_i) over i
            g_dz_ = F.sum(g_dx0 * si.grad, axis)
            if accum[2]:
                g_dz += g_dz_
            else:
                g_dz.copy_from(g_dz_)
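
For context, a hedged sketch (not part of the source): a gradient-penalty style loss is a typical way such a double backward is reached, since it differentiates through the first-order gradient of softmax cross entropy. Names and shapes are illustrative.

import numpy as np
import nnabla as nn
import nnabla.functions as F

x = nn.Variable.from_numpy_array(np.random.randn(4, 10)).apply(need_grad=True)
t = nn.Variable.from_numpy_array(np.random.randint(0, 10, (4, 1)))
ce = F.mean(F.softmax_cross_entropy(x, t, axis=1))
gx = nn.grad([ce], [x])[0]          # first-order gradient w.r.t. the logits
penalty = F.sum(gx * gx)
penalty.forward()
x.grad.zero()
penalty.backward()                  # differentiates gx, reaching the prop_down[0] branch above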
Example #3
    def backward_impl(self, inputs, outputs, prop_down, accum):
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        # Args
        axis = self.forward_func.info.args["axis"]
        # Normalize a negative axis to work around a double-backward index error with CUDA on Windows
        if axis < 0:
            axis += inputs[0].ndim

        # Inputs
        x0 = inputs[0].data  # input of the forward function
        y0 = inputs[1].data  # output of the forward function
        dy = inputs[2].data  # incoming gradient w.r.t. the forward output
        # Outputs
        dx0 = outputs[0].data
        # Grads of inputs
        g_x0 = inputs[0].grad
        g_y0 = inputs[1].grad
        g_dy = inputs[2].grad
        # Grads of outputs
        g_dx0 = outputs[0].grad

        # w.r.t. x0
        if prop_down[0]:
            # gradient is the backward of softmax with (g_dx0 * -sum_i dy_i) as incoming gradient:
            # for z = log_softmax(x), the first-order backward is dx = dy - softmax(x) * sum_i dy_i,
            # so the only x-dependent part is -softmax(x) * sum_i dy_i, and differentiating it with
            # incoming g_dx0 reduces to the softmax backward fed with g_dx0 * (-sum_i dy_i)
            neg_sum_dy = -F.sum(dy, axis, True)
            si = nn.Variable(x0.shape).apply(data=x0, need_grad=True)
            si.grad.fill(0.0)
            so = F.softmax(si, axis)
            if not nn.get_auto_forward():
                so.forward()
            so.backward(g_dx0 * neg_sum_dy, clear_buffer=False)
            g_x0_ = si.grad
            if accum[0]:
                g_x0 += g_x0_
            else:
                g_x0.copy_from(g_x0_)

        # w.r.t. y0 is not required (y0 is the forward output; it only enters as data the gradient depends on)

        # w.r.t. dy
        if prop_down[2]:
            # gradient is the backward of log_softmax with g_dx0 as incoming gradient
            lsi = nn.Variable(x0.shape).apply(data=x0,
                                              grad=g_dy,
                                              need_grad=True)
            lso = nn.Variable(x0.shape).apply(data=y0, grad=g_dx0)
            self.forward_func.backward([lsi], [lso], accum=[accum[2]])
Example #4
    def _create_function(self, f, callback, current_scope):
        callback.verbose2('Creating function {}: {} --> {}.'.format(
            f.name, [i.name for i in f.inputs], [i.name for i in f.outputs]))

        # Give callbacks a chance to rewrite the function, first by type, then by name
        f = callback._apply_generate_function_by_type(f)
        f = callback._apply_generate_function_by_name(f)
        inputs = self._create_inputs(f.inputs, callback, current_scope)
        function_instance = _create_function(inputs, f.proto, self.batch_size)

        outputs = function_instance(*inputs,
                                    n_outputs=len(f.outputs),
                                    auto_forward=nn.get_auto_forward())
        if not isinstance(outputs, tuple):
            outputs = (outputs, )

        # Bind each created output variable to its proto output and propagate the name
        for o, ovar in zip(f.outputs, outputs):
            o.variable = ovar
            ovar.name = o.name
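
For context, a hedged sketch (not part of the source) of how this method is reached: loading an NNP file with NnpLoader and asking for a network rebuilds the graph, creating each proto function along the way. The file name below is hypothetical.

import numpy as np
from nnabla.utils.nnp_graph import NnpLoader

nnp = NnpLoader('model.nnp')                 # hypothetical NNP file
name = nnp.get_network_names()[0]
net = nnp.get_network(name, batch_size=8)    # graph construction goes through _create_function
x = list(net.inputs.values())[0]
y = list(net.outputs.values())[0]
x.d = np.random.randn(*x.shape)
y.forward()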
Example #5
    def backward_impl(self, inputs, outputs, prop_down, accum):
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        # Args
        with_bias = len(inputs) == 4  # inputs are [x0, w0, b0, dy] or [x0, w0, dy]
        base_axis = self.forward_func.info.args["base_axis"]

        # Inputs
        x0 = inputs[0].data
        w0 = inputs[1].data
        b0 = inputs[2].data if with_bias else None
        dy = inputs[3].data if with_bias else inputs[2].data
        # Outputs
        dx0 = outputs[0].data
        dw0 = outputs[1].data
        db0 = outputs[2].data if with_bias else None
        # Grads of inputs
        g_x0 = inputs[0].grad
        g_w0 = inputs[1].grad
        g_b0 = inputs[2].grad if with_bias else None
        g_dy = inputs[3].grad if with_bias else inputs[2].grad
        # Grads of outputs
        g_dx0 = outputs[0].grad
        g_dw0 = outputs[1].grad
        g_db0 = outputs[2].grad if with_bias else None

        # Computation
        ## w.r.t. x or w.r.t. w
        if prop_down[0] or prop_down[1]:
            # we can re-use the backward of the forward with different inputs
            inp_x = nn.Variable(x0.shape).apply(data=g_dx0,
                                                grad=g_x0,
                                                need_grad=prop_down[0])
            inp_w = nn.Variable(w0.shape).apply(data=g_dw0,
                                                grad=g_w0,
                                                need_grad=prop_down[1])
            out_y = nn.Variable(dy.shape).apply(grad=dy)
            inputs = [inp_x, inp_w]
            outputs = [out_y]
            if with_bias:
                inp_b = nn.Variable(b0.shape).apply(need_grad=False)
                inputs += [inp_b]
            self.forward_func.backward(inputs, outputs, accum)
        ## w.r.t. b
        if with_bias and prop_down[2] and not accum[2]:
            zeros = F.constant(0, b0.shape)
            if not nn.get_auto_forward():
                zeros.forward()
            g_b0.copy_from(zeros.data)
        ## w.r.t. dy
        if (not with_bias and prop_down[2]) or (with_bias and prop_down[3]):
            accum_dy = accum[3] if with_bias else accum[2]
            g_dy_ = F.affine(g_dx0, w0, None, base_axis) + \
                F.affine(x0, g_dw0, None, base_axis)
            if with_bias:
                nshape = [1] * base_axis + list(b0.shape)
                g_db0 = F.reshape(g_db0, nshape)
                g_dy_ += g_db0
            if accum_dy:
                g_dy += g_dy_
            else:
                g_dy.copy_from(g_dy_)
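
A small numpy check (illustrative, not from the source) of why reusing the forward's backward with swapped data works for affine with base_axis=1: the first-order backward of y = x @ W is dx0 = dy @ W.T and dw0 = x.T @ dy, so the second-order gradient w.r.t. x is dy @ g_dw0.T and w.r.t. W is g_dx0.T @ dy, which is exactly what affine's backward produces when g_dx0 and g_dw0 are fed in as data and dy is used as the output gradient.

import numpy as np

rng = np.random.RandomState(0)
x0, dy = rng.randn(4, 3), rng.randn(4, 5)
g_dw0 = rng.randn(3, 5)                      # incoming gradient of dw0

# dw0(x0) = x0.T @ dy; differentiate <g_dw0, dw0(x0)> w.r.t. x0 numerically
eps = 1e-6
fd = np.zeros_like(x0)
for idx in np.ndindex(*x0.shape):
    e = np.zeros_like(x0)
    e[idx] = eps
    fd[idx] = ((g_dw0 * ((x0 + e).T @ dy)).sum()
               - (g_dw0 * ((x0 - e).T @ dy)).sum()) / (2 * eps)

# The closed form is the affine backward w.r.t. its input, with g_dw0 playing the weights
assert np.allclose(fd, dy @ g_dw0.T, atol=1e-5)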
Example #6
    def backward_impl(self, inputs, outputs, prop_down, accum):
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        # Args
        with_bias = len(inputs) == 4  # inputs are [x0, w0, b0, dy] or [x0, w0, dy]
        base_axis = self.forward_func.info.args["base_axis"]
        pad = self.forward_func.info.args["pad"]
        stride = self.forward_func.info.args["stride"]
        dilation = self.forward_func.info.args["dilation"]
        group = self.forward_func.info.args["group"]
        channel_last = self.forward_func.info.args["channel_last"]
        output_padding = self.forward_func.info.args["output_padding"]

        # Inputs
        x0 = inputs[0].data
        w0 = inputs[1].data
        b0 = inputs[2].data if with_bias else None
        dy = inputs[3].data if with_bias else inputs[2].data
        # Outputs
        dx0 = outputs[0].data
        dw0 = outputs[1].data
        db0 = outputs[2].data if with_bias else None
        # Grads of inputs
        g_x0 = inputs[0].grad
        g_w0 = inputs[1].grad
        g_b0 = inputs[2].grad if with_bias else None
        g_dy = inputs[3].grad if with_bias else inputs[2].grad
        # Grads of outputs
        g_dx0 = outputs[0].grad
        g_dw0 = outputs[1].grad
        g_db0 = outputs[2].grad if with_bias else None

        # Computation
        ## w.r.t. x or w.r.t. w
        if prop_down[0] or prop_down[1]:
            # we can re-use the backward of the forward with different inputs
            inp_x = nn.Variable(x0.shape).apply(data=g_dx0,
                                                grad=g_x0,
                                                need_grad=prop_down[0])
            inp_w = nn.Variable(w0.shape).apply(data=g_dw0,
                                                grad=g_w0,
                                                need_grad=prop_down[1])
            out_y = nn.Variable(dy.shape).apply(grad=dy)
            inputs = [inp_x, inp_w]
            outputs = [out_y]
            if with_bias:
                inp_b = nn.Variable(b0.shape).apply(need_grad=False)
                inputs += [inp_b]
            self.forward_func.backward(inputs, outputs, accum)
        ## w.r.t. b
        if with_bias and prop_down[2] and not accum[2]:
            zeros = F.constant(0, b0.shape)
            if not nn.get_auto_forward():
                zeros.forward()
            g_b0.copy_from(zeros.data)
        ## w.r.t. dy
        if (not with_bias and prop_down[2]) or (with_bias and prop_down[3]):
            accum_dy = accum[3] if with_bias else accum[2]
            params = {
                'base_axis': base_axis,
                'pad': pad,
                'stride': stride,
                'dilation': dilation,
                'output_padding': output_padding,
                'group': group,
                'channel_last': channel_last
            }
            g_dy_ = (F.deconvolution(g_dx0, w0, None, **params) +
                     F.deconvolution(x0, g_dw0, None, **params))
            if with_bias:
                if not channel_last:
                    g_db0 = F.reshape(g_db0, [
                        1 if i != base_axis else g_db0.shape[0]
                        for i in range(g_dy.ndim)
                    ])
                else:
                    g_db0 = F.reshape(g_db0, [
                        1 if i != (g_dy.ndim - 1) else g_db0.shape[0]
                        for i in range(g_dy.ndim)
                    ])
                g_dy_ += g_db0
            if accum_dy:
                g_dy += g_dy_
            else:
                g_dy.copy_from(g_dy_)
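
A tiny illustration (not from the source) of the reshape logic above: the per-channel bias gradient is expanded so it broadcasts against g_dy, with its length placed at base_axis for channel-first layouts and at the last axis for channel-last.

import numpy as np

g_db0 = np.arange(16, dtype=np.float32)      # per-output-channel bias gradient
base_axis, ndim = 1, 4                       # assume g_dy is (N, C, H, W) or (N, H, W, C)

nchw = [1 if i != base_axis else g_db0.shape[0] for i in range(ndim)]   # [1, 16, 1, 1]
nhwc = [1 if i != ndim - 1 else g_db0.shape[0] for i in range(ndim)]    # [1, 1, 1, 16]

g_dy_ = np.zeros((2, 16, 8, 8), dtype=np.float32)
g_dy_ += g_db0.reshape(nchw)                 # broadcasts the bias term over N, H, W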

def add2(ctx, x0, x1, n_outputs=-1, outputs=None):
    return Add2()(x0, x1, n_outputs=n_outputs,
                  auto_forward=nn.get_auto_forward(), outputs=outputs)