Example 1
    def backward(self, target_input_indexes, grad_outputs):
        retained_inputs = self.get_retained_inputs()
        inputs = [None] * len(self.inputs)
        in_data = [None] * len(self.inputs)
        for retained, i_in in six.moves.zip(retained_inputs,
                                            self._input_indexes_to_retain):
            inputs[i_in] = retained
            in_data[i_in] = None if retained is None else retained.array
        in_data = tuple(in_data)

        grad_out_data = tuple(
            [None if grad is None else grad.array for grad in grad_outputs])

        is_chainerx_fallback_mode = self._is_chainerx_fallback_mode
        if is_chainerx_fallback_mode:
            # Convert input and output gradients to numpy/cupy
            in_data = backend.from_chx(in_data)
            grad_out_data = backend.from_chx(grad_out_data)

        # Call Function.backward
        with chainer.using_device(
                backend.get_device_from_array(*(in_data + grad_out_data))):
            if is_chainerx_fallback_mode:
                # Enable attribute fallback
                with function_node._chainerx_attribute_fallback(
                        self._function, self.chainerx_device):
                    gxs = self._function.backward(in_data, grad_out_data)
            else:
                gxs = self._function.backward(in_data, grad_out_data)

        # Check gradients
        for x, gx in six.moves.zip(self.inputs, gxs):
            if gx is not None:
                variable._check_grad_type(self, x, True, gx)

        # Convert input gradients back to ChainerX
        if is_chainerx_fallback_mode:
            gxs = backend.to_chx(gxs)

        ret = []
        for i in target_input_indexes:
            if gxs[i] is None:
                g = None
            else:
                # Intentionally not passing requires_grad=False so that
                # backprop routines can raise an error when a further backprop
                # is attempted against this gradient variable.
                g = variable.Variable(gxs[i])
                if g.xp is not chainerx:
                    g.node._old_style_grad_generator = self._function.label
            ret.append(g)

        return tuple(ret)
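This method appears to be Chainer's FunctionAdapter.backward: the bridge that lets old-style Function objects, whose backward works on raw arrays, participate in the newer FunctionNode autograd machinery. It unwraps retained inputs and output gradients into plain ndarrays, calls Function.backward inside the proper device scope (falling back from ChainerX to NumPy/CuPy when needed), re-wraps the returned arrays as Variables, and tags them so that a second differentiation fails loudly. A minimal, hedged sketch of the legacy API this adapter serves; the Square class and the values below are illustrative, not taken from the listing:

import numpy as np
import chainer
from chainer import Function

class Square(Function):
    # Old-style Function: forward/backward receive and return tuples of arrays.
    def forward(self, inputs):
        x, = inputs
        return x * x,

    def backward(self, inputs, grad_outputs):
        x, = inputs
        gy, = grad_outputs
        return 2 * x * gy,

x = chainer.Variable(np.array([1.0, 2.0, 3.0], dtype=np.float32))
y = Square()(x)                     # wrapped internally by a FunctionAdapter
y.grad = np.ones_like(y.array)
y.backward()                        # eventually reaches a backward like the one above
print(x.grad)                       # [2. 4. 6.]

# Per the comment in the listing, the gradient Variable is tagged so that a
# further backprop through it should raise instead of silently doing nothing:
try:
    x.grad_var.backward()
except RuntimeError as err:
    print(err)                      # should name the old-style Function's label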
Example 2
    def backward(self, target_input_indexes, grad_outputs):
        retained_inputs = self.get_retained_inputs()
        inputs = [None] * len(self.inputs)
        in_data = [None] * len(self.inputs)
        for retained, i_in in six.moves.zip(
                retained_inputs, self._input_indexes_to_retain):
            inputs[i_in] = retained
            in_data[i_in] = None if retained is None else retained.array
        in_data = tuple(in_data)

        grad_out_data = tuple([None if grad is None else grad.data
                               for grad in grad_outputs])

        is_chainerx_fallback_mode = self._is_chainerx_fallback_mode
        if is_chainerx_fallback_mode:
            # Convert input and output gradients to numpy/cupy
            in_data = backend.from_chx(in_data)
            grad_out_data = backend.from_chx(grad_out_data)

        # Call Function.backward
        with cuda.get_device_from_array(*(in_data + grad_out_data)):
            if is_chainerx_fallback_mode:
                # Enable attribute fallback
                with function_node._chainerx_attribute_fallback(
                        self._function, self.chainerx_device):
                    gxs = self._function.backward(in_data, grad_out_data)
            else:
                gxs = self._function.backward(in_data, grad_out_data)

        # Check gradients
        for x, gx in six.moves.zip(self.inputs, gxs):
            if gx is not None:
                variable._check_grad_type(self, x, True, gx)

        # Convert input gradients back to ChainerX
        if is_chainerx_fallback_mode:
            gxs = backend.to_chx(gxs)

        ret = []
        for i in target_input_indexes:
            if gxs[i] is None:
                g = None
            else:
                # Intentionally not passing requires_grad=False so that
                # backprop routines can raise an error when a further backprop
                # is attempted against this gradient variable.
                g = variable.Variable(gxs[i])
                if g.xp is not chainerx:
                    g.node._old_style_grad_generator = self._function.label
            ret.append(g)

        return tuple(ret)
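This listing looks like an older revision of the same adapter method: output gradients are read through .data rather than .array, and the device scope comes from cuda.get_device_from_array instead of the backend-agnostic chainer.using_device. A hedged side-by-side of the two device-scope idioms (the array below is illustrative):

import numpy as np
import chainer
from chainer import backend
from chainer.backends import cuda

arr = np.zeros((2, 3), dtype=np.float32)

# Example 1's idiom: a device object covering NumPy, CuPy and ChainerX backends.
with chainer.using_device(backend.get_device_from_array(arr)):
    pass  # computation here targets the device that owns ``arr``

# This example's idiom: CUDA-centric; yields a no-op dummy device for NumPy arrays.
with cuda.get_device_from_array(arr):
    pass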
Example 3
    def backward(self, target_input_indexes, grad_outputs):
        in_data = tuple([input.data for input in self.inputs])
        grad_out_data = tuple(
            [None if grad is None else grad.data for grad in grad_outputs])

        with cuda.get_device_from_array(*(in_data + grad_out_data)):
            gxs = self._function.backward(in_data, grad_out_data)
        for x, gx in six.moves.zip(self.inputs, gxs):
            variable._check_grad_type(self, x, gx)

        ret = []
        for i in target_input_indexes:
            if gxs[i] is None:
                g = None
            else:
                # Intentionally not passing requires_grad=False so that
                # backprop routines can raise an error when a further backprop
                # is attempted against this gradient variable.
                g = variable.Variable(gxs[i])
                g.node._old_style_grad_generator = self._function.label
            ret.append(g)

        return tuple(ret)
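This is an earlier, simpler variant: it reads every input's .data directly (no retained-input bookkeeping, no ChainerX fallback) and uses the older _check_grad_type(self, x, gx) signature. Below is a hedged sketch of the raw-array contract the adapter enforces, using an illustrative Scale Function defined here rather than taken from the listing: backward() receives tuples of ndarrays (or None) and must return one gradient entry per input.

import numpy as np
from chainer import Function

class Scale(Function):
    def __init__(self, k):
        super().__init__()
        self.k = k

    def forward(self, inputs):
        x, = inputs
        return self.k * x,

    def backward(self, inputs, grad_outputs):
        gy, = grad_outputs
        # One entry per input; ``None`` would mean "no gradient for this input".
        return self.k * gy,

f = Scale(3.0)
in_data = (np.array([1.0, 2.0], dtype=np.float32),)
grad_out_data = (np.ones(2, dtype=np.float32),)
print(f.backward(in_data, grad_out_data))   # (array([3., 3.], dtype=float32),)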
Example 4
def forward_grad(self, rho=1e-3, decay=0.50, loss_scale=None):
    """test
    """
    self._node._check_old_style_gradient()
    if self.creator_node is None:
        return
    initial_device = None
    if cuda.available and isinstance(self.data, cuda.ndarray):
        try:
            initial_device = cuda.Device()
        except cuda.cupy.cuda.runtime.CUDARuntimeError as e:
            if e.status != 38:  # cudaErrorNoDevice
                raise

    is_debug = chainer.is_debug()

    cand_funcs = []
    seen_set = set()

    def add_cand(cand):
        if cand not in seen_set:
            # Negate since heapq is min-heap
            heapq.heappush(cand_funcs, (-cand.rank, len(seen_set), cand))
            seen_set.add(cand)

    add_cand(self.creator_node)

    cur_decay = 1.0
    while cand_funcs:
        _, _, func = heapq.heappop(cand_funcs)
        inputs = func.inputs
        target_input_indexes = [
            i for i, x in enumerate(inputs) if x.requires_grad
        ]
        if not target_input_indexes:
            continue

        in_data = tuple([x.data for x in inputs])
        cuda.get_device_from_array(*in_data).use()
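        # Functions opting in via ``with_frad`` expose forward_grad(in_data, rho),
        # which returns a (gW, gb) pair; scale it by the current decay factor.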
        if hasattr(func, 'with_frad') and func.with_frad:
            gW, gb = func.forward_grad(in_data, rho)
            gxs = [None, Variable(gW * cur_decay), Variable(gb * cur_decay)]
            cur_decay *= decay
        else:
            gxs = [None] * len(inputs)

        if is_debug:
            for gx in gxs:
                if gx is None:
                    continue
                gx_data = gx.data
                if gx_data.dtype.kind == 'f':
                    cuda.get_device_from_array(gx_data).use()
                    if cuda.get_array_module(gx_data).isnan(gx_data).any():
                        raise RuntimeError(
                            'NaN is detected on forward-grad computation of '
                            '{}'.format(func.label))

        for i, gx in enumerate(gxs):
            x = inputs[i]
            if x.creator_node is not None:
                add_cand(x.creator_node)

            if gx is None:
                continue

            if not x.requires_grad:
                continue

            _check_grad_type(func, x, gx.data)

            x_var = x.get_variable_or_none()
            if x_var is not None:
                x_var._grad_var = gx
                x_var._loss_scale = loss_scale

        del gxs  # to reduce memory usage
        if initial_device is not None:
            initial_device.use()
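This function is apparently meant to be bound to chainer.Variable (it mirrors the structure of Variable.backward): it pops creator functions from a heap keyed by negated rank so that higher-rank functions are processed first, and for functions exposing with_frad it stores decayed forward-grad estimates into the inputs' grad_var. A self-contained sketch of just that traversal pattern; the Node class and visit_by_rank are illustrative, not Chainer API:

import heapq

class Node:
    def __init__(self, label, rank, parents=()):
        self.label, self.rank, self.parents = label, rank, parents

def visit_by_rank(start):
    cand_funcs, seen_set = [], set()

    def add_cand(cand):
        if cand not in seen_set:
            # Negate the rank: heapq is a min-heap, so this pops the highest
            # rank first; len(seen_set) is a tie-breaking insertion counter.
            heapq.heappush(cand_funcs, (-cand.rank, len(seen_set), cand))
            seen_set.add(cand)

    add_cand(start)
    order = []
    while cand_funcs:
        _, _, node = heapq.heappop(cand_funcs)
        order.append(node.label)
        for parent in node.parents:
            add_cand(parent)
    return order

a = Node('a', 0)
b = Node('b', 1, (a,))
c = Node('c', 2, (b, a))
print(visit_by_rank(c))   # ['c', 'b', 'a'], higher-rank creators first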