Example #1
    def backward(self, retain_grad=False):
        """Runs error backpropagation (a.k.a. backprop) from this variable.

        On backprop, :meth:`Function.backward` is called on each
        :class:`Function` object appearing in the backward graph starting from
        this variable. The backward graph is represented by backward references
        from variables to their creators, and from functions to their inputs.
        The backprop stops at all root variables. Some functions set ``None``
        as gradients of some inputs, in which case further backprop does not
        take place at those input variables.

        This method uses :data:`grad` as the initial error array. The user can
        manually set a gradient array before calling this method. If
        :data:`data` contains only one element (i.e., it is a scalar) and
        :data:`grad` is ``None``, then this method automatically uses 1.0 as
        the initial error. This is useful when starting backprop from a scalar
        loss value.

        Args:
            retain_grad (bool): If ``True``, the gradient arrays of all
                intermediate variables are kept. Otherwise, :data:`grad` of the
                intermediate variables is set to ``None`` at the appropriate
                timing, which may reduce the maximum memory consumption.

                In most cases of training a model, the purpose of backprop
                is to compute gradients of parameters, not of intermediate
                variables, so it is recommended to set this flag to ``False``.

        """
        if self.creator is None:
            return

        cand_funcs = []
        seen_set = set()

        # Initialize the error with 1 if this is a loss (scalar) variable
        if self.data.size == 1 and self.grad is None:
            with cuda.using_device(self.data) as user:
                if user.is_active:
                    self.grad = cuda.ones_like(self.data)
                else:
                    self.grad = numpy.ones_like(self.data)

        def add_cand(cand):
            if cand is not None and cand not in seen_set:
                # Negate since heapq is min-heap
                heapq.heappush(cand_funcs, (-cand.rank, len(seen_set), cand))
                seen_set.add(cand)

        add_cand(self.creator)

        while cand_funcs:
            _, _, func = heapq.heappop(cand_funcs)
            outputs = tuple(y() for y in func.outputs)  # access via weak ref

            in_data = tuple(x.data for x in func.inputs)
            out_grad = tuple(y and y.grad for y in outputs)
            with cuda.using_device(*(in_data + out_grad)):
                gxs = func.backward(in_data, out_grad)
            assert len(gxs) == len(in_data)

            if not retain_grad:
                for y in outputs:
                    if y is not None and y is not self:
                        y.grad = None
            for x, gx in zip(func.inputs, gxs):
                x.grad = gx
                if gx is not None:  # skip if gradient does not flow
                    add_cand(x.creator)
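
A minimal usage sketch of this method (assuming the surrounding Chainer-style
``Variable`` class with element-wise arithmetic operators; the import path and
the concrete values below are illustrative, not taken from this example):

    import numpy
    from chainer import Variable

    # Single-element output: the initial error is seeded with 1.0
    # automatically, as described in the docstring.
    x = Variable(numpy.array([3.0], dtype=numpy.float32))
    w = Variable(numpy.array([2.0], dtype=numpy.float32))
    y = x * w
    y.backward()
    print(x.grad)  # dy/dx = w  (-> [2.])
    print(w.grad)  # dy/dw = x  (-> [3.])

    # Multi-element output: seed the initial error manually before backward().
    a = Variable(numpy.array([1.0, 2.0], dtype=numpy.float32))
    c = Variable(numpy.array([5.0, 7.0], dtype=numpy.float32))
    b = a * c
    b.grad = numpy.ones_like(b.data)
    b.backward()
    print(a.grad)  # db/da = c  (-> [5., 7.])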
Example #2
    def backward(self, retain_grad=False):
        """Runs error backpropagation (a.k.a. backprop) from this variable.

        On backprop, :meth:`Function.backward` is called on each
        :class:`Function` object appearing in the backward graph starting from
        this variable. The backward graph is represented by backward references
        from variables to their creators, and from functions to their inputs.
        The backprop stops at all root variables. Some functions set ``None``
        as gradients of some inputs, in which case further backprop does not
        take place at those input variables.

        This method uses :data:`grad` as the initial error array. The user can
        manually set a gradient array before calling this method. If
        :data:`data` contains only one element (i.e., it is a scalar) and
        :data:`grad` is ``None``, then this method automatically uses 1.0 as
        the initial error. This is useful when starting backprop from a scalar
        loss value.

        Args:
            retain_grad (bool): If ``True``, the gradient arrays of all
                intermediate variables are kept. Otherwise, :data:`grad` of the
                intermediate variables is set to ``None`` at the appropriate
                timing, which may reduce the maximum memory consumption.

                In most cases of training a model, the purpose of backprop
                is to compute gradients of parameters, not of intermediate
                variables, so it is recommended to set this flag to ``False``.

        """
        if self.creator is None:
            return

        cand_funcs = []
        seen_set = set()

        # Initialize the error with 1 if this is a loss (scalar) variable
        if self.data.size == 1 and self.grad is None:
            with cuda.using_device(self.data) as user:
                if user.is_active:
                    self.grad = cuda.ones_like(self.data)
                else:
                    self.grad = numpy.ones_like(self.data)

        def add_cand(cand):
            if cand is not None and cand not in seen_set:
                # Negate since heapq is min-heap
                heapq.heappush(cand_funcs, (-cand.rank, len(seen_set), cand))
                seen_set.add(cand)

        add_cand(self.creator)

        while cand_funcs:
            _, _, func = heapq.heappop(cand_funcs)
            outputs = tuple(y() for y in func.outputs)  # access via weak ref

            in_data = tuple(x.data for x in func.inputs)
            out_grad = tuple(y and y.grad for y in outputs)
            func._check_data_type_backward(in_data, out_grad)
            with cuda.using_device(*(in_data + out_grad)):
                gxs = func.backward(in_data, out_grad)
            assert len(gxs) == len(in_data)

            if not retain_grad:
                for y in outputs:
                    if y is not None and y is not self:
                        y.grad = None
            for x, gx in zip(func.inputs, gxs):
                x.grad = gx
                if gx is not None:  # skip if gradient does not flow
                    add_cand(x.creator)
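
The candidate queue above orders functions by descending ``rank`` (their depth
in the forward graph), negating the rank because ``heapq`` is a min-heap, and
uses ``len(seen_set)`` as an insertion counter so that equal-rank entries pop
in FIFO order without ever comparing the function objects themselves. A
self-contained sketch of that ordering, with made-up node names and ranks:

    import heapq

    class Node(object):
        def __init__(self, name, rank):
            self.name = name
            self.rank = rank

    cand_funcs = []
    seen_set = set()

    def add_cand(cand):
        if cand is not None and cand not in seen_set:
            # Negate the rank: heapq pops the smallest tuple first, but the
            # deepest (highest-rank) function must be processed first; the
            # counter breaks ties in FIFO order without comparing the nodes.
            heapq.heappush(cand_funcs, (-cand.rank, len(seen_set), cand))
            seen_set.add(cand)

    for node in (Node('loss', 3), Node('linear2', 2),
                 Node('relu', 2), Node('linear1', 1)):
        add_cand(node)

    while cand_funcs:
        _, _, node = heapq.heappop(cand_funcs)
        print(node.name)  # prints: loss, linear2, relu, linear1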
Example #3
    forward_preprocess_timer.start()
    x_batch = numpy.random.uniform(-1, 1,
                                   (model.batchsize,
                                    model.in_channels,
                                    model.insize,
                                    model.insize)).astype(numpy.float32)
    if args.gpu >= 0:
        x_batch = cuda.to_gpu(x_batch)
    forward_preprocess_times[iteration] = forward_preprocess_timer.milliseconds()

    forward_timer.start()
    y = model.forward(x_batch)
    forward_times[iteration] = forward_timer.milliseconds()

    backward_preprocess_timer.start()
    if args.gpu >= 0:
        y.grad = cuda.ones_like(y.data)
    else:
        y.grad = numpy.ones_like(y.data)
    backward_preprocess_times[iteration] = backward_preprocess_timer.milliseconds()

    backward_timer.start()
    y.backward()
    backward_times[iteration] = backward_timer.milliseconds()

    iter_times[iteration] = iter_timer.milliseconds()

total_timer.stop()

print('Forward Preprocess:')
print('average-forward-preprocess-pass\t{}\tms'.format(forward_preprocess_times.mean()))
print('Forward:')
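
The timer objects used above (``iter_timer``, ``forward_timer``, and so on)
are not defined in this excerpt. A minimal wall-clock stopwatch matching the
interface the loop relies on (``start()``, ``stop()``, and ``milliseconds()``
reporting the time elapsed since the last ``start()``) could look like the
sketch below; the actual benchmark may use a different, e.g.
GPU-synchronizing, implementation:

    import time

    class Timer(object):
        """Wall-clock stopwatch sketch: start(), stop(), milliseconds()."""

        def __init__(self):
            self._start = time.time()
            self._stop = None

        def start(self):
            self._start = time.time()
            self._stop = None

        def stop(self):
            self._stop = time.time()

        def milliseconds(self):
            # Elapsed time since start(), up to stop() if it was called.
            end = self._stop if self._stop is not None else time.time()
            return (end - self._start) * 1000.0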
Example #4
    forward_preprocess_timer.start()
    x_batch = numpy.random.uniform(-1, 1, (model.batchsize, model.in_channels,
                                           model.insize, model.insize)).astype(
                                               numpy.float32)
    if args.gpu >= 0:
        x_batch = cuda.to_gpu(x_batch)
    forward_preprocess_times[
        iteration] = forward_preprocess_timer.milliseconds()

    forward_timer.start()
    y = model.forward(x_batch)
    forward_times[iteration] = forward_timer.milliseconds()

    backward_preprocess_timer.start()
    if args.gpu >= 0:
        y.grad = cuda.ones_like(y.data)
    else:
        y.grad = numpy.ones_like(y.data)
    backward_preprocess_times[
        iteration] = backward_preprocess_timer.milliseconds()

    backward_timer.start()
    y.backward()
    backward_times[iteration] = backward_timer.milliseconds()

    iter_times[iteration] = iter_timer.milliseconds()

total_timer.stop()

print('Forward Preprocess:')
print('average-forward-preprocess-pass\t{}\tms'.format(