def forward(self, x, finetune=False):
        if self.gamma is not None:
            gamma = self.gamma
        else:
            with chainer.using_device(self.device):
                gamma = self.xp.ones(
                    self.avg_mean.shape, dtype=x.dtype)

        if self.beta is not None:
            beta = self.beta
        else:
            with chainer.using_device(self.device):
                beta = self.xp.zeros(
                    self.avg_mean.shape, dtype=x.dtype)

        if configuration.config.train:
            if finetune:
                self.N += 1
                decay = 1. - 1. / self.N
            else:
                decay = self.decay

            ret = batch_renormalization.batch_renormalization(
                x, gamma, beta, self.rmax, self.dmax,
                self.eps, self.avg_mean, self.avg_var, decay,
                update_statistics=True)
        else:
            # Use running average statistics or fine-tuned statistics.
            mean = self.avg_mean
            var = self.avg_var
            ret = batch_normalization.fixed_batch_normalization(
                x, gamma, beta, mean, var, self.eps)
        return ret
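All of these excerpts share the same device-scoping pattern. A minimal, self-contained sketch (not taken from any excerpt; only the public chainer API is assumed): chainer.using_device makes a device current so that array allocations inside the block land on that device.

import numpy as np
import chainer

# '@numpy' always exists; swap in e.g. '@cupy:0' when CuPy is available.
device = chainer.get_device('@numpy')
with chainer.using_device(device):
    # device.xp is the array module (numpy, cupy, or chainerx) for that device.
    ones = device.xp.ones((3,), dtype=np.float32)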
Example #2
    def check_forward(self, x1_data, x2_data, x3_data):
        xp = self.link.xp
        x1 = chainer.Variable(x1_data) if self.input_variable else x1_data
        h1 = self.link(x1)
        device = backend.get_device_from_array(x1_data)
        with chainer.using_device(device):
            c0 = chainer.Variable(xp.zeros((len(self.x1), self.out_size),
                                           dtype=self.x1.dtype))
            c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x1))
        testing.assert_allclose(h1.data, h1_expect.data)
        testing.assert_allclose(self.link.h.data, h1_expect.data)
        testing.assert_allclose(self.link.c.data, c1_expect.data)

        batch = len(x2_data)
        x2 = chainer.Variable(x2_data) if self.input_variable else x2_data
        h1_in, h1_rest = functions.split_axis(
            self.link.h.data, [batch], axis=0)
        y2 = self.link(x2)
        device = backend.get_device_from_array(x2_data)
        with chainer.using_device(device):
            c2_expect, y2_expect = \
                functions.lstm(c1_expect,
                               self.link.upward(x2) + self.link.lateral(h1_in))
        testing.assert_allclose(y2.data, y2_expect.data)
        testing.assert_allclose(self.link.h.data[:batch], y2_expect.data)
        testing.assert_allclose(self.link.h.data[batch:], h1_rest.data)

        x3 = chainer.Variable(x3_data) if self.input_variable else x3_data
        h2_rest = self.link.h
        y3 = self.link(x3)
        c3_expect, y3_expect = \
            functions.lstm(c2_expect, self.link.upward(x3))
        testing.assert_allclose(y3.data, y3_expect.data)
        testing.assert_allclose(self.link.h.data, h2_rest.data)
Example #3
 def normalize_weight(self, link):
     """Normalize target weight before every single forward computation."""
     weight_name, vector_name = self.weight_name, self.vector_name
     W = getattr(link, weight_name)
     u = getattr(link, vector_name)
     weight_matrix = self.reshape_W(W)
     if not configuration.config.in_recomputing:
         with chainer.using_device(link.device):
             u, v = update_approximate_vectors(
                 weight_matrix, u, self.n_power_iteration, self.eps)
     else:
         v = self.v
     sigma = calculate_max_singular_value(weight_matrix, u, v)
     if self.factor is not None:
         sigma /= self.factor
     if self.use_gamma:
         W = link.gamma * W / sigma
     else:
         W = W / sigma
     if not configuration.config.in_recomputing:
         self.v = v
         with chainer.using_device(link.device):
             if configuration.config.train:
                 if link.xp is chainerx:
                     # TODO(crcrpar): Remove this when
                     # chainerx supports `copyto`.
                     getattr(link, vector_name)[:] = u
                 else:
                     backend.copyto(getattr(link, vector_name), u)
     return W
Example #4
    def forward(self, c, h, x):
        """Returns new cell state and updated output of LSTM.

        Args:
            c (~chainer.Variable): Cell states of LSTM units.
            h (~chainer.Variable): Output at the previous time step.
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            tuple of ~chainer.Variable: Returns ``(c_new, h_new)``, where
            ``c_new`` represents new cell state, and ``h_new`` is updated
            output of LSTM units.

        """
        if self.upward.W.array is None:
            in_size = x.size // x.shape[0]
            with chainer.using_device(self.device):
                self.upward._initialize_params(in_size)
                self._initialize_params()

        lstm_in = self.upward(x)
        if h is not None:
            lstm_in += self.lateral(h)
        if c is None:
            xp = self.xp
            with chainer.using_device(self.device):
                c = variable.Variable(
                    xp.zeros((x.shape[0], self.state_size), dtype=x.dtype))
        return lstm.lstm(c, lstm_in)
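The forward above follows the stateless LSTM step signature. A hedged usage sketch, assuming the method belongs to the public chainer.links.StatelessLSTM link:

import numpy as np
import chainer.links as L

lstm = L.StatelessLSTM(3, 4)                   # in_size=3, out_size=4
x = np.random.randn(2, 3).astype(np.float32)  # batch of 2
c1, h1 = lstm(None, None, x)                   # missing states are created as zeros
c2, h2 = lstm(c1, h1, x)                       # later steps pass the previous state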
Example #6
 def check_equal_memory_shared(self, arr1, arr2):
     # Check that the two arrays share the internal memory.
     numpy.testing.assert_array_equal(backend.CpuDevice().send(arr1),
                                      backend.CpuDevice().send(arr2))
     with chainer.using_device(backend.get_device_from_array(arr1)):
         arr1 += 2
     numpy.testing.assert_array_equal(backend.CpuDevice().send(arr1),
                                      backend.CpuDevice().send(arr2))
     with chainer.using_device(backend.get_device_from_array(arr1)):
         arr1 -= 2
Example #7
 def check_equal_memory_shared(self, arr1, arr2):
     # Check that the two arrays share the internal memory.
     numpy.testing.assert_array_equal(
         backend.CpuDevice().send(arr1), backend.CpuDevice().send(arr2))
     with chainer.using_device(backend.get_device_from_array(arr1)):
         arr1 += 2
     numpy.testing.assert_array_equal(
         backend.CpuDevice().send(arr1), backend.CpuDevice().send(arr2))
     with chainer.using_device(backend.get_device_from_array(arr1)):
         arr1 -= 2
Example #8
    def forward(self, x, finetune=False):
        if self.gamma is not None:
            gamma = self.gamma
        else:
            with chainer.using_device(self.device):
                gamma = self.xp.ones(self.avg_mean.shape, dtype=x.dtype)

        if self.beta is not None:
            beta = self.beta
        else:
            with chainer.using_device(self.device):
                beta = self.xp.zeros(self.avg_mean.shape, dtype=x.dtype)

        if configuration.config.train:
            if finetune:
                self.N += 1
                decay = 1. - 1. / self.N
            else:
                decay = self.decay

            avg_mean = self.avg_mean
            avg_var = self.avg_var
            update_statistics = True

            if chainer.config.in_recomputing:
                # Do not update statistics when extra forward computation is
                # called.
                if finetune:
                    self.N -= 1  # Revert the count
                avg_mean = self._prev_avg_mean
                avg_var = self._prev_avg_var
                update_statistics = False
            elif chainer.config._will_recompute:
                self._prev_avg_mean = avg_mean.copy()
                self._prev_avg_var = avg_var.copy()

            ret = batch_renormalization.batch_renormalization(
                x,
                gamma,
                beta,
                self.rmax,
                self.dmax,
                self.eps,
                avg_mean,
                avg_var,
                decay,
                update_statistics=update_statistics)
        else:
            # Use running average statistics or fine-tuned statistics.
            mean = self.avg_mean
            var = self.avg_var
            ret = batch_normalization.fixed_batch_normalization(
                x, gamma, beta, mean, var, self.eps)
        return ret
Example #9
def generate_array(initializer, shape, xp, dtype=None, device=None):
    # type: (types.AbstractInitializer, types.ShapeSpec, types.Xp, types.DTypeSpec, types.DeviceSpec) -> types.NdArray  # NOQA
    """Return initialized array.

    The algorithms used to make the new values depend on the
    concrete derived classes. If the initializer has the ``dtype`` attribute,
    it is used to construct the array. Otherwise, ``chainer.config.dtype`` is
    used instead. See :ref:`configuration` for the dtype config.

    Args:
        initializer: A callable object that takes :ref:`ndarray` and edits its
            value.
        shape (int or tuple of int): Shape of the initialized array.
        xp (module): :mod:`cupy`, :mod:`numpy`, or :mod:`chainerx`.
        dtype: Dtype specifier. If omitted, ``initializer.dtype`` is used.
        device: Target device specifier. If omitted, the current device is
             used for :mod:`cupy`, and the default device is used for
             :mod:`chainerx`.

    Returns:
        :ref:`ndarray`: An initialized array.

    """
    dtype_attr = getattr(initializer, 'dtype', None)
    if dtype is not None and dtype_attr is not None \
            and numpy.dtype(dtype) != numpy.dtype(dtype_attr):
        raise ValueError('dtype mismatch: {} != {}'.format(dtype, dtype_attr))
    if dtype is None:
        dtype = dtype_attr
    dtype = chainer.get_dtype(dtype)

    if device is None:
        backend_device = backend._guess_device_from_array_module(xp)
    else:
        backend_device = chainer.get_device(device)
        if xp != backend_device.xp:
            raise ValueError('xp and device arguments are inconsistent.')

    if xp is chainerx:
        # Initialize with NumPy/CuPy array that shares memory with the
        # ChainerX array.
        # TODO(sonots): Directly use initializer after ChainerX
        # supports random.
        chx_device = backend_device.device
        array = chainerx.empty(shape, dtype=dtype, device=chx_device)
        fallback_device = backend_device.fallback_device
        with chainer.using_device(fallback_device):
            initializer(fallback_device.send(array))
        return array

    with chainer.using_device(backend_device):
        array = xp.empty(shape, dtype=dtype)
        initializer(array)
    return array
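A brief usage sketch, assuming the function above is chainer.initializers.generate_array (the public helper with this signature):

import numpy
from chainer import initializers

w = initializers.generate_array(
    initializers.Normal(scale=0.05), (2, 3), numpy, dtype=numpy.float32)
# w is a (2, 3) float32 numpy.ndarray drawn from N(0, 0.05**2).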
Example #10
    def __call__(self, x, **kwargs):
        argument.check_unexpected_kwargs(
            kwargs,
            test='test argument is not supported anymore. '
            'Use chainer.using_config')
        finetune, = argument.parse_kwargs(kwargs, ('finetune', False))

        # reshape input x for instance normalization
        shape_org = x.shape
        B, C = shape_org[:2]
        shape_ins = (1, B * C) + shape_org[2:]
        x_reshaped = functions.reshape(x, shape_ins)

        gamma = self.gamma
        if gamma is None:
            with chainer.using_device(self.device):
                gamma = self.xp.ones(self.avg_mean.shape, dtype=self._dtype)

        beta = self.beta
        if beta is None:
            with chainer.using_device(self.device):
                beta = self.xp.zeros(self.avg_mean.shape, dtype=self._dtype)

        gamma = functions.tile(gamma, (B, ))
        beta = functions.tile(beta, (B, ))
        mean = self.xp.tile(self.avg_mean, (B, ))
        var = self.xp.tile(self.avg_var, (B, ))

        # instance normalization is always done in training mode
        if finetune:
            self.N += 1
            decay = 1. - 1. / self.N
        else:
            decay = self.decay

        ret = functions.batch_normalization(x_reshaped,
                                            gamma,
                                            beta,
                                            eps=self.eps,
                                            running_mean=mean,
                                            running_var=var,
                                            decay=decay)

        self.avg_mean = mean.reshape(B, C).mean(axis=0)
        self.avg_var = var.reshape(B, C).mean(axis=0)

        # ret is normalized input x
        return functions.reshape(ret, shape_org)
Example #11
    def predict(self, img_feats, bos, eos, max_caption_length):
        """Batch of image features to captions."""
        hx, cx, _ = self.reset(img_feats)

        with chainer.using_device(self.device):
            xp = self.xp
            captions = xp.full((img_feats.shape[0], 1), bos, dtype=np.int32)
            for i in range(max_caption_length):
                # Create a list of the previous tokens to treat as inputs
                xs = [xp.atleast_1d(c[-1]) for c in captions]

                # Get the predictions `ys`
                hx, cx, ys = self.step(hx, cx, xs)

                # From `ys`, get the indices for the highest confidence.
                # These indices correspond to the predicted tokens
                #
                # Note that this is a greedy approach and that it can be
                # replaced by e.g. beam search
                pred = ys.array.argmax(axis=1).astype(np.int32)
                captions = xp.hstack((captions, pred[:, None]))

                if (pred == eos).all():
                    break
        return captions
Example #12
def _as_array(data):
    if isinstance(data, chainer.get_array_types()):
        return data
    else:
        device = chainer.backend.get_device_from_array(data[0])
        with chainer.using_device(device):
            return device.xp.asarray(data)
Example #13
    def forward(self, x):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        lstm_in = self.upward(x)
        if self.h is not None:
            lstm_in += self.lateral(self.h)
        if self.c is None:
            xp = self.xp
            with chainer.using_device(self.device):
                self.c = variable.Variable(
                    xp.zeros((len(x), self.state_size), dtype=x.dtype))
        lstm_in = reshape.reshape(lstm_in,
                                  (len(lstm_in), lstm_in.shape[1] // 4, 4))
        a, i, f, o = split_axis.split_axis(lstm_in, 4, 2)
        a = reshape.reshape(a, a.shape[:2])
        i = reshape.reshape(i, i.shape[:2])
        f = reshape.reshape(f, f.shape[:2])
        o = reshape.reshape(o, o.shape[:2])
        peep_in_i = self.peep_i(self.c)
        peep_in_f = self.peep_f(self.c)
        a = tanh.tanh(a)
        i = sigmoid.sigmoid(i + peep_in_i)
        f = sigmoid.sigmoid(f + peep_in_f)
        self.c = a * i + f * self.c
        peep_in_o = self.peep_o(self.c)
        o = sigmoid.sigmoid(o + peep_in_o)
        self.h = o * tanh.tanh(self.c)
        return self.h
Example #14
    def forward(self, x):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        lstm_in = self.upward(x)
        if self.h is not None:
            lstm_in += self.lateral(self.h)
        if self.c is None:
            xp = self.xp
            with chainer.using_device(self.device):
                self.c = variable.Variable(
                    xp.zeros((len(x), self.state_size), dtype=x.dtype))
        lstm_in = reshape.reshape(
            lstm_in, (len(lstm_in), lstm_in.shape[1] // 4, 4))
        a, i, f, o = split_axis.split_axis(lstm_in, 4, 2)
        a = reshape.reshape(a, a.shape[:2])
        i = reshape.reshape(i, i.shape[:2])
        f = reshape.reshape(f, f.shape[:2])
        o = reshape.reshape(o, o.shape[:2])
        peep_in_i = self.peep_i(self.c)
        peep_in_f = self.peep_f(self.c)
        a = tanh.tanh(a)
        i = sigmoid.sigmoid(i + peep_in_i)
        f = sigmoid.sigmoid(f + peep_in_f)
        self.c = a * i + f * self.c
        peep_in_o = self.peep_o(self.c)
        o = sigmoid.sigmoid(o + peep_in_o)
        self.h = o * tanh.tanh(self.c)
        return self.h
Example #15
    def __init__(self, prediction_config_path="prediction_config.json", gpu=-1):
        self.gpu = gpu

        with open(prediction_config_path) as prediction_config_file:
            prediction_config = json.load(prediction_config_file)

        classes = sorted(prediction_config["classes"])
        long_class_label_dict = {
            "alpha_num": "Alphanumeric",
            "alphanum": "Alphanumeric",
            "date": "Date",
            "num": "Number",
            "plz": "Zip Code",
            "text": "Word"
        }
        self.idx_to_label_map = {i: long_class_label_dict[label] for i, label in enumerate(classes)}

        self.input_image_size = prediction_config["input_image_size"]
        self.base_model = PooledResNet(prediction_config["resnet_size"])
        self.model = CrossEntropyClassifier(self.base_model, len(classes))

        with numpy.load(prediction_config["model_path"]) as f:
            chainer.serializers.NpzDeserializer(f, strict=True).load(self.model)

        if int(self.gpu) >= 0:
            with chainer.using_device(chainer.get_device(self.gpu)):
                self.base_model.to_device(self.gpu)
                self.model.to_device(self.gpu)
Example #16
    def backward(self, indexes, flat_gys):
        device = chainer.backend.get_device_from_array(flat_gys[0].array)
        gys, _ = _unflatten(flat_gys, self.nested_outputs)
        retained = self.retained
        gys = [self._to_var(gy) for gy in gys]
        values = gys + retained

        del self.retained
        del self.nested_outputs

        inputs = {}
        assert len(self.bwd_input_names) == len(values)
        for name, value in zip(self.bwd_input_names, values):
            inputs[name] = value

        with chainer.using_device(self.chainerx_device_name):
            outputs = self.bwd.run(inputs)
        gxs = []
        assert len(self.input_tmpl) == len(self.fwd_input_names)
        for name, tmpl in zip(self.fwd_input_names, self.input_tmpl):
            grad_name = 'grad_out@' + name
            if grad_name in outputs:
                gx = _from_var(outputs[grad_name], device)
                if _is_array(tmpl):
                    gxs.append(gx)
                else:
                    assert len(gx) == len(tmpl)
                    gxs.extend(_flatten_structured(gx, tmpl))
            else:
                gxs.extend([None] * len(_flatten(tmpl)))

        gxs = tuple(None if gx is None else chainer.Variable(gx) for gx in gxs)
        return gxs
Example #17
    def evaluate_image(
            self,
            image: numpy.ndarray,
            return_boxes: bool = False) -> Union[bool, Tuple[bool, list]]:
        if self.needs_patches:
            patches, bboxes = self.prediction_helper.create_sliding_window(
                image)
        else:
            patches = image[numpy.newaxis, ...]

        network = self.network
        device = chainer.get_device(network.device)

        xp = numpy
        with chainer.using_device(device), chainer.configuration.using_config(
                'train', False):
            predicted_patches = []
            for patch in patches:
                batch = [{'image': patch}]
                batch = concat_examples(batch, device)

                xp = get_array_module(batch['image'])
                predictions = network(**batch)
                predicted_patches.append(xp.argmax(predictions.array, axis=1))

            predicted_patches = xp.stack(predicted_patches, axis=0)
            contains_handwriting = chainer.backends.cuda.to_cpu(
                predicted_patches == 1)

            if return_boxes:
                assert self.needs_patches, "Can not return boxes if we do not need patches"
                return contains_handwriting, bboxes
            else:
                return contains_handwriting.any()
Example #18
 def __call__(self, rule, param):
     grad = param.grad
     if grad is None:
         return
     with chainer.using_device(param.device):
         xp = param.device.xp
         xp.clip(grad, self.lower_bound, self.upper_bound, out=grad)
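The hook above uses the (rule, param) signature of per-parameter optimizer hooks. A hedged sketch of how such a hook is typically attached, using the built-in chainer.optimizer_hooks.GradientHardClipping as a stand-in:

import chainer.links as L
from chainer import optimizers, optimizer_hooks

model = L.Linear(3, 2)
opt = optimizers.SGD(lr=0.01)
opt.setup(model)
# Clips every gradient element into [-1, 1] before each parameter update.
opt.add_hook(optimizer_hooks.GradientHardClipping(-1.0, 1.0))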
Example #19
    def forward(self, args):
        flat_inputs = args[:self.num_inputs]
        param_values = args[self.num_inputs:]
        device = chainer.backend.get_device_from_array(*flat_inputs)
        inputs, i = _unflatten(flat_inputs, self.input_tmpl)
        assert i == len(flat_inputs)

        entire_inputs = {}
        assert len(self.fwd_input_names) == len(inputs)
        for name, value in zip(self.fwd_input_names, inputs):
            entire_inputs[name] = self._to_var(value)
        assert len(self.param_names) == len(param_values)
        for name, value in zip(self.param_names, param_values):
            entire_inputs[name] = self._to_var(value)

        with chainer.using_device(self.chainerx_device_name):
            outputs = self.fwd.run(entire_inputs, **self.runtime_kwargs)
        outputs_and_retained = []
        for name in self.fwd_output_names:
            outputs_and_retained.append(outputs[name])

        self.retained = outputs_and_retained[self.num_outputs:]
        # TODO(hamaji): Do not hold actual arrays.
        self.nested_outputs = []
        for output in outputs_and_retained[:self.num_outputs]:
            self.nested_outputs.append(_from_var(output, device))
        flat_outputs = _flatten(self.nested_outputs)
        return tuple(flat_outputs)
Example #20
    def forward(self, x):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        if self.upward.W.array is None:
            with chainer.using_device(self.device):
                in_size = utils.size_of_shape(x.shape[1:])
                self.upward._initialize_params(in_size)
                self._initialize_params()

        batch = x.shape[0]
        lstm_in = self.upward(x)
        h_rest = None
        if self.h is not None:
            h_size = self.h.shape[0]
            if batch == 0:
                h_rest = self.h
            elif h_size < batch:
                msg = ('The batch size of x must be equal to or less than '
                       'the size of the previous state h.')
                raise TypeError(msg)
            elif h_size > batch:
                h_update, h_rest = split_axis.split_axis(
                    self.h, [batch], axis=0)
                lstm_in += self.lateral(h_update)
            else:
                lstm_in += self.lateral(self.h)
        if self.c is None:
            with chainer.using_device(self.device):
                self.c = variable.Variable(
                    self.xp.zeros((batch, self.state_size), dtype=x.dtype))
        self.c, y = lstm.lstm(self.c, lstm_in)

        if h_rest is None:
            self.h = y
        elif len(y.array) == 0:
            self.h = h_rest
        else:
            self.h = concat.concat([y, h_rest], axis=0)

        return y
Example #21
    def forward(self, x):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        if self.upward.W.array is None:
            with chainer.using_device(self.device):
                in_size = utils.size_of_shape(x.shape[1:])
                self.upward._initialize_params(in_size)
                self._initialize_params()

        batch = x.shape[0]
        lstm_in = self.upward(x)
        h_rest = None
        if self.h is not None:
            h_size = self.h.shape[0]
            if batch == 0:
                h_rest = self.h
            elif h_size < batch:
                msg = ('The batch size of x must be equal to or less than '
                       'the size of the previous state h.')
                raise TypeError(msg)
            elif h_size > batch:
                h_update, h_rest = split_axis.split_axis(self.h, [batch],
                                                         axis=0)
                lstm_in += self.lateral(h_update)
            else:
                lstm_in += self.lateral(self.h)
        if self.c is None:
            with chainer.using_device(self.device):
                self.c = variable.Variable(
                    self.xp.zeros((batch, self.state_size), dtype=x.dtype))
        self.c, y = lstm.lstm(self.c, lstm_in)

        if h_rest is None:
            self.h = y
        elif len(y.array) == 0:
            self.h = h_rest
        else:
            self.h = concat.concat([y, h_rest], axis=0)

        return y
Example #22
    def init_state(self, param):
        with chainer.using_device(param.device):
            self.state['v'] = param.device.xp.zeros_like(param.data)

        # For iDeep
        if isinstance(param.data, intel64.mdarray):
            self.state['v'] = intel64.ideep.array(
                self.state['v'], itype=intel64.ideep.wgt_array)
Example #23
    def update_core(self):
        optimizer = self.get_optimizer('main')
        model_main = optimizer.target
        models_others = {
            k: v
            for k, v in self._models.items() if v is not model_main
        }

        iterator = self.get_iterator('main')
        batch = iterator.next()

        #
        # Split the batch to sub-batches.
        #
        n = len(self._models)
        in_arrays_list = {}
        for i, key in enumerate(six.iterkeys(self._models)):
            in_arrays_list[key] = self.converter(batch[i::n],
                                                 self._devices[key])

        # For reducing memory
        for model in six.itervalues(self._models):
            model.cleargrads()

        losses = []
        for model_key, model in six.iteritems(self._models):
            in_arrays = in_arrays_list[model_key]
            loss_func = self.loss_func or model

            with function.force_backprop_mode():
                with chainer.using_device(self._devices[model_key]):
                    if isinstance(in_arrays, tuple):
                        loss = loss_func(*in_arrays)
                    elif isinstance(in_arrays, dict):
                        loss = loss_func(**in_arrays)
                    else:
                        loss = loss_func(in_arrays)

            losses.append(loss)

        # For _uninitialized_params
        for model in six.itervalues(self._models):
            model.cleargrads()

        for loss in losses:
            loss.backward(loss_scale=self.loss_scale)

        for model in six.itervalues(models_others):
            model_main.addgrads(model)

        optimizer.update()

        for model in six.itervalues(models_others):
            model.copyparams(model_main)

        if self.auto_new_epoch and iterator.is_new_epoch:
            optimizer.new_epoch(auto=True)
Example #24
    def update_core(self):
        with chainer.using_device(self.device):
            loss_localizer = self.update_localizer()
            # loss_recognizer = self.update_recognizer()

            self.log_results({
                "loss/localizer": loss_localizer,
                # "loss/recognizer": loss_recognizer
            })
Example #25
    def forward(self, x, finetune=False):
        if self.gamma is not None:
            gamma = self.gamma
        else:
            with chainer.using_device(self.device):
                gamma = self.xp.ones(
                    self.avg_mean.shape, dtype=x.dtype)

        if self.beta is not None:
            beta = self.beta
        else:
            with chainer.using_device(self.device):
                beta = self.xp.zeros(
                    self.avg_mean.shape, dtype=x.dtype)

        if configuration.config.train:
            if finetune:
                self.N += 1
                decay = 1. - 1. / self.N
            else:
                decay = self.decay

            avg_mean = self.avg_mean
            avg_var = self.avg_var

            if chainer.config.in_recomputing:
                # Do not update statistics when extra forward computation is
                # called.
                if finetune:
                    self.N -= 1  # Revert the count
                avg_mean = self.xp.zeros_like(self.avg_mean)
                avg_var = self.xp.zeros_like(self.avg_var)

            ret = batch_renormalization.batch_renormalization(
                x, gamma, beta, self.rmax, self.dmax,
                self.eps, avg_mean, avg_var, decay,
                update_statistics=True)
        else:
            # Use running average statistics or fine-tuned statistics.
            mean = self.avg_mean
            var = self.avg_var
            ret = batch_normalization.fixed_batch_normalization(
                x, gamma, beta, mean, var, self.eps)
        return ret
Example #26
def get_embeddings(model, dataset, batch_size, xp):
    with chainer.using_device(model.device):
        embeddings = []
        for i in range(0, len(dataset), batch_size):
            batch = xp.array(list(dataset[i:i + batch_size]))
            embedding_batch = model(batch)
            embedding_flat = cuda.to_cpu(embedding_batch.array)
            embeddings.extend(embedding_flat)

    return np.array(embeddings)
Example #27
 def __call__(self, rule, param):
     p, g = param.data, param.grad
     if p is None or g is None:
         return
     with chainer.using_device(param.device):
         if param.device.xp is cuda.cupy:
             kernel = cuda.elementwise('T p, T decay', 'T g',
                                       'g += decay * p', 'weight_decay')
             kernel(p, self.rate, g)
         else:
             g += self.rate * p
Example #28
    def initialize(self):
        if self.initialized:
            return

        self.initialized = True

        if self.device_id >= 0:
            with chainer.using_device(chainer.get_device(self.device_id)):
                self.network = self.load_network(self.device_id)
        else:
            self.network = self.load_network('@numpy')
Example #29
    def backward(self, target_input_indexes, grad_outputs):
        retained_inputs = self.get_retained_inputs()
        inputs = [None] * len(self.inputs)
        in_data = [None] * len(self.inputs)
        for retained, i_in in six.moves.zip(retained_inputs,
                                            self._input_indexes_to_retain):
            inputs[i_in] = retained
            in_data[i_in] = None if retained is None else retained.array
        in_data = tuple(in_data)

        grad_out_data = tuple(
            [None if grad is None else grad.array for grad in grad_outputs])

        is_chainerx_fallback_mode = self._is_chainerx_fallback_mode
        if is_chainerx_fallback_mode:
            # Convert input and output gradients to numpy/cupy
            in_data = backend.from_chx(in_data)
            grad_out_data = backend.from_chx(grad_out_data)

        # Call Function.backward
        with chainer.using_device(
                backend.get_device_from_array(*(in_data + grad_out_data))):
            if is_chainerx_fallback_mode:
                # Enable attribute fallback
                with function_node._chainerx_attribute_fallback(
                        self._function, self.chainerx_device):
                    gxs = self._function.backward(in_data, grad_out_data)
            else:
                gxs = self._function.backward(in_data, grad_out_data)

        # Check gradients
        for x, gx in six.moves.zip(self.inputs, gxs):
            if gx is not None:
                variable._check_grad_type(self, x, True, gx)

        # Convert input gradients back to ChainerX
        if is_chainerx_fallback_mode:
            gxs = backend.to_chx(gxs)

        ret = []
        for i in target_input_indexes:
            if gxs[i] is None:
                g = None
            else:
                # Intentionally not passing requires_grad=False so that
                # backprop routines can raise an error when a further backprop
                # is attempted against this gradient variable.
                g = variable.Variable(gxs[i])
                if g.xp is not chainerx:
                    g.node._old_style_grad_generator = self._function.label
            ret.append(g)

        return tuple(ret)
Example #30
    def forward(self, link, inputs, device):
        x, = inputs

        device_1 = backend.GpuDevice.from_device_id(1)
        link.to_device(device_1)
        x.to_device(device_1)

        device_0 = backend.GpuDevice.from_device_id(0)
        with chainer.using_device(device_0):
            with chainer.using_config('train', not self.test):
                y = link(x, finetune=self.finetune)
        return y,
Example #32
 def __enter__(self):
     self._contexts = [
         chainer.using_config('use_cudnn', self.use_cudnn),
         chainer.using_config('cudnn_deterministic',
                              self.cudnn_deterministic),
         chainer.using_config('autotune', self.autotune),
         chainer.using_config('use_ideep', self.use_ideep),
         chainer.using_device(self.device),
     ]
     for c in self._contexts:
         c.__enter__()
     return self
Example #33
    def _prepare(self, param):
        device = param.device
        with chainer.using_device(device):
            state = self.state
            if state is None:
                state = self._state = {}
                self.init_state(param)

            for name, value in six.iteritems(state):
                if not isinstance(value, chainer.get_array_types()):
                    continue
                state[name] = device.send(value)
Example #35
    def __call__(self, **kwargs):
        image = kwargs.pop('image', None)
        words = kwargs.pop('words', None)
        return_predictions = kwargs.pop('return_predictions', False)

        batch_size, images_per_image, num_channels, height, width = image.shape
        image = self.xp.reshape(image, (-1, num_channels, height, width))

        with chainer.using_device(self.device):
            rois, bboxes = self.localizer.predict(image)[:2]
            predicted_words, raw_classification_result = self.recognizer.predict(
                rois, return_raw_classification_result=True)
            predicted_words = F.reshape(predicted_words,
                                        (batch_size, images_per_image) +
                                        predicted_words.shape[1:])
            raw_classification_result = F.reshape(
                raw_classification_result, (batch_size, images_per_image) +
                raw_classification_result.shape[1:])

            best_indices, scores = self.determine_best_prediction_indices(
                raw_classification_result)
            chosen_indices = best_indices
            self.calc_word_accuracy(
                self.xp.concatenate([
                    predicted_words[i, best_indices[i]].array
                    for i in range(batch_size)
                ],
                                    axis=0),
                words,
                self.strip_non_alphanumeric_predictions,
            )
            if not self.only_return_best_result:
                best_indices = self.xp.arange(images_per_image)[None, ...]
                best_indices = self.xp.tile(best_indices, (batch_size, 1))
            predicted_words = self.xp.stack([
                predicted_words[i, best_indices[i]].array
                for i in range(batch_size)
            ],
                                            axis=0)

        if return_predictions:
            rois = F.reshape(rois,
                             (batch_size, images_per_image) + rois.shape[1:])
            bboxes = F.reshape(bboxes, (batch_size, images_per_image) +
                               bboxes.shape[1:])
            rois = self.xp.stack(
                [rois[i, best_indices[i]].array for i in range(batch_size)],
                axis=0)
            bboxes = self.xp.stack(
                [bboxes[i, best_indices[i]].array for i in range(batch_size)],
                axis=0)
            return rois, bboxes, predicted_words, best_indices, chosen_indices, scores
Example #36
    def check_multi_devices_forward(self, device_0, device_1):
        layer, hook = self.layer, self.hook
        layer.add_hook(hook)
        layer.to_device(device_1)
        x = device_1.send(self.x)

        msg = None
        with chainer.using_device(device_0):
            try:
                layer(x)
            except Exception as e:
                msg = e
        assert msg is None
Example #37
    def forward(self, x):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        lstm_in = self.upward(x)
        if self.h is not None:
            lstm_in += self.lateral(self.h)
        else:
            xp = self.xp
            with chainer.using_device(self.device):
                self.h = variable.Variable(
                    xp.zeros((len(x), self.state_size), dtype=x.dtype))
        if self.c is None:
            xp = self.xp
            with chainer.using_device(self.device):
                self.c = variable.Variable(
                    xp.zeros((len(x), self.state_size), dtype=x.dtype))

        lstm_in = reshape.reshape(
            lstm_in, (len(lstm_in), lstm_in.shape[1] // 4, 4))

        a, i, f, o = split_axis.split_axis(lstm_in, 4, 2)
        a = reshape.reshape(a, (len(a), self.state_size))
        i = reshape.reshape(i, (len(i), self.state_size))
        f = reshape.reshape(f, (len(f), self.state_size))
        o = reshape.reshape(o, (len(o), self.state_size))

        c_tmp = tanh.tanh(a) * sigmoid.sigmoid(i) + sigmoid.sigmoid(f) * self.c
        self.c = zoneout.zoneout(self.c, c_tmp, self.c_ratio)
        self.h = zoneout.zoneout(self.h,
                                 sigmoid.sigmoid(o) * tanh.tanh(c_tmp),
                                 self.h_ratio)
        return self.h
Example #38
def add_noise(device, h, sigma=0.2):
    if chainer.config.train:
        xp = device.xp
        # TODO(niboshi): Support random.randn in ChainerX
        if device.xp is chainerx:
            fallback_device = device.fallback_device
            with chainer.using_device(fallback_device):
                randn = device.send(fallback_device.xp.random.randn(*h.shape))
        else:
            randn = xp.random.randn(*h.shape)
        return h + sigma * randn
    else:
        return h
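A small usage sketch for the helper above (hypothetical values; it relies on the excerpt's own chainer/chainerx imports, and noise is only injected while chainer.config.train is True):

import numpy as np
import chainer

device = chainer.get_device('@numpy')
h = device.xp.zeros((2, 4), dtype=np.float32)
with chainer.using_config('train', True):
    h_noisy = add_noise(device, h, sigma=0.2)  # h plus 0.2 * N(0, 1) noise
with chainer.using_config('train', False):
    h_same = add_noise(device, h)              # returned unchanged in test mode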
Example #39
 def __call__(self, rule, param):
     p, g = param.data, param.grad
     if p is None or g is None:
         return
     with chainer.using_device(param.device):
         xp = param.device.xp
         sign = xp.sign(p)
         if xp is cuda.cupy:
             kernel = cuda.elementwise(
                 'T s, T decay', 'T g', 'g += decay * s', 'lasso')
             kernel(sign, self.rate, g)
         else:
             g += self.rate * sign
Example #40
    def forward(self, x):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        lstm_in = self.upward(x)
        if self.h is not None:
            lstm_in += self.lateral(self.h)
        else:
            xp = self.xp
            with chainer.using_device(self.device):
                self.h = variable.Variable(
                    xp.zeros((len(x), self.state_size), dtype=x.dtype))
        if self.c is None:
            xp = self.xp
            with chainer.using_device(self.device):
                self.c = variable.Variable(
                    xp.zeros((len(x), self.state_size), dtype=x.dtype))

        lstm_in = reshape.reshape(lstm_in,
                                  (len(lstm_in), lstm_in.shape[1] // 4, 4))

        a, i, f, o = split_axis.split_axis(lstm_in, 4, 2)
        a = reshape.reshape(a, (len(a), self.state_size))
        i = reshape.reshape(i, (len(i), self.state_size))
        f = reshape.reshape(f, (len(f), self.state_size))
        o = reshape.reshape(o, (len(o), self.state_size))

        c_tmp = tanh.tanh(a) * sigmoid.sigmoid(i) + sigmoid.sigmoid(f) * self.c
        self.c = zoneout.zoneout(self.c, c_tmp, self.c_ratio)
        self.h = zoneout.zoneout(self.h,
                                 sigmoid.sigmoid(o) * tanh.tanh(c_tmp),
                                 self.h_ratio)
        return self.h
Example #41
    def reallocate_cleared_grads(self):
        """Reallocate gradients cleared by :meth:`~chainer.Variable.cleargrad`.

        This method allocates arrays for all gradients which have :obj:`None`.
        This method is called before and after every optimizer hook.
        If an inheriting optimizer does not require this allocation,
        the optimizer can override this method with a blank function.

        """
        for name, param in self.target.namedparams(False):
            if param.grad is None:
                device = param.device
                with chainer.using_device(device):
                    param.grad = device.xp.zeros_like(param.data)
Example #42
def _concat_arrays(arrays, padding):
    # Convert `arrays` to numpy.ndarray if `arrays` consists of the built-in
    # types such as int, float or list.
    if not isinstance(arrays[0], chainer.get_array_types()):
        arrays = numpy.asarray(arrays)

    if padding is not None:
        arr_concat = _concat_arrays_with_padding(arrays, padding)
    else:
        device = backend.get_device_from_array(arrays[0])
        with chainer.using_device(device):
            arr_concat = device.xp.concatenate(
                [array[None] for array in arrays])

    return arr_concat
Example #43
    def check_deleted(self, backend_config):
        layer, hook = self.layer, self.hook
        layer.add_hook(hook)
        layer.to_device(backend_config.device)
        x = backend_config.get_array(self.x)

        with chainer.using_device(backend_config.device):
            y1 = layer(x).array
            with chainer.using_config('train', False):
                y2 = layer(x).array
            layer.delete_hook(hook.name)
            assert not hasattr(layer, hook.vector_name)
            y3 = layer(x).array
        y1, y2, y3 = _cpu._to_cpu(y1), _cpu._to_cpu(y2), _cpu._to_cpu(y3)
        assert not numpy.array_equal(y1, y3)
        assert not numpy.array_equal(y2, y3)
Example #44
def _concat_arrays_with_padding(arrays, padding):
    shape = numpy.array(arrays[0].shape, dtype=int)
    for array in arrays[1:]:
        if numpy.any(shape != array.shape):
            numpy.maximum(shape, array.shape, shape)
    shape = tuple(numpy.insert(shape, 0, len(arrays)))

    device = backend.get_device_from_array(arrays[0])
    with chainer.using_device(device):
        result = device.xp.full(shape, padding, dtype=arrays[0].dtype)
        for i in six.moves.range(len(arrays)):
            src = arrays[i]
            slices = tuple(slice(dim) for dim in src.shape)
            result[(i,) + slices] = src

    return result
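A tiny sketch of what the padded concatenation above produces (hypothetical inputs; assumes the excerpt's numpy/six/chainer imports are in scope):

import numpy

arrays = [numpy.ones((2, 3), dtype=numpy.float32),
          numpy.ones((4, 1), dtype=numpy.float32)]
batch = _concat_arrays_with_padding(arrays, padding=0)
# batch.shape == (2, 4, 3); each source array is copied into the top-left
# corner of its slice and the remaining cells keep the padding value.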
Example #45
 def __enter__(self):
     contexts = [
         chainer.using_config(
             'use_cudnn', self.use_cudnn),
         chainer.using_config(
             'cudnn_deterministic', self.cudnn_deterministic),
         chainer.using_config(
             'autotune', self.autotune),
         chainer.using_config(
             'use_ideep', self.use_ideep),
         chainer.using_device(self.device),
     ]
     for c in contexts:
         c.__enter__()
     self._contexts.append(contexts)
     return self
Example #46
    def accuracy(self, backend_config, loss_scaling=False):
        model = self.model
        optimizer = self.optimizer
        optimizer.setup(model)
        _optimizer_loss_scaling(optimizer, loss_scaling)

        if backend_config.use_ideep == 'always':
            if not intel64.is_ideep_available():
                # TODO(niboshi): This is temporary workaround.
                # See the comment on Skipped.
                raise Skipped('ideep is required to run this test.')

        model.to_device(backend_config.device)

        with chainer.using_device(backend_config.device):
            return self._train_linear_classifier(
                model, optimizer, backend_config)
Example #47
    def update_core(self, param):
        """Updates the parameter.

        Implementation of UpdateRule should override this method or both of
        :meth:`update_core_cpu` and :meth:`update_core_gpu`.

        Args:
            param (~chainer.Variable): Variable to be updated.

        """
        device = param.device
        with chainer.using_device(device):
            if device.xp is chainerx:
                self.update_core_chainerx(param)
            elif device.xp is numpy:
                self.update_core_cpu(param)
            else:
                self.update_core_gpu(param)
Example #48
    def sample(self, shape):
        """Generates a random sample based on given probabilities.

        Args:
            shape (tuple of int): Shape of a return value.

        Returns:
            Returns a generated array with the given shape. If a sampler is in
            CPU mode the return value is a :class:`numpy.ndarray` object, and
            if it is in GPU mode the return value is a :class:`cupy.ndarray`
            object.
        """
        xp = self._device.xp
        with chainer.using_device(self._device):
            if xp is cuda.cupy:
                return self.sample_gpu(shape)
            else:
                return self.sample_xp(xp, shape)
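The sampler above matches chainer.utils.WalkerAlias; a hedged usage sketch with that public class:

from chainer.utils import WalkerAlias

sampler = WalkerAlias([0.1, 0.2, 0.7])  # probabilities are normalized internally
idx = sampler.sample((5,))              # array of 5 indices drawn from {0, 1, 2}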
Example #49
    def _backward_chainerx(self, target_input_indexes, grad_outputs,
                           retained_inputs, retained_outputs):
        # Backward wrapper that is called from C++ via a Python binding in case
        # self.apply was called with chainerx.ndarrays.
        assert self._is_chainex_fallback_mode
        assert len(target_input_indexes) > 0
        assert (
            (self._input_indexes_to_retain is None
             and len(retained_inputs) == 0)
            or (len(self._input_indexes_to_retain) == len(retained_inputs)))
        assert (
            (self._output_indexes_to_retain is None
             and len(retained_outputs) == 0)
            or (len(self._output_indexes_to_retain) == len(retained_outputs)))
        assert all([
            a is None or isinstance(a, chainerx.ndarray)
            for a in grad_outputs])

        self._chainerx_retained_inputs = tuple([
            variable.Variable(
                array, requires_grad=array.is_backprop_required())
            for array in retained_inputs])
        self._chainerx_retained_outputs = tuple([
            variable.Variable(
                array, requires_grad=(
                    False if array is None else array.is_backprop_required()))
            for array in retained_outputs])

        device = backend.get_device_from_array(
            *(retained_inputs + retained_outputs + grad_outputs))
        with chainer.using_device(device):
            gxs = self._backward_target_inputs(
                tuple(target_input_indexes),
                tuple([
                    None
                    if gy is None
                    else chainer.Variable(
                        gy, requires_grad=gy.is_backprop_required())
                    for gy in grad_outputs]))

        gx_arrs = [gx._data[0] for gx in gxs]
        assert all([isinstance(gx, chainerx.ndarray) for gx in gx_arrs])
        return gx_arrs
Example #50
    def accuracy(self, backend_config):
        # TODO(niboshi): Support it
        if backend_config.use_chainerx and self.dtype == numpy.float16:
            raise unittest.SkipTest('ChainerX does not support float16')

        model = self.model
        optimizer = self.optimizer
        optimizer.setup(model)

        if backend_config.use_ideep == 'always':
            if not intel64.is_ideep_available():
                # TODO(niboshi): This is temporary workaround.
                # See the comment on Skipped.
                raise Skipped('ideep is required to run this test.')

        model.to_device(backend_config.device)

        with chainer.using_device(backend_config.device):
            return self._train_linear_classifier(
                model, optimizer, backend_config)
Example #51
    def as_noncontiguous_array(a):
        if a is None:
            return None

        if a.size <= 1:
            return a

        device = backend.get_device_from_array(a)
        xp = device.xp
        slices = (slice(None, None, 2),) * a.ndim
        with chainer.using_device(device):
            ret = xp.empty(tuple([s * 2 for s in a.shape]), dtype=a.dtype)
            ret[slices] = a
            ret = ret[slices]
        if device.xp is chainerx:
            assert not ret.is_contiguous
        else:
            assert not ret.flags.c_contiguous

        return ret
Example #52
    def as_noncontiguous_array(a):
        if a is None:
            return None

        if a.size <= 1:
            return a

        device = backend.get_device_from_array(a)
        xp = device.xp
        with chainer.using_device(device):
            ret = xp.empty(
                (a.shape[0] * 2,) + a.shape[1:], dtype=a.dtype)
        ret[::2] = a
        ret = ret[::2]
        if device.xp is chainerx:
            assert not ret.is_contiguous
        else:
            assert not ret.flags.c_contiguous

        return ret
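A quick sketch of the intended effect (hypothetical input; assumes the excerpt's chainer/backend imports):

import numpy

a = numpy.arange(6, dtype=numpy.float32).reshape(2, 3)
b = as_noncontiguous_array(a)
numpy.testing.assert_array_equal(a, b)  # same values...
assert not b.flags.c_contiguous         # ...but in a strided, non-contiguous buffer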
Example #53
    def _prepare_parameters(self, link, input_variable=None):
        """Prepare one buffer and one parameter.

        Args:
            link (:class:`~chainer.Link`): Link to normalize spectrally.
            input_variable (:class:`~chainer.Variable`):
                The first minibatch to initialize weight.

        """
        if getattr(link, self.weight_name).array is None:
            if input_variable is not None:
                link._initialize_params(input_variable.shape[1])
        initialW = getattr(link, self.weight_name)
        if initialW.shape[self.axis] == 0:
            raise ValueError(
                'Expect {}.shape[{}] > 0'.format(self.weight_name, self.axis)
            )
        u = link.xp.random.normal(
            size=(initialW.shape[self.axis],)).astype(dtype=initialW.dtype)
        setattr(link, self.vector_name, u)
        link.register_persistent(self.vector_name)
        if self.use_gamma:
            # Initialize the scaling parameter with the max singular value.
            weight_matrix = self.reshape_W(initialW.array)
            # TODO(crcrpar): Remove this when chainerx supports SVD.
            if link.xp is chainerx:
                xp, device, array = fallback._from_chx(weight_matrix)
                if xp is numpy:
                    _, s, _ = numpy.linalg.svd(array)
                else:
                    with chainer.using_device(device):
                        _, s, _ = xp.linalg.svd(array)
            else:
                _, s, _ = link.xp.linalg.svd(weight_matrix)
            with link.init_scope():
                link.gamma = variable.Parameter(s[0], ())
        self._initialized = True
Example #54
 def forward(self, x, y):
     self.args.append((x, y))
     with chainer.using_device(backend.get_device_from_array(x, y)):
         chainer.report({'loss': x.sum() + y.sum()}, self)
Example #55
    def backward(self, indexes, grad_outputs):
        x, W, gy = self.get_retained_inputs()

        device = backend.get_device_from_array(x.data)
        xp = device.xp

        if 0 in indexes:
            gx = chainer.Variable(xp.zeros_like(x.data))
        if 1 in indexes:
            gW = chainer.Variable(xp.zeros_like(W.data))
        if 2 in indexes:
            ggy = chainer.Variable(xp.zeros_like(gy.data))

        ggx, _, ggW = grad_outputs

        pos_neg_mask = xp.ones(self.sample_size + 1)
        pos_neg_mask[0] *= -1

        with chainer.using_device(device):
            arange = xp.arange(len(self.ignore_mask))
        for i in arange[self.ignore_mask]:
            # Partial forward pass to obtain intermediate `Variable`s
            ix = x[i]
            k = self.samples[i]

            if self.reduce == 'sum':
                igy = gy
            else:
                igy = gy[i]

            w = W[k]
            f = chainer.functions.flatten(
                chainer.functions.matmul(w, ix[:, None])) * pos_neg_mask
            sigf = chainer.functions.sigmoid(f)
            g = chainer.functions.broadcast_to(igy, f.shape) * sigf \
                * pos_neg_mask

            dgW_dg = chainer.functions.flatten(
                chainer.functions.matmul(ggW[k], ix[:, None])) * pos_neg_mask
            dgW_df = chainer.functions.broadcast_to(igy, f.shape) \
                * _sigmoid_grad(f, sigf, dgW_dg) * pos_neg_mask
            dgx_dg = chainer.functions.flatten(
                chainer.functions.matmul(ggx[i][None, :], w, transb=True))
            dgx_df = chainer.functions.broadcast_to(igy, f.shape) \
                * _sigmoid_grad(f, sigf, dgx_dg)

            if 0 in indexes:
                # derivative of gx
                dgx = chainer.functions.matmul(w, dgx_df[:, None], transa=True)

                # derivative of gW
                dgx += chainer.functions.matmul(g[None, :], ggW[k]).T
                dgx += chainer.functions.matmul(
                    w, dgW_df[:, None], transa=True)

                gx = chainer.functions.scatter_add(
                    gx, i, chainer.functions.flatten(dgx))

            if 1 in indexes:
                # derivative of gx
                shape = ggx[i].shape
                for ik, ig, idgx_df in six.moves.zip(k, g, dgx_df):
                    ig = chainer.functions.broadcast_to(ig, shape)
                    idgx_df = chainer.functions.broadcast_to(idgx_df, shape)
                    gW = chainer.functions.scatter_add(
                        gW, ik, ig * ggx[i] + idgx_df * ix)

                # derivative of gW
                gW = chainer.functions.scatter_add(
                    gW, k,
                    chainer.functions.matmul(dgW_df[:, None], ix[None, :]))

            if 2 in indexes:
                dgx_dg *= pos_neg_mask
                dggy = chainer.functions.sum((dgx_dg + dgW_dg) * sigf)
                if self.reduce == 'sum':
                    ggy += dggy
                else:
                    ggy = chainer.functions.scatter_add(ggy, i, dggy)

        ret = []
        if 0 in indexes:
            ret.append(gx)
        if 1 in indexes:
            ret.append(gW)
        if 2 in indexes:
            ret.append(ggy)
        return ret
Example #56
 def run(self):
     with chainer.using_device(self.device):
         self._run()
Example #57
def generate_array(initializer, shape, xp, dtype=None, device=None):
    # type: (types.AbstractInitializer, types.ShapeSpec, types.Xp, types.DTypeSpec, types.DeviceSpec) -> types.NdArray  # NOQA
    """Return initialized array.

    The algorithms used to make the new values depend on the
    concrete derived classes. If the initializer has the ``dtype`` attribute,
    it is used to construct the array. Otherwise, ``chainer.config.dtype`` is
    used instead. See :ref:`configuration` for the dtype config.

    Args:
        initializer: A callable object that takes :ref:`ndarray` and edits its
            value.
        shape (tuple): Shape of a return array.
        xp (module): :mod:`cupy`, :mod:`numpy`, or :mod:`chainerx`.
        dtype: Dtype specifier. If omitted, ``initializer.dtype`` is used.
        device: Target device specifier. If omitted, the current device is
             used for :mod:`cupy`, and the default device is used for
             :mod:`chainerx`.

    Returns:
        :ref:`ndarray`: An initialized array.

    """
    dtype_attr = getattr(initializer, 'dtype', None)
    if dtype is not None and dtype_attr is not None \
            and numpy.dtype(dtype) != numpy.dtype(dtype_attr):
        raise ValueError(
            'dtype mismatch: {} != {}'.format(dtype, dtype_attr))
    if dtype is None:
        dtype = dtype_attr
    dtype = chainer.get_dtype(dtype)

    if device is None:
        backend_device = backend._guess_device_from_array_module(xp)
    else:
        backend_device = chainer.get_device(device)
        if xp != backend_device.xp:
            raise ValueError('xp and device arguments are inconsistent.')

    if xp is chainerx:
        # Initialize with NumPy/CuPy array that shares memory with the
        # ChainerX array.
        # TODO(sonots): Directly use initializer after ChainerX
        # supports random.
        chx_device = backend_device.device  # type: ignore
        # TODO(okapies): remove 'type: ignore' when chainerx implements sequence support for empty() # NOQA
        array = chainerx.empty(shape, dtype=dtype, device=chx_device)  # type: ignore # NOQA
        if chx_device.backend.name == 'native':
            temp_array = _cpu._to_cpu(array)
            temp_device = cuda.DummyDevice  # type: cuda.Device
        elif chx_device.backend.name == 'cuda':
            temp_array = cuda.to_gpu(array, chx_device.index)
            temp_device = cuda.Device(chx_device.index)
        else:
            raise RuntimeError('ChainerX backend: {} is not supported.'.format(
                chx_device.backend.name))
        with temp_device:
            initializer(temp_array)
        return array

    with chainer.using_device(backend_device):
        array = xp.empty(shape, dtype=dtype)
        initializer(array)
    return array
Example #58
    def forward(self, x, **kwargs):
        """forward(self, x, finetune=False)

        Invokes the forward propagation of BatchNormalization.

        In training mode, the BatchNormalization computes moving averages of
        mean and variance for evaluation during training, and normalizes the
        input using batch statistics.

        Args:
            x (Variable): Input variable.
            finetune (bool): If it is in the training mode and ``finetune`` is
                ``True``, BatchNormalization runs in fine-tuning mode; it
                accumulates the input array to compute population statistics
                for normalization, and normalizes the input using batch
                statistics.

        """
        finetune, = argument.parse_kwargs(
            kwargs, ('finetune', False),
            test='test argument is not supported anymore. '
                 'Use chainer.using_config')

        if self.avg_mean is None:
            param_shape = tuple([
                d
                for i, d in enumerate(x.shape)
                if i not in self.axis])
            self._initialize_params(param_shape)

        gamma = self.gamma
        if gamma is None:
            with chainer.using_device(self.device):
                gamma = self.xp.ones(
                    self.avg_mean.shape, dtype=self._highprec_dtype)

        beta = self.beta
        if beta is None:
            with chainer.using_device(self.device):
                beta = self.xp.zeros(
                    self.avg_mean.shape, dtype=self._highprec_dtype)

        if configuration.config.train:
            if finetune:
                self.N += 1
                decay = 1. - 1. / self.N
            else:
                decay = self.decay

            avg_mean = self.avg_mean
            avg_var = self.avg_var

            if chainer.config.in_recomputing:
                # Do not update statistics when extra forward computation is
                # called.
                if finetune:
                    self.N -= 1  # Revert the count
                avg_mean = None
                avg_var = None

            ret = functions.batch_normalization(
                x, gamma, beta, eps=self.eps, running_mean=avg_mean,
                running_var=avg_var, decay=decay, axis=self.axis)
        else:
            # Use running average statistics or fine-tuned statistics.
            mean = self.avg_mean
            var = self.avg_var
            ret = functions.fixed_batch_normalization(
                x, gamma, beta, mean, var, self.eps, axis=self.axis)
        return ret
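The forward above is the BatchNormalization link pattern; a hedged usage sketch with the public chainer.links.BatchNormalization:

import numpy as np
import chainer
import chainer.links as L

bn = L.BatchNormalization(3)
x = np.random.randn(8, 3).astype(np.float32)
with chainer.using_config('train', True):
    y = bn(x)       # normalizes with batch statistics and updates avg_mean/avg_var
with chainer.using_config('train', False):
    y_eval = bn(x)  # normalizes with the accumulated running statistics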
Example #59
 def init_hx(self, xs):
     shape = (self.n_layers * self.direction, len(xs), self.out_size)
     with chainer.using_device(self.device):
         hx = variable.Variable(self.xp.zeros(shape, dtype=xs[0].dtype))
     return hx