Example #1
    def forward_cpu(self, inputs):
        if ((self.dy == 1 and self.dx == 1)
                and intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            self._use_ideep = True

        self.retain_inputs((0, 1))  # only retain x and W
        if len(inputs) == 2:
            (x, W), b = inputs, None
        else:
            x, W, b = inputs

        self._calc_out_size(x, W)

        if self.groups > 1:
            # Grouped convolution implementation
            return self._forward_grouped_convolution(x, W, b)

        elif (intel64.should_use_ideep('>=auto')
              and intel64.inputs_all_ready(inputs)):
            # iDeep implementation
            self._use_ideep = True
            return self._forward_ideep(x, W, b)

        else:
            return self._forward_cpu_core(x, W, b)
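
Every example on this page uses the same dispatch idiom: delegate to iDeep only when `intel64.should_use_ideep('>=auto')` reports that the configuration and installation allow it, and `intel64.inputs_all_ready(...)` reports that every input is an array iDeep can consume. A minimal sketch of the idiom; the class and the two `_forward_*` stand-ins are hypothetical, not Chainer API:

    from chainer.backends import intel64

    class IdeepDispatchSketch:
        """Hypothetical function object illustrating the gating idiom."""

        def _forward_ideep(self, inputs):
            ...  # an MKL-DNN-backed kernel would go here

        def _forward_generic(self, inputs):
            ...  # the plain NumPy kernel would go here

        def forward(self, inputs):
            if (intel64.should_use_ideep('>=auto')
                    and intel64.inputs_all_ready(inputs)):
                # iDeep is enabled and every input is acceptable to it.
                return self._forward_ideep(inputs)
            # Otherwise fall back to the generic implementation.
            return self._forward_generic(inputs)
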
Example #2
    def forward_cpu(self, inputs):
        if ((self.dy == 1 and self.dx == 1)
                and intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            self._use_ideep = True

        self.retain_inputs((0, 1))  # only retain x and W
        if len(inputs) == 2:
            (x, W), b = inputs, None
            x_layout, w_layout = self.input_layouts
        else:
            x, W, b = inputs
            x_layout, w_layout, _ = self.input_layouts

        x_shape = memory_layouts._transpose_shape(x.shape, x_layout, None)
        w_shape = memory_layouts._transpose_shape(W.shape, w_layout, None)
        self._calc_out_size(x_shape, w_shape)

        if self.groups > 1:
            # Grouped convolution implementation
            return self._forward_grouped_convolution(x, W, b)

        elif (intel64.should_use_ideep('>=auto')
              and intel64.inputs_all_ready(inputs)):
            # iDeep implementation
            self._use_ideep = True
            return self._forward_ideep(x, W, b)

        else:
            return self._forward_cpu_core(x, W, b)
Example #3
    def forward_cpu(self, x):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(x)):
            return self._forward_ideep(x)

        self._in_shape = x[0].shape
        self._in_dtype = x[0].dtype

        col = conv.im2col_cpu(x[0],
                              self.kh,
                              self.kw,
                              self.sy,
                              self.sx,
                              self.ph,
                              self.pw,
                              pval=-float('inf'),
                              cover_all=self.cover_all)
        n, c, kh, kw, out_h, out_w = col.shape
        col = col.reshape(n, c, kh * kw, out_h, out_w)

        # We select the maximum twice (argmax, then max), since an alternative
        # implementation using numpy.choose hits a NumPy bug when kh * kw >= 32.
        self.indexes = col.argmax(axis=2)
        y = col.max(axis=2)
        return y,
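
The pooling trick above is that `im2col_cpu` exposes every window as its own axis of the column tensor, so max pooling collapses into `max`/`argmax` over axis 2. A self-contained NumPy sketch of the same reduction, using `sliding_window_view` in place of Chainer's `im2col` (an illustrative assumption, not what Chainer calls):

    import numpy

    # 4x4 input, 2x2 windows, stride 2: expose the windows as trailing
    # axes, then reduce over them just as the example does over axis 2.
    x = numpy.arange(16, dtype=numpy.float32).reshape(1, 1, 4, 4)
    win = numpy.lib.stride_tricks.sliding_window_view(
        x, (2, 2), axis=(2, 3))[:, :, ::2, ::2]          # (1, 1, 2, 2, 2, 2)
    y = win.max(axis=(4, 5))                             # pooled output
    indexes = win.reshape(1, 1, 2, 2, 4).argmax(axis=4)  # like self.indexes
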
Example #4
    def forward(self, x):
        if (intel64.should_use_ideep('>=auto') and intel64.inputs_all_ready(x)
                and self.mask is None):
            return self._forward_ideep(x)

        if self.mask is not None:
            y = x[0] * self.mask
        else:
            scale = x[0].dtype.type(1. / (1 - self.dropout_ratio))
            xp = cuda.get_array_module(*x)
            if xp == numpy:
                flag = xp.random.rand(*x[0].shape) >= self.dropout_ratio
                self.mask = scale * flag
                y = x[0] * self.mask
            else:
                rand = xp.random.rand(*x[0].shape, dtype=numpy.float32)
                self.mask, y = cuda.elementwise(
                    'T x, R r, T scale, T ratio',
                    'T mask, T y',
                    '''
                    mask = (r >= ratio) * scale;
                    y = x * mask;
                    ''',
                    'dropout_fwd',
                )(x[0], rand, scale, self.dropout_ratio)
        return y,
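
Both branches implement inverted dropout: surviving activations are scaled by `1 / (1 - dropout_ratio)` at training time so the expected activation matches inference, where dropout is a no-op. A minimal NumPy sketch of the CPU branch:

    import numpy

    def dropout_sketch(x, ratio=0.5):
        # Inverted dropout: E[mask] == 1, so no rescaling at test time.
        scale = x.dtype.type(1. / (1 - ratio))
        flag = numpy.random.rand(*x.shape) >= ratio
        mask = scale * flag
        return x * mask, mask

    y, mask = dropout_sketch(numpy.ones((2, 3), dtype=numpy.float32))
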
Example #5
    def forward_cpu(self, x):
        if (self.ndim == 2 and intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(x)):
            return self._forward_2d_ideep(x)

        ksize = self.ksize
        stride = self.stride
        pad = self.pad
        cover_all = self.cover_all

        in_shape = x[0].shape
        in_dtype = x[0].dtype

        col = conv_nd.im2col_nd_cpu(x[0],
                                    ksize,
                                    stride,
                                    pad,
                                    pval=-float('inf'),
                                    cover_all=cover_all)
        n, c = col.shape[:2]
        mid = (len(col.shape) - 2) // 2 + 2
        ksize = col.shape[2:mid]
        outs = col.shape[mid:]
        # (n, c, k_1 * k_2 * ... * k_N, out_1, out_2, ..., out_N)
        col_shape = (n, c) + (functools.reduce(mul, ksize), ) + outs
        col = col.reshape(col_shape)

        # We select the maximum twice (max, then argmax), since an alternative
        # implementation using numpy.choose hits a NumPy bug when kh * kw >= 32.
        y = col.max(axis=2)

        self._in_shape = in_shape
        self._in_dtype = in_dtype
        self.indexes = col.argmax(axis=2)
        return y,
Example #6
    def forward_cpu(self, gy):
        func = self.func

        if (func.ndim == 2 and intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(gy)):
            return self._forward_2d_ideep(gy)

        ndim = func.ndim
        ksize = func.ksize
        stride = func.stride
        pad = func.pad
        in_shape = func._in_shape
        in_dtype = func._in_dtype
        indexes = func.indexes

        n, c = gy[0].shape[:2]
        outs = gy[0].shape[2:]
        dims = in_shape[2:]
        prod_outs = functools.reduce(mul, outs)
        prod_ksize = functools.reduce(mul, ksize)

        gcol = numpy.zeros(n * c * prod_outs * prod_ksize, dtype=in_dtype)

        indexes = (indexes.flatten() +
                   numpy.arange(0, indexes.size * prod_ksize, prod_ksize))

        gcol[indexes] = gy[0].ravel()
        gcol_shape = (n, c) + outs + ksize
        gcol = gcol.reshape(gcol_shape)
        for i in six.moves.range(ndim):
            gcol = numpy.swapaxes(gcol, 2 + i, ndim + 2 + i)

        gx = conv_nd.col2im_nd_cpu(gcol, stride, pad, dims)
        return gx,
Example #7
    def forward(self, inputs):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            return self._forward_ideep(inputs)

        y = inputs[0] * self.mask
        return y,
Example #8
    def forward(self, x):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(x)):
            return self._forward_ideep(x)

        if self.mask is not None:
            y = x[0] * self.mask
        else:
            scale = x[0].dtype.type(1. / (1 - self.dropout_ratio))
            xp = cuda.get_array_module(*x)
            if xp == numpy:
                flag = xp.random.rand(*x[0].shape) >= self.dropout_ratio
                self.mask = scale * flag
                y = x[0] * self.mask
            else:
                rand = xp.random.rand(*x[0].shape, dtype=numpy.float32)
                self.mask, y = cuda.elementwise(
                    'T x, R r, T scale, T ratio', 'T mask, T y',
                    '''
                    mask = (r >= ratio) * scale;
                    y = x * mask;
                    ''',
                    'dropout_fwd',
                )(x[0], rand, scale, self.dropout_ratio)
        return y,
Example #10
    def init_state(self, param):
        xp = backend.get_array_module(param.data)
        with cuda.get_device_from_array(param.data):
            self.state['m'] = xp.zeros_like(param.data)
            self.state['v'] = xp.zeros_like(param.data)
            if self.hyperparam.amsgrad:
                self.state['vhat'] = xp.zeros_like(param.data)

        # For iDeep
        if (isinstance(param.data, intel64.mdarray)
                and intel64.inputs_all_ready((self.state['m'],))
                and intel64.inputs_all_ready((self.state['v'],))):
            self.state['m'] = intel64.ideep.array(
                self.state['m'], itype=intel64.ideep.wgt_array)
            self.state['v'] = intel64.ideep.array(
                self.state['v'], itype=intel64.ideep.wgt_array)
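
The `m` and `v` slots allocated here are Adam's first- and second-moment estimates; `vhat` is only needed for the AMSGrad variant. For context, a hedged sketch of the textbook update these states feed (standard Adam, not Chainer's actual fused kernel):

    import numpy

    def adam_update_sketch(param, grad, m, v, t,
                           alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
        m += (1 - beta1) * (grad - m)          # biased first moment
        v += (1 - beta2) * (grad * grad - v)   # biased second moment
        m_hat = m / (1 - beta1 ** t)           # bias correction, t >= 1
        v_hat = v / (1 - beta2 ** t)
        param -= alpha * m_hat / (numpy.sqrt(v_hat) + eps)
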
Example #11
    def init_state(self, param):
        xp = cuda.get_array_module(param.data)
        with cuda.get_device_from_array(param.data):
            self.state['m'] = xp.zeros_like(param.data)
            self.state['v'] = xp.zeros_like(param.data)
            if self.hyperparam.amsgrad:
                self.state['vhat'] = xp.zeros_like(param.data)

        # For iDeep
        if (isinstance(param.data, intel64.mdarray)
                and intel64.inputs_all_ready((self.state['m'], ))
                and intel64.inputs_all_ready((self.state['v'], ))):
            self.state['m'] = intel64.ideep.array(
                self.state['m'], itype=intel64.ideep.wgt_array)
            self.state['v'] = intel64.ideep.array(
                self.state['v'], itype=intel64.ideep.wgt_array)
Example #12
    def forward(self, inputs):
        self._config_use_ideep = chainer.config.use_ideep
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            # iDeep implementation
            return self._forward_ideep(inputs)

        # Generic implementation
        if len(inputs) == 3:
            x, W, b = inputs
        else:
            (x, W), b = inputs, None

        # NumPy raises an error when the array is not contiguous.
        # See: https://github.com/chainer/chainer/issues/2744
        # TODO(niboshi): Remove this code when NumPy is fixed.
        if (isinstance(x, numpy.ndarray)
                and not (x.flags.c_contiguous or x.flags.f_contiguous)
                and 1 in x.shape):
            x = numpy.ascontiguousarray(x)

        y = x.dot(W.T).astype(x.dtype, copy=False)
        if b is not None:
            y += b
        self.retain_inputs((0, 1))  # b is not retained
        return y,
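
The generic branch is a plain affine map, `y = x.dot(W.T) + b`, with `W` stored as `(out_size, in_size)`; that storage convention is why the transpose appears. A tiny NumPy shape check with hypothetical sizes:

    import numpy

    x = numpy.random.rand(8, 3).astype(numpy.float32)   # (batch, in_size)
    W = numpy.random.rand(5, 3).astype(numpy.float32)   # (out_size, in_size)
    b = numpy.zeros(5, dtype=numpy.float32)
    y = x.dot(W.T).astype(x.dtype, copy=False) + b      # (batch, out_size)
    assert y.shape == (8, 5)
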
Example #13
    def forward(self, inputs):
        self._config_use_ideep = chainer.config.use_ideep
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            # iDeep implementation
            return self._forward_ideep(inputs)

        # Generic implementation
        if len(inputs) == 3:
            x, W, b = inputs
        else:
            (x, W), b = inputs, None

        # NumPy raises an error when the array is not contiguous.
        # See: https://github.com/chainer/chainer/issues/2744
        # TODO(niboshi): Remove this code when NumPy is fixed.
        if (isinstance(x, numpy.ndarray) and
                not (x.flags.c_contiguous or x.flags.f_contiguous) and
                1 in x.shape):
            x = numpy.ascontiguousarray(x)

        y = x.dot(W.T).astype(x.dtype, copy=False)
        if b is not None:
            y += b
        self.retain_inputs((0, 1))  # b is not retained
        return y,
Example #14
    def forward_cpu(self, inputs):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            return self.forward_ideep(inputs)

        gy, = inputs
        gx = gy * (self.b > 0)
        return utils.force_array(gx, dtype=gy.dtype),
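
`gx = gy * (self.b > 0)` is the ReLU gradient: the incoming gradient passes where the activation was positive and is zeroed elsewhere. Assuming `self.b` holds the array retained from the forward pass, a one-line NumPy check:

    import numpy

    b = numpy.array([-1., 0., 2.], dtype=numpy.float32)   # retained activation
    gy = numpy.ones(3, dtype=numpy.float32)
    gx = gy * (b > 0)                                     # -> [0., 0., 1.]
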
Example #16
    def forward_cpu(self, inputs):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            return self.forward_ideep(inputs)

        x, = inputs
        y = numpy.maximum(x, 0, dtype=x.dtype)
        self.retain_outputs((0,))
        return utils.force_array(y),
Example #17
    def init_state(self, param):
        xp = cuda.get_array_module(param.data)
        with cuda.get_device_from_array(param.data):
            self.state['v'] = xp.zeros_like(param.data)

        # For iDeep
        if intel64.inputs_all_ready((self.state['v'],)):
            self.state['v'] = intel64.ideep.array(
                self.state['v'], itype=intel64.ideep.wgt_array)
Example #18
    def forward_cpu(self, inputs):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            return self.forward_ideep(inputs)

        gy, = inputs
        gy = gy.copy()
        gy[self.cond <= 0] *= self.slope
        return gy,
Example #19
    def forward(self, xs):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(xs, (4, ))):
            # iDeep implementation
            return self._forward_ideep(xs)

        # Generic implementation
        xp = backend.get_array_module(*xs)
        return xp.concatenate(xs, self.axis),
Example #20
    def forward(self, xs):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(xs, (4,))):
            # iDeep implementation
            return self._forward_ideep(xs)

        # Generic implementation
        xp = cuda.get_array_module(*xs)
        return xp.concatenate(xs, self.axis),
Example #21
    def forward_cpu(self, inputs):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            return self.forward_ideep(inputs)

        x, = inputs
        y = numpy.maximum(x, 0, dtype=x.dtype)
        self.retain_outputs((0, ))
        return utils.force_array(y),
Example #22
    def forward_cpu(self, inputs):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            # iDeep implementation
            self._use_ideep = True
            return self.forward_ideep(inputs)

        x, = inputs
        self.retain_outputs((0,))
        return utils.force_array(numpy.maximum(x, 0, dtype=x.dtype)),
Example #24
    def forward_cpu(self, gy):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(gy)):
            return self._forward_ideep(gy)

        h, w = self._in_shape[2:]
        gcol = numpy.tile(gy[0][:, :, None, None],
                          (1, 1, self.kh, self.kw, 1, 1))
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
        gx /= self.kh * self.kw
        return gx,
Example #26
    def forward_cpu(self, inputs):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            return self.forward_ideep(inputs)

        gy, = inputs
        gy = gy.copy()
        if self.slope >= 0:
            gy[self.y < 0] *= self.slope
        else:
            gy[self.x < 0] *= self.slope
        return gy,
Example #27
    def forward_cpu(self, x):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(x)):
            return self._forward_ideep(x)

        self._in_shape = x[0].shape
        self._in_dtype = x[0].dtype

        col = conv.im2col_cpu(x[0], self.kh, self.kw, self.sy, self.sx,
                              self.ph, self.pw)
        y = col.mean(axis=(2, 3))
        return y,
Example #29
    def forward_cpu(self, inputs):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            return self.forward_ideep(inputs)

        x, = inputs
        y = x.copy()
        y[x < 0] *= self.slope
        if self.slope >= 0:
            self.retain_outputs((0, ))
        else:
            self.retain_inputs((0, ))
        return y,
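
The retention choice rests on a sign argument: with a positive slope, leaky ReLU never changes the sign of its input, so the backward pass (see Example #26) can branch on the retained output; a negative slope flips the sign on the negative side, so the input itself must be kept. A quick check of that property with hypothetical slopes:

    import numpy

    x = numpy.array([-2.0, -0.5, 1.0])
    for slope in (0.2, -0.2):
        y = numpy.where(x < 0, slope * x, x)
        # The sign pattern matches the input only for the positive slope.
        print(slope, (y < 0) == (x < 0))
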
Example #30
    def forward_cpu(self, inputs):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            return self.forward_ideep(inputs)

        x, = inputs
        y = x.copy()
        y[x < 0] *= self.slope
        if self.slope >= 0:
            self.retain_outputs((0,))
        else:
            self.retain_inputs((0,))
        return y,
Example #31
    def forward_cpu(self, x):
        if (intel64.should_use_ideep('>=auto') and intel64.inputs_all_ready(x)
                and self.mask is None):
            return self._forward_ideep(x)

        if self.mask is not None:
            y = x[0] * self.mask
        else:
            scale = x[0].dtype.type(1. / (1 - self.dropout_ratio))
            flag = numpy.random.rand(*x[0].shape) >= self.dropout_ratio
            self.mask = scale * flag
            y = x[0] * self.mask
        return y,
Example #32
    def forward(self, axis, gamma, x, xp, expander,
                beta, eps, decay, running_mean, running_var):
        if not (
                x.dtype == gamma.dtype
                and gamma.ndim == 1
                and intel64.inputs_all_ready((x,))):
            self._forward_fallback = True
            return super().forward(
                axis, gamma, x, xp, expander, beta, eps, decay,
                running_mean, running_var)

        expand_dim = False
        if x.ndim == 2:
            expand_dim = True
            x = x[:, :, None, None]

        y, mean, var, inv_std = (
            intel64.ideep.batchNormalization.Forward(
                intel64.ideep.array(x.astype(gamma.dtype, copy=False)),
                intel64.ideep.array(gamma),
                intel64.ideep.array(beta),
                None,
                None,
                eps
            ))
        y = y.astype(x.dtype, copy=False)

        if expand_dim:
            y = numpy.squeeze(y, axis=(2, 3))

        # Update running statistics if given
        if running_mean is not None:
            m = x.size // gamma.size
            adjust = m / max(m - 1., 1.)

            # Update running_mean
            if isinstance(running_mean, intel64.ideep.mdarray):
                running_mean.inplace_axpby(
                    decay, (1 - decay), mean)
            else:
                running_mean *= decay
                running_mean += mean * (1 - decay)

            # Update running_var
            if isinstance(running_var, intel64.ideep.mdarray):
                running_var.inplace_axpby(
                    decay, (1 - decay), var * adjust)
            else:
                running_var *= decay
                running_var += var * adjust * (1 - decay)
        return y, running_mean, running_var, mean, var, inv_std
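
The running statistics are exponential moving averages, `running = decay * running + (1 - decay) * stat`, with the batch variance rescaled by `m / (m - 1)` so the stored value is an unbiased population estimate. A sketch of the plain-array branch:

    import numpy

    def update_running_stats_sketch(running_mean, running_var,
                                    mean, var, m, decay=0.9):
        # `m` is the number of samples averaged per channel
        # (x.size // gamma.size in the example above).
        adjust = m / max(m - 1., 1.)    # unbiased-variance correction
        running_mean *= decay
        running_mean += (1 - decay) * mean
        running_var *= decay
        running_var += (1 - decay) * var * adjust
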
Example #33
    def forward_cpu(self, x):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(x)
                and self.mask is None):
            return self._forward_ideep(x)

        if self.mask is not None:
            y = x[0] * self.mask
        else:
            scale = x[0].dtype.type(1. / (1 - self.dropout_ratio))
            flag = numpy.random.rand(*x[0].shape) >= self.dropout_ratio
            self.mask = scale * flag
            y = x[0] * self.mask
        return y,
Example #34
 def __init__(self, x, gamma, key_axis):
     is_gamma_1d = gamma.ndim == 1
     # cuDNN only supports these tensor dimensions because they are
     # the most commonly used. If there is a need to support other
     # dimensions with cuDNN, we could consider reshaping the input
     # into a 2-dim array with channels as second dim and m=<product
     # of all dimensions except the 2nd dimension> as the first
     # dimension.
     self.is_for_conv2d = is_gamma_1d and x.ndim == 4 and key_axis[0] == 1
     self.is_for_linear = is_gamma_1d and key_axis[0] == x.ndim - 1
     self.cudnn_dim_ok = self.is_for_conv2d or self.is_for_linear
     # self.cudnn_dtype_ok = x.dtype != numpy.float16
     self.cudnn_dtype_ok = self.is_for_conv2d or (x.dtype != numpy.float16)
     self.ideep_ok = is_gamma_1d and intel64.inputs_all_ready((x, ))
Example #35
 def __init__(self, x, gamma):
     is_gamma_1d = gamma.ndim == 1
     # cuDNN only supports these tensor dimensions because they are
     # the most commonly used. If there is a need to support other
     # dimensions with cuDNN, we could consider reshaping the input
     # into a 2-dim array with channels as second dim and m=<product
     # of all dimensions except the 2nd dimension> as the first
     # dimension.
     self.is_for_conv2d = x.ndim == 4 and is_gamma_1d
     self.is_for_linear = x.ndim == 2 and is_gamma_1d
     self.cudnn_dim_ok = self.is_for_conv2d or self.is_for_linear
     # self.cudnn_dtype_ok = x.dtype != numpy.float16
     self.cudnn_dtype_ok = self.is_for_conv2d or (x.dtype != numpy.float16)
     self.ideep_ok = is_gamma_1d and intel64.inputs_all_ready((x,))
Example #36
    def forward_cpu(self, inputs):
        self.retain_inputs((0, 1))  # retain only x and W
        if len(inputs) == 2:
            (x, W), b = inputs, None
        else:
            x, W, b = inputs

        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            self._use_ideep = True

        if self.groups > 1:
            return self._forward_grouped_convolution(x, W, b)
        else:
            return self._forward_cpu_core(x, W, b)
Example #37
    def forward(self, xs):
        self.len = len(xs)
        if len(xs) == 1:
            return xs
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(xs)):
            y = intel64.ideep.multi_add(xs)
        else:
            # The output should be a new array: add the first two arrays
            # to get y, then accumulate the remaining arrays into y.
            y = xs[0] + xs[1]
            for x in xs[2:]:
                y += x

        return utils.force_array(y),
Example #38
    def forward(self, inputs):
        # Currently iDeep only supports 4 dims
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs, (4,))
                and self._ideep_is_supported(inputs)):
            return self._forward_ideep(inputs)

        x, = inputs
        self._xp = backend.get_array_module(x)
        indices_or_sections = self.indices_or_sections
        ret = self._xp.split(x, indices_or_sections, self.axis)
        if self._xp == numpy and not _numpy_split_ok:
            ret = _fix_numpy_split(ret, x, indices_or_sections, self.axis)
        self._shapes = [r.shape for r in ret]
        return tuple(ret)
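
`indices_or_sections` follows `numpy.split` semantics: an integer requests that many equal sections, while a sequence of indices gives the cut points. A quick illustration:

    import numpy

    x = numpy.arange(12.).reshape(2, 6)
    numpy.split(x, 3, axis=1)        # three equal sections of width 2
    numpy.split(x, [1, 4], axis=1)   # widths 1, 3 and 2 from the cut points
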
Example #39
    def forward(self, inputs):
        # Currently iDeep only supports 4 dims
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs, (4, ))
                and self._ideep_is_supported(inputs)):
            return self._forward_ideep(inputs)

        x, = inputs
        self._xp = backend.get_array_module(x)
        indices_or_sections = self.indices_or_sections
        ret = self._xp.split(x, indices_or_sections, self.axis)
        if self._xp == numpy and not _numpy_split_ok:
            ret = _fix_numpy_split(ret, x, indices_or_sections, self.axis)
        self._shapes = [r.shape for r in ret]
        return tuple(ret)
Example #41
    def forward_cpu(self, x):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(x, (4, ))):
            self._use_ideep = True
            return self._forward_ideep(x)

        half_n = self.n // 2
        x2 = numpy.square(x[0])
        sum_part = x2.copy()
        for i in six.moves.range(1, half_n + 1):
            sum_part[:, i:] += x2[:, :-i]
            sum_part[:, :-i] += x2[:, i:]
        self.unit_scale = self.k + self.alpha * sum_part
        self.scale = self.unit_scale**-self.beta
        self.y = x[0] * self.scale
        return self.y,
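
This is cross-channel local response normalization: each channel is scaled by `(k + alpha * sum_part) ** -beta`, where `sum_part` sums the squared activations over a window of `n` channels centered on the current one. The two slice assignments build that windowed sum; a hedged check of the equivalence against a direct loop:

    import numpy

    half_n = 5 // 2
    x2 = numpy.random.rand(2, 8, 4, 4).astype(numpy.float32) ** 2
    sum_part = x2.copy()
    for i in range(1, half_n + 1):
        sum_part[:, i:] += x2[:, :-i]    # channels below the center
        sum_part[:, :-i] += x2[:, i:]    # channels above the center

    # Direct windowed sum over channels for comparison.
    direct = numpy.empty_like(x2)
    c = x2.shape[1]
    for j in range(c):
        lo, hi = max(0, j - half_n), min(c, j + half_n + 1)
        direct[:, j] = x2[:, lo:hi].sum(axis=1)
    assert numpy.allclose(sum_part, direct)
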
Example #42
    def forward(self, inputs):
        # Currently iDeep only supports 4 dims
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs, (4, ))
                and self._ideep_is_supported(inputs)):
            return self._forward_ideep(inputs)

        x, = inputs
        self._xp = cuda.get_array_module(x)
        if self.indices is not None:
            indices_or_sections = self.indices
        else:
            indices_or_sections = self.sections
        ret = tuple(self._xp.split(x, indices_or_sections, self.axis))
        self._shapes = [r.shape for r in ret]
        return ret
Example #43
    def forward(self, inputs):
        # Currently iDeep only supports 4 dims
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs, (4,))
                and self._ideep_is_supported(inputs)):
            return self._forward_ideep(inputs)

        x, = inputs
        if isinstance(self.indices_or_sections, collections.abc.Iterable):
            cdimx = x.shape[self.axis]
            ind = list(self.indices_or_sections)
            ind.append(cdimx)
        self._xp = cuda.get_array_module(x)
        ret = tuple(self._xp.split(x, self.indices_or_sections, self.axis))
        self._shapes = [r.shape for r in ret]
        return ret
Example #44
    def forward(self, inputs):
        # Currently iDeep only supports 4 dims
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs, (4, ))
                and self._ideep_is_supported(inputs)):
            return self._forward_ideep(inputs)

        x, = inputs
        if isinstance(self.indices_or_sections, collections.abc.Iterable):
            cdimx = x.shape[self.axis]
            ind = list(self.indices_or_sections)
            ind.append(cdimx)
        self._xp = cuda.get_array_module(x)
        ret = tuple(self._xp.split(x, self.indices_or_sections, self.axis))
        self._shapes = [r.shape for r in ret]
        return ret
Example #45
    def forward_cpu(self, x):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(x, (4,))):
            self._use_ideep = True
            return self._forward_ideep(x)

        half_n = self.n // 2
        x2 = numpy.square(x[0])
        sum_part = x2.copy()
        for i in six.moves.range(1, half_n + 1):
            sum_part[:, i:] += x2[:, :-i]
            sum_part[:, :-i] += x2[:, i:]
        self.unit_scale = self.k + self.alpha * sum_part
        self.scale = self.unit_scale ** -self.beta
        self.y = x[0] * self.scale
        return self.y,
Example #46
    def forward(self, inputs):
        self._config_use_ideep = chainer.config.use_ideep
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            # iDeep implementation
            return self._forward_ideep(inputs)

        # Generic implementation
        self.retain_inputs((0, 1))
        W, gy = inputs

        if (isinstance(gy, numpy.ndarray) and
                not (gy.flags.c_contiguous or gy.flags.f_contiguous) and
                1 in gy.shape):
            gy = numpy.ascontiguousarray(gy)

        gx = gy.dot(W).astype(gy.dtype, copy=False)
        return gx,
Example #47
    def forward_cpu(self, inputs):
        if (self.groups == 1 and intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            # iDeep implementation
            self._use_ideep = True
            return self._forward_ideep(inputs)

        self.retain_inputs((0, 1))  # retain only x and W
        self.retain_outputs((0, ))
        if len(inputs) == 2:
            (x, W), b = inputs, None
        else:
            x, W, b = inputs

        if self.groups > 1:
            return self._forward_grouped_convolution(x, W, b)
        else:
            return self._forward_cpu_core(x, W, b)
Example #48
 def to_intel64(self):
     """Copies parameter variables and persistent values to CPU."""
     intel64.check_ideep_available()
     d = self.__dict__
     for name in self._params:
         d[name].to_intel64()
     for name in self._persistent:
         value = d[name]
         if isinstance(value, cuda.ndarray):
             value = value.get()  # to numpy.ndarray
         if (isinstance(value, numpy.ndarray) and intel64.inputs_all_ready(
             (value, ))):
             value = intel64.ideep.array(value,
                                         itype=intel64.ideep.wgt_array)
         d[name] = value
     self._cpu = True
     self._device_id = None
     return self
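
Here `inputs_all_ready` serves as the eligibility test before converting a persistent value into an iDeep `mdarray`; arrays iDeep cannot represent stay plain NumPy arrays. A hedged stand-alone sketch of that conversion, guarded so it degrades to a no-op without iDeep:

    import numpy
    from chainer.backends import intel64

    value = numpy.zeros((4, 4), dtype=numpy.float32)
    if intel64.is_ideep_available() and intel64.inputs_all_ready((value,)):
        # Re-wrap as an iDeep mdarray using the weight layout.
        value = intel64.ideep.array(value, itype=intel64.ideep.wgt_array)
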
Example #50
    def forward_cpu(self, x):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(x)):
            return self._forward_ideep(x)

        self._in_shape = x[0].shape
        self._in_dtype = x[0].dtype

        col = conv.im2col_cpu(
            x[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
            pval=-float('inf'), cover_all=self.cover_all)
        n, c, kh, kw, out_h, out_w = col.shape
        col = col.reshape(n, c, kh * kw, out_h, out_w)

        # We select the maximum twice (argmax, then max), since an alternative
        # implementation using numpy.choose hits a NumPy bug when kh * kw >= 32.
        self.indexes = col.argmax(axis=2)
        y = col.max(axis=2)
        return y,
Example #51
    def forward(self, xs):
        self.len = len(xs)
        if len(xs) == 1:
            return xs
        y = None
        if intel64.should_use_ideep('>=auto'):
            bxs = numpy.broadcast_arrays(*xs)
            if intel64.inputs_all_ready(bxs):
                y = intel64.ideep.multi_add(bxs)
        if y is None:
            # The output should be a new array: add the first two arrays
            # to get y, then accumulate the remaining arrays into y.
            y = xs[0] + xs[1]
            for x in xs[2:]:
                if x.shape == y.shape:
                    y += x
                else:
                    y = x + y

        return utils.force_array(y),
Example #52
    def forward_cpu(self, inputs):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs, (4,))):
            self._use_ideep = True
            return self.forward_ideep(inputs)

        x, = inputs
        self.retain_inputs((0,))
        self.retain_outputs((0,))

        half_n = self.n // 2
        x2 = numpy.square(x)
        sum_part = x2.copy()
        for i in six.moves.range(1, half_n + 1):
            sum_part[:, i:] += x2[:, :-i]
            sum_part[:, :-i] += x2[:, i:]
        self.unit_scale = self.k + self.alpha * sum_part
        self.scale = self.unit_scale ** -self.beta
        y = x * self.scale
        return y,
Example #53
    def forward(self, inputs):
        self._config_use_ideep = chainer.config.use_ideep
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            # iDeep implementation
            return self._forward_ideep(inputs)

        # Generic implementation
        if len(inputs) == 3:
            x, W, b = inputs
        else:
            (x, W), b = inputs, None

        # NumPy raises an error when the array is not contiguous.
        # See: https://github.com/chainer/chainer/issues/2744
        # TODO(niboshi): Remove this code when NumPy is fixed.
        if (isinstance(x, numpy.ndarray) and
                not (x.flags.c_contiguous or x.flags.f_contiguous) and
                1 in x.shape):
            x = numpy.ascontiguousarray(x)

        # In order to be compatible with the "static graph" feature, it is
        # required that all output arrays of this forward
        # function be allocated explicitly:
        xp = cuda.get_array_module(x)
        y = xp.empty((x.shape[0], W.shape[0]), dtype=x.dtype)

        # This is required because all of the "static_*()" functions
        # use the convention that any output arrays are supplied
        # as input arguments to the function. That is because it is
        # not allowed for a "static_*()" function to return anything
        # other than `None`. The reason is to prevent dynamic allocation
        # of output arrays during execution of the static schedule
        # because it would break the model.
        self.static_linear_no_bias(xp, x.dtype == W.dtype, inputs=[x, W],
                                   outputs=[y])
        if len(inputs) == 3:
            self.static_add_bias(inputs=[b], outputs=[y])

        self.retain_inputs((0, 1))  # b is not retained
        return y,
Example #54
    def forward_cpu(self, gy):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(gy)):
            return self._forward_ideep(gy)

        n, c, out_h, out_w = gy[0].shape
        h, w = self._in_shape[2:]
        kh, kw = self.kh, self.kw

        gcol = numpy.zeros(
            (n * c * out_h * out_w * kh * kw), dtype=self._in_dtype)

        indexes = self.indexes.flatten()
        indexes += numpy.arange(0, indexes.size * kh * kw, kh * kw)

        gcol[indexes] = gy[0].ravel()
        gcol = gcol.reshape(n, c, out_h, out_w, kh, kw)
        gcol = numpy.swapaxes(gcol, 2, 4)
        gcol = numpy.swapaxes(gcol, 3, 5)

        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
        return gx,
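
The scatter works because `self.indexes` stores each maximum's offset within its `kh * kw` window; adding `arange(0, size * kh * kw, kh * kw)` turns those per-window offsets into absolute positions in the flattened, zero-initialized column buffer, so a single fancy-indexed assignment routes every gradient. A small illustration of the index arithmetic:

    import numpy

    # Two windows of size kh*kw == 4; maxima sat at offsets 3 and 0.
    kh_kw = 4
    indexes = numpy.array([3, 0])
    gy = numpy.array([10., 20.])
    gcol = numpy.zeros(indexes.size * kh_kw)
    flat = indexes + numpy.arange(0, indexes.size * kh_kw, kh_kw)
    gcol[flat] = gy
    # gcol -> [0, 0, 0, 10, 20, 0, 0, 0]
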