def _forward_grouped_convolution_xp(self, x, W, b, xp):
    # G: group count
    # N: batch size
    # xC: input channels
    # yC: output channels
    G = self.groups
    N, xC = x.shape[:2]
    x_size = x.shape[2:]
    yCg = W.shape[1]
    yC = yCg * G
    xCg = xC // G
    k_size = W.shape[2:]
    dims = len(k_size)
    if xC % G != 0:
        raise TypeError('The number of groups must be '
                        'a divisor of that of input channels')

    x = xp.rollaxis(x, 1)  # (xC, N, x_size...)
    x = x.reshape(G, xCg, N * utils.size_of_shape(x_size))

    W = W.reshape(G, xCg, yCg * utils.size_of_shape(k_size))
    W = W.transpose(0, 2, 1)  # (G, yCg*k_size, xCg)

    # (G, yCg*k_size, N*x_size) = (G, yCg*k_size, xCg) @ (G, xCg, N*x_size)
    col = convolution_2d._matmul(W, x).astype(x.dtype, copy=False)
    col = col.reshape((yC,) + k_size + (N,) + x_size)
    col = xp.rollaxis(col, dims + 1)  # (N, yC, k_size..., x_size...)

    y = conv_nd.col2im_nd(col, self.stride, self.pad, self.outs,
                          dilate=self.dilate)
    if b is not None:
        y += b.reshape(1, yC, *((1,) * dims))
    return y,
def check_log_prob(self, is_gpu):
    smp = self.sample_for_test()
    if is_gpu:
        log_prob1 = self.gpu_dist.log_prob(cuda.to_gpu(smp)).data
    else:
        log_prob1 = self.cpu_dist.log_prob(smp).data

    if self.continuous:
        scipy_prob = self.scipy_dist.logpdf
    else:
        scipy_prob = self.scipy_dist.logpmf

    if self.scipy_onebyone:
        onebyone_smp = smp.reshape(*[
            utils.size_of_shape(sh) for sh in
            [self.sample_shape, self.shape, self.event_shape]])
        onebyone_smp = numpy.swapaxes(onebyone_smp, 0, 1)
        onebyone_smp = onebyone_smp.reshape(
            (-1,) + self.sample_shape + self.event_shape)
        log_prob2 = []
        for one_params, one_smp in zip(
                self.scipy_onebyone_params_iter(), onebyone_smp):
            log_prob2.append(scipy_prob(one_smp, **one_params))
        log_prob2 = numpy.vstack(log_prob2)
        log_prob2 = log_prob2.reshape(
            utils.size_of_shape(self.shape), -1).T
        log_prob2 = log_prob2.reshape(self.sample_shape + self.shape)
    else:
        log_prob2 = scipy_prob(smp, **self.scipy_params)
    array.assert_allclose(log_prob1, log_prob2)
def _forward_grouped_convolution_xp(self, x, gy, xp):
    G = self.groups
    N, iC = x.shape[:2]
    oC = gy.shape[1]
    o_size = gy.shape[2:]
    o_size_prod = utils.size_of_shape(o_size)
    k_size = self.ksize
    dims = len(o_size)
    iCg = iC // G
    oCg = oC // G
    # Do not check iCg and oCg because this class is rarely used alone

    # (N, iC, k_size..., o_size...)
    x = conv_nd.im2col_nd(x, k_size, self.stride, self.pad,
                          cover_all=self.cover_all, dilate=self.dilate)

    x = xp.rollaxis(x, 0, dims + 2)  # (iC, k_size..., N, o_size...)
    mul_len = iCg * utils.size_of_shape(k_size)
    x = x.reshape(G, mul_len, N * o_size_prod)
    x = x.transpose(0, 2, 1)  # (G, N*o_size, iCg*k_size)

    gy = xp.rollaxis(gy, 1)  # (oC, N, o_size...)
    gy = gy.reshape(G, oCg, N * o_size_prod)

    # (G, oCg, iCg*k_size) = (G, oCg, N*o_size) @ (G, N*o_size, iCg*k_size)
    gW = convolution_2d._matmul(gy, x).astype(self.W_dtype, copy=False)
    gW = gW.reshape(oC, iCg, *k_size)

    return gW,
def check_log_prob(self, is_gpu):
    smp = self.sample_for_test()
    if is_gpu:
        log_prob1 = self.gpu_dist.log_prob(cuda.to_gpu(smp)).data
    else:
        log_prob1 = self.cpu_dist.log_prob(smp).data

    if self.continuous:
        scipy_prob = self.scipy_dist.logpdf
    else:
        scipy_prob = self.scipy_dist.logpmf

    if self.scipy_onebyone:
        onebyone_smp = smp.reshape(*[
            utils.size_of_shape(sh) for sh in
            [self.sample_shape, self.shape, self.event_shape]])
        onebyone_smp = numpy.swapaxes(onebyone_smp, 0, 1)
        onebyone_smp = onebyone_smp.reshape(
            (-1,) + self.sample_shape + self.event_shape)
        log_prob2 = []
        for one_params, one_smp in zip(
                self.scipy_onebyone_params_iter(), onebyone_smp):
            log_prob2.append(scipy_prob(one_smp, **one_params))
        log_prob2 = numpy.vstack(log_prob2)
        log_prob2 = log_prob2.reshape(
            utils.size_of_shape(self.shape), -1).T
        log_prob2 = log_prob2.reshape(self.sample_shape + self.shape)
    else:
        log_prob2 = scipy_prob(smp, **self.scipy_params)
    array.assert_allclose(log_prob1, log_prob2)
def check_sample(self, is_gpu):
    if is_gpu:
        smp1 = self.gpu_dist.sample(
            sample_shape=(100000,) + self.sample_shape).data
    else:
        smp1 = self.cpu_dist.sample(
            sample_shape=(100000,) + self.sample_shape).data

    if self.scipy_onebyone:
        smp2 = []
        for one_params in self.scipy_onebyone_params_iter():
            smp2.append(self.scipy_dist.rvs(
                size=(100000,) + self.sample_shape, **one_params))
        smp2 = numpy.vstack(smp2)
        smp2 = smp2.reshape((utils.size_of_shape(self.shape), 100000)
                            + self.sample_shape
                            + self.cpu_dist.event_shape)
        smp2 = numpy.rollaxis(
            smp2, 0, smp2.ndim - len(self.cpu_dist.event_shape))
        smp2 = smp2.reshape((100000,) + self.sample_shape + self.shape
                            + self.cpu_dist.event_shape)
    else:
        smp2 = self.scipy_dist.rvs(
            size=(100000,) + self.sample_shape + self.shape,
            **self.scipy_params)
    array.assert_allclose(smp1.mean(axis=0), smp2.mean(axis=0),
                          atol=3e-2, rtol=3e-2)
    array.assert_allclose(smp1.std(axis=0), smp2.std(axis=0),
                          atol=3e-2, rtol=3e-2)
def check_sample(self, is_gpu):
    if is_gpu:
        smp1 = self.gpu_dist.sample(
            sample_shape=(100000,) + self.sample_shape).data
    else:
        smp1 = self.cpu_dist.sample(
            sample_shape=(100000,) + self.sample_shape).data

    if self.scipy_onebyone:
        smp2 = []
        for one_params in self.scipy_onebyone_params_iter():
            smp2.append(
                self.scipy_dist.rvs(size=(100000,) + self.sample_shape,
                                    **one_params))
        smp2 = numpy.vstack(smp2)
        smp2 = smp2.reshape((utils.size_of_shape(self.shape), 100000)
                            + self.sample_shape
                            + self.cpu_dist.event_shape)
        smp2 = numpy.rollaxis(
            smp2, 0, smp2.ndim - len(self.cpu_dist.event_shape))
        smp2 = smp2.reshape((100000,) + self.sample_shape + self.shape
                            + self.cpu_dist.event_shape)
    else:
        smp2 = self.scipy_dist.rvs(
            size=(100000,) + self.sample_shape + self.shape,
            **self.scipy_params)
    array.assert_allclose(smp1.mean(axis=0), smp2.mean(axis=0),
                          atol=3e-2, rtol=3e-2)
    array.assert_allclose(smp1.std(axis=0), smp2.std(axis=0),
                          atol=3e-2, rtol=3e-2)
def _as4darray(arr, mode):
    assert mode.cudnn_dim_ok
    if mode.is_for_conv2d:
        assert arr.ndim == 4
        return arr
    else:  # is_for_linear
        return arr.reshape(utils.size_of_shape(arr.shape[0:-1]), -1, 1, 1)
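# Illustrative sketch (not part of the original source): the "is_for_linear"
# branch above collapses every leading axis into one batch axis and views the
# result as a 4-d NCHW tensor with unit spatial size. The shapes below are
# arbitrary example values.
import numpy

arr = numpy.zeros((2, 3, 5), dtype=numpy.float32)
# size_of_shape(arr.shape[0:-1]) is just the product of the leading axes,
# so the array becomes (N, C, 1, 1) with N = 2 * 3 and C = 5.
as4d = arr.reshape(int(numpy.prod(arr.shape[0:-1])), -1, 1, 1)
assert as4d.shape == (6, 5, 1, 1)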
def __call__(self, array):
    if self.dtype is not None:
        assert array.dtype == self.dtype
    device = backend.get_device_from_array(array)
    if not array.shape:  # 0-dim case
        array[...] = self.scale * (2 * numpy.random.randint(2) - 1)
    elif not array.size:
        raise ValueError('Array to be initialized must be non-empty.')
    else:
        # numpy.prod returns float value when the argument is empty.
        out_dim = len(array)
        in_dim = utils.size_of_shape(array.shape[1:])
        if (in_dim > out_dim and self._checks[0]) or (
                in_dim < out_dim and self._checks[1]):
            raise ValueError(
                'Cannot make orthogonal {}. '
                'shape = {}, interpreted as '
                '{}-dim input and {}-dim output.'.format(
                    self.mode, array.shape, in_dim, out_dim))
        transpose = in_dim > out_dim
        a = numpy.random.normal(size=(out_dim, in_dim))
        if transpose:
            a = a.T
        # cupy.linalg.qr requires cusolver in CUDA 8+
        q, r = numpy.linalg.qr(a)
        q *= numpy.copysign(self.scale, numpy.diag(r))
        if transpose:
            q = q.T
        array[...] = device.xp.asarray(q.reshape(array.shape))
def linear(x, W, b=None, n_batch_axes=1):
    """Linear function, or affine transformation.

    It accepts two or three arguments: an input minibatch ``x``, a weight
    matrix ``W``, and optionally a bias vector ``b``. It computes

    .. math:: Y = xW^\\top + b.

    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable, which is a :math:`(s_1, s_2, \
            ..., s_n)`-shaped float array. Its first ``n_batch_axes``
            dimensions are handled as *minibatch dimensions*. The other
            dimensions are flattened into a single concatenated dimension
            whose size must be :math:`(s_{\\rm n\\_batch\\_axes} * ... \
            * s_n = N)`.
        W (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Weight variable of shape :math:`(M, N)`,
            where :math:`(N = s_{\\rm n\\_batch\\_axes} * ... * s_n)`.
        b (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Bias variable (optional) of shape
            :math:`(M,)`.
        n_batch_axes (int): The number of batch axes. The default is 1. The
            input variable is reshaped into a
            (:math:`{\\rm n\\_batch\\_axes} + 1`)-dimensional tensor.
            This should be greater than 0.

    Returns:
        ~chainer.Variable: Output variable. A float array with shape
        of :math:`(s_1, ..., s_{\\rm n\\_batch\\_axes}, M)`.

    .. seealso:: :class:`~chainer.links.Linear`

    .. admonition:: Example

        >>> x = np.random.uniform(0, 1, (3, 4)).astype(np.float32)
        >>> W = np.random.uniform(0, 1, (5, 4)).astype(np.float32)
        >>> b = np.random.uniform(0, 1, (5,)).astype(np.float32)
        >>> y = F.linear(x, W, b)
        >>> y.shape
        (3, 5)

    """
    if n_batch_axes <= 0:
        raise ValueError('n_batch_axes should be greater than 0.')
    if n_batch_axes > 1:
        batch_shape = x.shape[:n_batch_axes]
        batch_size = utils.size_of_shape(batch_shape)
        x = x.reshape(batch_size, -1)
    elif x.ndim > 2:
        x = x.reshape(x.shape[0], -1)
    if b is None:
        args = x, W
    else:
        args = x, W, b

    y, = LinearFunction().apply(args)
    if n_batch_axes > 1:
        y = y.reshape(batch_shape + (-1,))
    return y
def linear(x, W, b=None, n_batch_axes=1):
    """Linear function, or affine transformation.

    It accepts two or three arguments: an input minibatch ``x``, a weight
    matrix ``W``, and optionally a bias vector ``b``. It computes

    .. math:: Y = xW^\\top + b.

    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable, which is a :math:`(s_1, s_2, \
            ..., s_n)`-shaped float array. Its first ``n_batch_axes``
            dimensions are handled as *minibatch dimensions*. The other
            dimensions are flattened into a single concatenated dimension
            whose size must be :math:`(s_{\\rm n\\_batch\\_axes} * ... \
            * s_n = N)`.
        W (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Weight variable of shape :math:`(M, N)`,
            where :math:`(N = s_{\\rm n\\_batch\\_axes} * ... * s_n)`.
        b (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Bias variable (optional) of shape
            :math:`(M,)`.
        n_batch_axes (int): The number of batch axes. The default is 1. The
            input variable is reshaped into a
            (:math:`{\\rm n\\_batch\\_axes} + 1`)-dimensional tensor.
            This should be greater than 0.

    Returns:
        ~chainer.Variable: Output variable. A float array with shape
        of :math:`(s_1, ..., s_{\\rm n\\_batch\\_axes}, M)`.

    .. seealso:: :class:`~chainer.links.Linear`

    .. admonition:: Example

        >>> x = np.random.uniform(0, 1, (3, 4)).astype(np.float32)
        >>> W = np.random.uniform(0, 1, (5, 4)).astype(np.float32)
        >>> b = np.random.uniform(0, 1, (5,)).astype(np.float32)
        >>> y = F.linear(x, W, b)
        >>> y.shape
        (3, 5)

    """
    if n_batch_axes <= 0:
        raise ValueError('n_batch_axes should be greater than 0.')
    if n_batch_axes > 1:
        batch_shape = x.shape[:n_batch_axes]
        batch_size = utils.size_of_shape(batch_shape)
        x = x.reshape(batch_size, -1)
    elif x.ndim > 2:
        x = x.reshape(x.shape[0], -1)
    if b is None:
        args = x, W
    else:
        args = x, W, b

    y, = LinearFunction().apply(args)
    if n_batch_axes > 1:
        y = y.reshape(batch_shape + (-1,))
    return y
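# Illustrative sketch (not from the original source): a plain-NumPy reference
# of the reshape bookkeeping that ``linear`` performs when ``n_batch_axes > 1``.
# The helper name ``linear_ref`` is ad hoc and only meant to show the idea.
import numpy as np


def linear_ref(x, W, b=None, n_batch_axes=1):
    batch_shape = x.shape[:n_batch_axes]
    # Flatten the batch axes into one, as utils.size_of_shape(batch_shape) does.
    x2d = x.reshape(int(np.prod(batch_shape)), -1)
    y = x2d.dot(W.T)
    if b is not None:
        y = y + b
    # Restore the batch axes on the output.
    return y.reshape(batch_shape + (-1,))


x = np.random.uniform(0, 1, (2, 3, 4)).astype(np.float32)
W = np.random.uniform(0, 1, (5, 4)).astype(np.float32)
assert linear_ref(x, W, n_batch_axes=2).shape == (2, 3, 5)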
def _forward_grouped_convolution_xp(self, x, W, b, xp):
    # G: group count
    # N: batch size
    # iC: input channels
    # oC: output channels
    G = self.groups
    N, iC = x.shape[:2]
    oC = W.shape[0]
    k_size = W.shape[2:]
    iCg = iC // G
    oCg = oC // G
    dims = len(k_size)
    if iC % G != 0:
        raise TypeError('The number of groups must be '
                        'a divisor of that of input channels')
    if oC % G != 0:
        raise TypeError('The number of groups must be '
                        'a divisor of that of output channels')

    xp = backend.get_array_module(x)

    # (N, iC, k_size..., o_size...)
    x = conv_nd.im2col_nd(x, k_size, self.stride, self.pad,
                          cover_all=self.cover_all, dilate=self.dilate)
    o_size = x.shape[-dims:]

    x = xp.rollaxis(x, 0, dims + 2)  # (iC, k_size..., N, o_size...)
    mul_len = iCg * utils.size_of_shape(k_size)
    x = x.reshape(G, mul_len, N * utils.size_of_shape(o_size))

    W = W.reshape(G, oCg, mul_len)

    # (G, oCg, N*o_size) = (G, oCg, iCg*k_size) @ (G, iCg*k_size, N*o_size)
    y = convolution_2d._matmul(W, x).astype(x.dtype, copy=False)
    y = y.reshape(oC, N, *o_size)
    y = xp.rollaxis(y, 1)  # (N, oC, o_size...)
    if b is not None:
        y += b.reshape(1, b.size, *((1,) * dims))

    return y,
def forward(self, x, n_batch_axes=1):
    if self.W.array is None:
        in_size = utils.size_of_shape(x.shape[1:])
        self._initialize_params(in_size)
    return thresholded_linear(x, self.W, self.b,
                              n_batch_axes=n_batch_axes,
                              threshold=self.threshold)
def forward(self, x, n_batch_axes=1):
    if self.W.array is None:
        in_size = utils.size_of_shape(x.shape[1:])
        self._initialize_params(in_size)
    return ada_loss_linear(x, self.W, self.b,
                           n_batch_axes=n_batch_axes,
                           ada_loss=self.ada_loss)
def get_fans(shape):
    if not isinstance(shape, tuple):
        raise ValueError('shape must be tuple')
    if len(shape) < 2:
        raise ValueError(
            'shape must be of length >= 2: shape={}'.format(shape))
    receptive_field_size = utils.size_of_shape(shape[2:])
    fan_in = shape[1] * receptive_field_size
    fan_out = shape[0] * receptive_field_size
    return fan_in, fan_out
def _forward_grouped_convolution_xp(self, x, W, b, xp):
    # G: group count
    # N: batch size
    # iC: input channels
    # oC: output channels
    G = self.groups
    N, iC = x.shape[:2]
    oC = W.shape[0]
    k_size = W.shape[2:]
    iCg = iC // G
    oCg = oC // G
    dims = len(k_size)
    if iC % G != 0:
        raise TypeError('The number of groups must be '
                        'a divisor of that of input channels')
    if oC % G != 0:
        raise TypeError('The number of groups must be '
                        'a divisor of that of output channels')

    xp = backend.get_array_module(x)

    # (N, iC, k_size..., o_size...)
    x = conv_nd.im2col_nd(x, k_size, self.stride, self.pad,
                          cover_all=self.cover_all, dilate=self.dilate)
    o_size = x.shape[-dims:]

    x = xp.rollaxis(x, 0, dims + 2)  # (iC, k_size..., N, o_size...)
    mul_len = iCg * utils.size_of_shape(k_size)
    x = x.reshape(G, mul_len, N * utils.size_of_shape(o_size))

    W = W.reshape(G, oCg, mul_len)

    # (G, oCg, N*o_size) = (G, oCg, iCg*k_size) @ (G, iCg*k_size, N*o_size)
    y = convolution_2d._matmul(W, x).astype(x.dtype, copy=False)
    y = y.reshape(oC, N, *o_size)
    y = xp.rollaxis(y, 1)  # (N, oC, o_size...)
    if b is not None:
        y += b.reshape(1, b.size, *((1,) * dims))

    return y,
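# Illustrative sketch (not from the original source): the shape bookkeeping
# behind the grouped matmul above, with arbitrary example sizes. With G groups,
# W is viewed as (G, oCg, iCg*k) and the im2col'd input as (G, iCg*k, N*o_size),
# so a batched matmul over the group axis yields (G, oCg, N*o_size).
import numpy as np

G, N, iCg, oCg, k, o_size = 2, 4, 3, 5, 9, 16
W = np.zeros((G, oCg, iCg * k), dtype=np.float32)
col = np.zeros((G, iCg * k, N * o_size), dtype=np.float32)
y = np.matmul(W, col)  # batched over the leading group axis
assert y.shape == (G, oCg, N * o_size)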
def get_fans(shape):
    if not isinstance(shape, tuple):
        raise ValueError(
            'shape must be tuple. Actual type: {}'.format(type(shape)))
    if len(shape) < 2:
        raise ValueError(
            'shape must be of length >= 2. Actual shape: {}'.format(shape))
    receptive_field_size = utils.size_of_shape(shape[2:])
    fan_in = shape[1] * receptive_field_size
    fan_out = shape[0] * receptive_field_size
    return fan_in, fan_out
def get_fans(shape):
    if not isinstance(shape, tuple):
        raise ValueError(
            'shape must be tuple. Actual type: {}'.format(type(shape)))
    if len(shape) < 2:
        raise ValueError(
            'shape must be of length >= 2. Actual shape: {}'.format(shape))
    receptive_field_size = utils.size_of_shape(shape[2:])
    fan_in = shape[1] * receptive_field_size
    fan_out = shape[0] * receptive_field_size
    return fan_in, fan_out
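# Worked example (illustrative numbers only): for a 2-d convolution kernel of
# shape (out_channels, in_channels, kh, kw) = (64, 32, 3, 3), the receptive
# field size is 3 * 3 = 9, so get_fans should report fan_in = 32 * 9 = 288 and
# fan_out = 64 * 9 = 576.
shape = (64, 32, 3, 3)
receptive_field_size = 3 * 3
assert (shape[1] * receptive_field_size,
        shape[0] * receptive_field_size) == (288, 576)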
def _forward_grouped_convolution_xp(self, x, W, b, xp):
    # G: group count
    # N: batch size
    # xC: input channels
    # yC: output channels
    G = self.groups
    N, xC = x.shape[:2]
    x_size = x.shape[2:]
    yCg = W.shape[1]
    yC = yCg * G
    xCg = xC // G
    k_size = W.shape[2:]
    dims = len(k_size)
    if xC % G != 0:
        raise TypeError('The number of groups must be '
                        'a divisor of that of input channels')

    x = xp.rollaxis(x, 1)  # (xC, N, x_size...)
    x = x.reshape(G, xCg, N * utils.size_of_shape(x_size))

    W = W.reshape(G, xCg, yCg * utils.size_of_shape(k_size))
    W = W.transpose(0, 2, 1)  # (G, yCg*k_size, xCg)

    # (G, yCg*k_size, N*x_size) = (G, yCg*k_size, xCg) @ (G, xCg, N*x_size)
    col = convolution_2d._matmul(W, x).astype(x.dtype, copy=False)
    col = col.reshape((yC,) + k_size + (N,) + x_size)
    col = xp.rollaxis(col, dims + 1)  # (N, yC, k_size..., x_size...)

    y = conv_nd.col2im_nd(col, self.stride, self.pad, self.outs,
                          dilate=self.dilate)
    if b is not None:
        y += b.reshape(1, yC, *((1,) * dims))
    return y,
def forward(self, x):
    """Updates the internal state and returns the LSTM outputs.

    Args:
        x (~chainer.Variable): A new batch from the input sequence.

    Returns:
        ~chainer.Variable: Outputs of updated LSTM units.

    """
    if self.upward.W.array is None:
        with cuda.get_device_from_id(self._device_id):
            in_size = utils.size_of_shape(x.shape[1:])
            self.upward._initialize_params(in_size)
            self._initialize_params()

    batch = x.shape[0]
    lstm_in = self.upward(x)
    h_rest = None
    if self.h is not None:
        h_size = self.h.shape[0]
        if batch == 0:
            h_rest = self.h
        elif h_size < batch:
            msg = ('The batch size of x must be equal to or less than '
                   'the size of the previous state h.')
            raise TypeError(msg)
        elif h_size > batch:
            h_update, h_rest = split_axis.split_axis(
                self.h, [batch], axis=0)
            lstm_in += self.lateral(h_update)
        else:
            lstm_in += self.lateral(self.h)
    if self.c is None:
        xp = self.xp
        with cuda.get_device_from_id(self._device_id):
            self.c = variable.Variable(
                xp.zeros((batch, self.state_size), dtype=x.dtype))
    self.c, y = lstm.lstm(self.c, lstm_in)

    if h_rest is None:
        self.h = y
    elif len(y.array) == 0:
        self.h = h_rest
    else:
        self.h = concat.concat([y, h_rest], axis=0)

    return y
def forward(self, x):
    """Updates the internal state and returns the LSTM outputs.

    Args:
        x (~chainer.Variable): A new batch from the input sequence.

    Returns:
        ~chainer.Variable: Outputs of updated LSTM units.

    """
    if self.upward.W.array is None:
        with chainer.using_device(self.device):
            in_size = utils.size_of_shape(x.shape[1:])
            self.upward._initialize_params(in_size)
            self._initialize_params()

    batch = x.shape[0]
    lstm_in = self.upward(x)
    h_rest = None
    if self.h is not None:
        h_size = self.h.shape[0]
        if batch == 0:
            h_rest = self.h
        elif h_size < batch:
            msg = ('The batch size of x must be equal to or less than '
                   'the size of the previous state h.')
            raise TypeError(msg)
        elif h_size > batch:
            h_update, h_rest = split_axis.split_axis(
                self.h, [batch], axis=0)
            lstm_in += self.lateral(h_update)
        else:
            lstm_in += self.lateral(self.h)
    if self.c is None:
        with chainer.using_device(self.device):
            self.c = variable.Variable(
                self.xp.zeros((batch, self.state_size), dtype=x.dtype))
    self.c, y = lstm.lstm(self.c, lstm_in)

    if h_rest is None:
        self.h = y
    elif len(y.array) == 0:
        self.h = h_rest
    else:
        self.h = concat.concat([y, h_rest], axis=0)

    return y
def forward(self, x, n_batch_axes=1):
    """Applies the linear layer.

    Args:
        x (~chainer.Variable): Batch of input vectors.
        n_batch_axes (int): The number of batch axes. The default is 1. The
            input variable is reshaped into
            (:math:`{\\rm n\\_batch\\_axes} + 1`)-dimensional tensor.
            This should be greater than 0.

    Returns:
        ~chainer.Variable: Output of the linear layer.

    """
    if self.W.data is None:
        in_size = utils.size_of_shape(x.shape[1:])
        self._initialize_params(in_size)
    return linear.linear(x, self.W, self.b, n_batch_axes=n_batch_axes)
def forward(self, x, n_batch_axes=1):
    """Applies the linear layer.

    Args:
        x (~chainer.Variable): Batch of input vectors.
        n_batch_axes (int): The number of batch axes. The default is 1. The
            input variable is reshaped into
            (:math:`{\\rm n\\_batch\\_axes} + 1`)-dimensional tensor.
            This should be greater than 0.

    Returns:
        ~chainer.Variable: Output of the linear layer.

    """
    if self.W.array is None:
        in_size = utils.size_of_shape(x.shape[1:])
        self._initialize_params(in_size)
    return linear.linear(x, self.W, self.b, n_batch_axes=n_batch_axes)
def forward(self, x):
    """Apply layer normalization to given input.

    Args:
        x (~chainer.Variable): Batch vectors.
            Shape of this value must be `(batch_size, unit_size)`,
            e.g., the output of :func:`~chainer.functions.linear`.

    Returns:
        ~chainer.Variable: Output of the layer normalization.

    """
    if self.gamma.array is None:
        in_size = utils.size_of_shape(x.shape[1:])
        self._initialize_params(in_size)
    return layer_normalization.layer_normalization(
        x, self.gamma, self.beta, self.eps)
def forward(self, x):
    """Apply layer normalization to given input.

    Args:
        x (~chainer.Variable): Batch vectors.
            Shape of this value must be `(batch_size, unit_size)`,
            e.g., the output of :func:`~chainer.functions.linear`.

    Returns:
        ~chainer.Variable: Output of the layer normalization.

    """
    if self.gamma.data is None:
        in_size = utils.size_of_shape(x.shape[1:])
        self._initialize_params(in_size)
    return layer_normalization.layer_normalization(
        x, self.gamma, self.beta, self.eps)
def forward(self, xs):
    xp = backend.get_array_module(*xs)
    if self.length is None:
        length = max(len(x) for x in xs)
    else:
        length = self.length

    shape = (len(xs), length) + xs[0].shape[1:]
    y = xp.empty(shape, xs[0].dtype)
    if length == 0:
        return y,  # y is an empty array

    if xp is numpy or any(not x._c_contiguous for x in xs):
        for i, x in enumerate(xs):
            l = len(x)
            if l == length:
                y[i] = x
            else:
                y[i, 0:l] = x
                y[i, l:] = self.padding
    else:
        # This code assumes that all arrays are c_contiguous
        ptr_shape = (Ellipsis,) + (None,) * xs[0].ndim
        ptrs = cuda.cupy.array(
            [x.data for x in xs], numpy.uintp)[ptr_shape]
        lengths = cuda.cupy.array(
            [len(x) for x in xs], numpy.int32)[ptr_shape]
        base = utils.size_of_shape(xs[0].shape[1:])
        cuda.elementwise(
            'P ptr, int32 length, T pad, int32 base, int32 max_length',
            'T y',
            '''
            int d = i / base % max_length;
            if (d < length) {
              y = reinterpret_cast<const T*>(ptr)[i % (base * max_length)];
            } else {
              y = pad;
            }
            ''',
            'pad_sequence_fwd'
        )(ptrs, lengths, self.padding, base, length, y)

    return y,
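# Illustrative sketch (not from the original source): a plain-NumPy version of
# the CPU branch above. Sequences of unequal length are copied into a
# preallocated (batch, max_length, ...) array; the padding value 0 used here
# is an arbitrary stand-in for ``self.padding``.
import numpy as np

xs = [np.ones((3, 2), np.float32), np.ones((1, 2), np.float32)]
length = max(len(x) for x in xs)
y = np.empty((len(xs), length) + xs[0].shape[1:], xs[0].dtype)
for i, x in enumerate(xs):
    y[i, :len(x)] = x
    y[i, len(x):] = 0  # padding
assert y.shape == (2, 3, 2) and (y[1, 1:] == 0).all()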
def ada_loss_linear(x, W, b=None, n_batch_axes=1, ada_loss=None):
    """Simply replace the LinearFunction in linear to AdaLossLinear."""
    if n_batch_axes <= 0:
        raise ValueError('n_batch_axes should be greater than 0.')
    if n_batch_axes > 1:
        batch_shape = x.shape[:n_batch_axes]
        batch_size = utils.size_of_shape(batch_shape)
        x = x.reshape(batch_size, -1)
    elif x.ndim > 2:
        x = x.reshape(x.shape[0], -1)
    if b is None:
        args = x, W
    else:
        args = x, W, b

    y, = AdaLossLinearFunction(ada_loss=ada_loss).apply(args)
    if n_batch_axes > 1:
        y = y.reshape(batch_shape + (-1,))
    return y
def thresholded_linear(x, W, b=None, n_batch_axes=1, threshold=6e-8):
    if n_batch_axes <= 0:
        raise ValueError('n_batch_axes should be greater than 0.')
    if n_batch_axes > 1:
        batch_shape = x.shape[:n_batch_axes]
        batch_size = utils.size_of_shape(batch_shape)
        x = x.reshape(batch_size, -1)
    elif x.ndim > 2:
        x = x.reshape(x.shape[0], -1)
    if b is None:
        args = x, W
    else:
        args = x, W, b

    y, = ThresholdedLinearFunction(threshold=threshold).apply(args)
    if n_batch_axes > 1:
        y = y.reshape(batch_shape + (-1,))
    return y
def __call__(self, array):
    if self.dtype is not None:
        assert array.dtype == self.dtype
    xp = backend.get_array_module(array)
    if not array.shape:  # 0-dim case
        array[...] = self.scale * (2 * numpy.random.randint(2) - 1)
    elif not array.size:
        raise ValueError('Array to be initialized must be non-empty.')
    else:
        # numpy.prod returns float value when the argument is empty.
        flat_shape = (len(array), utils.size_of_shape(array.shape[1:]))
        if flat_shape[0] > flat_shape[1]:
            raise ValueError('Cannot make orthogonal system because'
                             ' # of vectors ({}) is larger than'
                             ' that of dimensions ({})'.format(
                                 flat_shape[0], flat_shape[1]))
        a = numpy.random.normal(size=flat_shape)
        # cupy.linalg.qr requires cusolver in CUDA 8+
        q, r = numpy.linalg.qr(a.T)
        q *= numpy.copysign(self.scale, numpy.diag(r))
        array[...] = xp.asarray(q.T.reshape(array.shape))
def __call__(self, array):
    if self.dtype is not None:
        assert array.dtype == self.dtype,\
            '{} != {}'.format(array.dtype, self.dtype)
    if not array.shape:  # 0-dim case
        if self.rng is None:
            a = numpy.random.randint(2)
        else:
            a = self.rng.randint(2)
        a = int(a)
        array[...] = self.scale * (2 * a - 1)
    elif not array.size:
        raise ValueError('Array to be initialized must be non-empty.')
    else:
        # numpy.prod returns float value when the argument is empty.
        out_dim = len(array)
        in_dim = utils.size_of_shape(array.shape[1:])
        if (in_dim > out_dim and self._checks[0]) or (
                in_dim < out_dim and self._checks[1]):
            raise ValueError(
                'Cannot make orthogonal {}. '
                'shape = {}, interpreted as '
                '{}-dim input and {}-dim output.'.format(
                    self.mode, array.shape, in_dim, out_dim))
        transpose = in_dim > out_dim
        if self.rng is None:
            a = numpy.random.normal(size=(out_dim, in_dim))
        else:
            a_tmp = self.rng.normal(size=(out_dim, in_dim))
            a = numpy.empty(a_tmp.shape, dtype=a_tmp.dtype)
            backend.copyto(a, a_tmp)
        if transpose:
            a = a.T
        # cupy.linalg.qr requires cusolver in CUDA 8+
        q, r = numpy.linalg.qr(a)
        q *= numpy.copysign(self.scale, numpy.diag(r))
        if transpose:
            q = q.T
        backend.copyto(array, q.reshape(array.shape).astype(
            array.dtype, copy=False))
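# Illustrative sketch (not from the original source): the QR step used by the
# orthogonal initializers above yields orthonormal rows when out_dim <= in_dim.
# The sizes (3, 5) are arbitrary example values.
import numpy as np

out_dim, in_dim = 3, 5
a = np.random.normal(size=(out_dim, in_dim))
q, r = np.linalg.qr(a.T)                  # q: (in_dim, out_dim) with orthonormal columns
q = (q * np.copysign(1.0, np.diag(r))).T  # fix column signs as above, then transpose
assert np.allclose(q @ q.T, np.eye(out_dim), atol=1e-6)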
def forward(self, inputs):
    gy, = inputs
    xp = backend.get_array_module(gy)
    repeats = self.repeats
    axis = self.axis
    shape = list(self.in_shape)
    dtype = self.in_dtype

    if len(gy) == 0:
        gx = xp.zeros(shape, dtype)
        return gx,

    if len(repeats) == 1:
        repeats = int(repeats[0])
        if axis is None:
            gx = gy.reshape(-1, repeats).sum(axis=1).reshape(shape)
        else:
            shape[axis:axis + 1] = [-1, repeats]
            gx = gy.reshape(shape).sum(axis=axis + 1)
        return gx,

    if axis is None:
        pos = 0
        gx = xp.zeros(utils.size_of_shape(shape), dtype)
        for (i, r) in enumerate(repeats):
            gx[i] = xp.sum(gy[pos:pos + r])
            pos += r
        gx = gx.reshape(shape)
    else:
        gx = xp.zeros(shape, dtype)
        pos = 0
        src = [slice(None)] * axis + [None]
        dst = [slice(None)] * axis + [None]
        for (i, r) in enumerate(repeats):
            src[-1] = slice(pos, pos + r)
            dst[-1] = slice(i, i + 1)
            gx[tuple(dst)] = gy[tuple(src)].sum(axis=axis, keepdims=True)
            pos += r
    return gx,
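# Illustrative sketch (not from the original source): the single-repeat,
# axis=None branch above sums each group of ``repeats`` consecutive elements
# of ``gy`` back into one element of ``gx``. Sizes below are arbitrary.
import numpy as np

in_shape, repeats = (2, 3), 4
gy = np.ones(np.prod(in_shape) * repeats, dtype=np.float32)
gx = gy.reshape(-1, repeats).sum(axis=1).reshape(in_shape)
assert gx.shape == in_shape and (gx == repeats).all()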
def __call__(self, array):
    if self.dtype is not None:
        assert array.dtype == self.dtype
    xp = backend.get_array_module(array)
    if not array.shape:  # 0-dim case
        array[...] = self.scale
    elif not array.size:
        raise ValueError('Array to be initialized must be non-empty.')
    else:
        # numpy.prod returns float value when the argument is empty.
        flat_shape = (len(array), utils.size_of_shape(array.shape[1:]))
        if flat_shape[0] > flat_shape[1]:
            raise ValueError('Cannot make orthogonal system because'
                             ' # of vectors ({}) is larger than'
                             ' that of dimensions ({})'.format(
                                 flat_shape[0], flat_shape[1]))
        a = numpy.random.normal(size=flat_shape)
        # we do not have cupy.linalg.svd for now
        u, _, v = numpy.linalg.svd(a, full_matrices=False)
        # pick the one with the correct shape
        q = u if u.shape == flat_shape else v
        array[...] = xp.asarray(q.reshape(array.shape))
        array *= self.scale
def _get_tensor4d_shape(axis, shape):
    left_shape = utils.size_of_shape(shape[:axis])
    center_shape = shape[axis]
    right_shape = utils.size_of_shape(shape[axis:][1:])
    return left_shape, center_shape, right_shape, 1
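# Worked example (illustrative numbers only): for axis=2 and shape=(2, 3, 4, 5),
# the helper above collapses the leading axes into 2 * 3 = 6 and the trailing
# axes into 5, giving a 4-d view of (6, 4, 5, 1).
assert _get_tensor4d_shape(2, (2, 3, 4, 5)) == (6, 4, 5, 1)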