def __init__(self, n_layers, in_size, out_size, dropout,
             use_bi_direction, **kwargs):
    """Create the GRU parameters for every layer and direction.

    One child link is built per (layer, direction) pair. Each child holds
    six weight matrices ``w0`` .. ``w5`` and six bias vectors ``b0`` ..
    ``b5``; the children are handed to the parent constructor in
    creation order.

    Args:
        n_layers: Number of stacked layers.
        in_size: Column count of ``w0`` .. ``w2`` in the first layer.
        out_size: Row count of every weight and length of every bias.
        dropout: Stored unchanged as ``self.dropout``.
        use_bi_direction: When true, two children are created per layer
            and ``rnn.n_step_bigru`` is selected instead of
            ``rnn.n_step_gru``.
        **kwargs: Accepted only so that the removed ``use_cudnn`` argument
            can be rejected with an explanatory message.
    """
    removed_msg = ('use_cudnn argument is not supported anymore. '
                   'Use chainer.using_config')
    argument.check_unexpected_kwargs(kwargs, use_cudnn=removed_msg)
    argument.assert_kwargs_empty(kwargs)

    direction = 2 if use_bi_direction else 1
    weights = []
    for layer in six.moves.range(n_layers):
        # Width seen by w0..w2: the raw input in the first layer,
        # ``out_size * direction`` in every later layer.
        first_width = in_size if layer == 0 else out_size * direction
        for _ in six.moves.range(direction):
            weight = link.Link()
            with weight.init_scope():
                for j in six.moves.range(6):
                    w_in = first_width if j < 3 else out_size
                    w = variable.Parameter(
                        normal.Normal(numpy.sqrt(1. / w_in)),
                        (out_size, w_in))
                    b = variable.Parameter(0, (out_size,))
                    setattr(weight, 'w%d' % j, w)
                    setattr(weight, 'b%d' % j, b)
            weights.append(weight)

    super(NStepGRUBase, self).__init__(*weights)

    self.n_layers = n_layers
    self.dropout = dropout
    self.out_size = out_size
    self.direction = direction
    if use_bi_direction:
        self.rnn = rnn.n_step_bigru
    else:
        self.rnn = rnn.n_step_gru
def spatial_transformer_sampler(x, grid, **kwargs):
    """2D Spatial Transformer sampler.

    This is a differentiable image sampler. With a set of sampling points
    ``grid`` and an input feature map ``x``, this produces a sampled output
    feature map.

    This function currently only supports bilinear interpolation as a
    sampling kernel.

    When coordinates in ``grid`` are outside the range :math:`[-1, 1]`,
    values are sampled from a zero padded input image.

    Notation: here is a notation for dimensionalities.

    - :math:`n` is the batch size.
    - :math:`c_I` is the number of the input channels.
    - :math:`h` and :math:`w` are the height and width of the input image,
      respectively.
    - :math:`h_O` and :math:`w_O` are the height and width of the output
      image.

    See detail in the following paper:
    `Spatial Transformer Networks <https://arxiv.org/abs/1506.02025>`_.

    .. note::

        cuDNN supports SpatialTransformerSampler from version 5.0.0.

    Args:
        x (:class:`~chainer.Variable` or :ref:`ndarray`):
            Input variable of shape :math:`(n, c_I, h, w)`.
        grid (~chainer.Variable): Coordinate variable of shape
            :math:`(n, 2, h_O, w_O)`. Each coordinate defines the spatial
            location in the input where a sampling kernel is applied to get
            the value at a particular pixel in the output.
            ``grid[idx, :, i, j]`` corresponds to the coordinate that is
            used to sample the values for an output pixel at location
            :math:`(i, j)`.

            In the second dimension, the first coordinate corresponds to the
            location along the horizontal axis, and the second coordinate
            corresponds to the location along the vertical axis.

            The coordinate :math:`(-1, -1)` corresponds to the upper-left
            corner of the input image.

    Returns:
        ~chainer.Variable: Output feature map of shape
        :math:`(n, c_I, h_O, w_O)`.

    """
    if kwargs:
        # The only keyword ever accepted here was ``use_cudnn``; reject it
        # with a pointer to its replacement, then reject anything else.
        argument.check_unexpected_kwargs(
            kwargs, use_cudnn='The argument "use_cudnn" is not '
            'supported anymore. '
            'Use chainer.using_config(\'use_cudnn\', value) '
            'context where value can be `always`, `never`, or `auto`.')
        argument.assert_kwargs_empty(kwargs)
    return SpatialTransformerSampler()(x, grid)
def zoneout(h, x, ratio=.5, **kwargs):
    """zoneout(h, x, ratio=.5)

    Drops elements of input variable and sets to previous variable randomly.

    Elements of ``x`` are dropped randomly with probability ``ratio`` and
    replaced by the corresponding elements of the previous variable ``h``.
    In testing mode, it does nothing and just returns ``x``.

    .. warning::

       ``train`` argument is not supported anymore since v2.
       Instead, use ``chainer.using_config('train', train)``.
       See :func:`chainer.using_config`.

    Args:
        h (~chainer.Variable): Previous variable.
        x (~chainer.Variable): Input variable.
        ratio (float): Zoneout ratio.

    Returns:
        ~chainer.Variable: Output variable.

    See the paper: `Zoneout: Regularizing RNNs by Randomly Preserving Hidden
    Activations <https://arxiv.org/abs/1606.01305>`_.

    """
    removed_msg = ('train argument is not supported anymore. '
                   'Use chainer.using_config')
    argument.check_unexpected_kwargs(kwargs, train=removed_msg)
    argument.assert_kwargs_empty(kwargs)

    # Outside training mode the input passes through untouched.
    if not configuration.config.train:
        return x
    return Zoneout(ratio)(h, x)
def __init__(self, in_channels, out_channels, ksize=None, stride=1, pad=0,
             nobias=False, outsize=None, initialV=None, **kwargs):
    """Set up the deconvolution link and its ``V``, ``g`` and ``b`` parameters.

    Args:
        in_channels: Number of input channels, or the number of output
            channels when ``ksize`` is omitted (see below).
        out_channels: Number of output channels, or the kernel size when
            ``ksize`` is omitted.
        ksize: Kernel size. When ``None``, the first two positional
            arguments are reinterpreted as ``(out_channels, ksize)`` and
            ``V`` is left uninitialized.
        stride: Stride, normalized with ``_pair``.
        pad: Padding, normalized with ``_pair``.
        nobias: When true, ``self.b`` is ``None`` instead of a parameter.
        outsize: Stored as ``self.outsize``; ``None`` becomes
            ``(None, None)``.
        initialV: Passed to ``initializers._get_initializer`` to build the
            initializer of ``V``.
        **kwargs: Accepted only so that the removed ``deterministic``
            argument can be rejected with an explanatory message.
    """
    super(Deconvolution2D, self).__init__()

    removed_msg = (
        "deterministic argument is not supported anymore. "
        "Use chainer.using_config('cudnn_deterministic', value) "
        "context where value is either `True` or `False`.")
    argument.check_unexpected_kwargs(kwargs, deterministic=removed_msg)
    argument.assert_kwargs_empty(kwargs)

    if ksize is None:
        # Two-positional call form: shift the arguments one slot and mark
        # the input channel count as unknown.
        ksize = out_channels
        out_channels = in_channels
        in_channels = None

    self.ksize = ksize
    self.stride = _pair(stride)
    self.pad = _pair(pad)
    if outsize is None:
        self.outsize = (None, None)
    else:
        self.outsize = outsize
    self.out_channels = out_channels
    self.nobias = nobias

    with self.init_scope():
        self.V = variable.Parameter(initializers._get_initializer(initialV))
        if in_channels is not None:
            # The shape is known up front, so V can be materialized now.
            kh, kw = _pair(self.ksize)
            self.V.initialize((in_channels, self.out_channels, kh, kw))

        if nobias:
            self.b = None
        else:
            self.b = variable.Parameter(None)
        self.g = variable.Parameter(None)
def dropout(x, ratio=.5, **kwargs):
    """dropout(x, ratio=.5)

    Drops elements of input variable randomly.

    Input elements are dropped randomly with probability ``ratio`` and the
    remaining elements are scaled by factor ``1 / (1 - ratio)``. In testing
    mode, it does nothing and just returns ``x``.

    .. warning::

       ``train`` argument is not supported anymore since v2.
       Instead, use ``chainer.using_config('train', train)``.
       See :func:`chainer.using_config`.

    Args:
        x (~chainer.Variable): Input variable.
        ratio (float): Dropout ratio.

    Returns:
        ~chainer.Variable: Output variable.

    See the paper by G. Hinton: `Improving neural networks by preventing
    co-adaptation of feature detectors <https://arxiv.org/abs/1207.0580>`_.

    """
    removed_msg = ('train argument is not supported anymore. '
                   'Use chainer.using_config')
    argument.check_unexpected_kwargs(kwargs, train=removed_msg)
    argument.assert_kwargs_empty(kwargs)

    # Outside training mode the input is handed back as is.
    if not configuration.config.train:
        return x
    return Dropout(ratio)(x)
def __init__(self, n_layers, in_size, out_size, dropout,
             use_bi_direction, activation, **kwargs):
    """Create the RNN parameters for every layer and direction.

    One child link is built per (layer, direction) pair. Each child holds
    two weight matrices ``w0``/``w1`` filled from a normal distribution
    with standard deviation ``sqrt(1 / w_in)`` and two bias vectors
    ``b0``/``b1`` filled with zeros.

    Args:
        n_layers: Number of stacked layers.
        in_size: Column count of ``w0`` in the first layer.
        out_size: Row count of every weight and length of every bias.
        dropout: Stored unchanged as ``self.dropout``.
        use_bi_direction: When true, two children are created per layer
            and ``rnn.n_step_birnn`` is selected instead of
            ``rnn.n_step_rnn``.
        activation: Stored unchanged as ``self.activation``.
        **kwargs: Accepted only so that the removed ``use_cudnn`` argument
            can be rejected with an explanatory message.
    """
    removed_msg = ('use_cudnn argument is not supported anymore. '
                   'Use chainer.using_config')
    argument.check_unexpected_kwargs(kwargs, use_cudnn=removed_msg)
    argument.assert_kwargs_empty(kwargs)

    direction = 2 if use_bi_direction else 1
    weights = []
    for layer in six.moves.range(n_layers):
        # Width seen by w0: the raw input in the first layer,
        # ``out_size * direction`` in every later layer.
        first_width = in_size if layer == 0 else out_size * direction
        for _ in six.moves.range(direction):
            weight = link.Link()
            for j in six.moves.range(2):
                w_in = first_width if j < 1 else out_size
                w_name = 'w%d' % j
                b_name = 'b%d' % j
                weight.add_param(w_name, (out_size, w_in))
                weight.add_param(b_name, (out_size,))
                getattr(weight, w_name).data[...] = numpy.random.normal(
                    0, numpy.sqrt(1. / w_in), (out_size, w_in))
                getattr(weight, b_name).data[...] = 0
            weights.append(weight)

    super(NStepRNNBase, self).__init__(*weights)

    self.n_layers = n_layers
    self.dropout = dropout
    self.activation = activation
    self.out_size = out_size
    self.direction = direction
    if use_bi_direction:
        self.rnn = rnn.n_step_birnn
    else:
        self.rnn = rnn.n_step_rnn
def dropout(x, ratio=.5, **kwargs):
    """dropout(x, ratio=.5)

    Drops elements of input variable randomly.

    Input elements are dropped randomly with probability ``ratio`` and the
    remaining elements are scaled by factor ``1 / (1 - ratio)``. In testing
    mode, it does nothing and just returns ``x``.

    .. warning::

       ``train`` argument is not supported anymore since v2.
       Instead, use ``chainer.using_config('train', boolean)``.
       See :func:`chainer.using_config`.

    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or
        :class:`cupy.ndarray`):
            Input variable. A :math:`(s_1, s_2, ..., s_N)` -shaped float
            array.
        ratio (float):
            Dropout ratio. The ``ratio`` must be ``0.0 <= ratio < 1.0``.

    Returns:
        ~chainer.Variable: Output variable.

    See the paper by G. Hinton: `Improving neural networks by preventing
    co-adaptation of feature detectors <https://arxiv.org/abs/1207.0580>`_.

    .. admonition:: Example

        >>> x = np.array([[-1, 0], [2, -3], [-2, 1]], 'f')
        >>> with chainer.using_config('train', True):
        ...     y = F.dropout(x)
        >>> y.data
        array([[-2.,  0.],
               [ 4., -6.],
               [-0.,  2.]], dtype=float32)
        >>> with chainer.using_config('train', True):
        ...     # dropout returns original input if ratio=0.0
        ...     y = F.dropout(x, ratio=0.0)
        >>> (x == y.data).all()
        True
        >>> with chainer.using_config('train', False):
        ...     # dropout in test mode returns original input
        ...     y = F.dropout(x)
        >>> (x == y.data).all()
        True

    """
    removed_msg = ('train argument is not supported anymore. '
                   'Use chainer.using_config')
    argument.check_unexpected_kwargs(kwargs, train=removed_msg)
    argument.assert_kwargs_empty(kwargs)

    # Outside training mode the input is only wrapped as a variable.
    if not configuration.config.train:
        return chainer.as_variable(x)
    outputs = Dropout(ratio).apply((x, ))
    return outputs[0]
def __init__(self, n_layers, in_size, out_size, dropout,
             use_bi_direction, **kwargs):
    """Create the GRU parameters for every layer and direction.

    One child link is built per (layer, direction) pair. Each child holds
    nine weight matrices ``w0`` .. ``w8`` and nine bias vectors ``b0`` ..
    ``b8``. ``w0`` .. ``w2`` take the layer input, ``w3`` .. ``w7`` have
    ``out_size`` columns and ``w8`` has ``out_size * 2`` columns.

    Args:
        n_layers: Number of stacked layers.
        in_size: Column count of ``w0`` .. ``w2`` in the first layer.
        out_size: Row count of every weight and length of every bias.
        dropout: Stored unchanged as ``self.dropout``.
        use_bi_direction: When true, two children are created per layer
            and ``rnn.n_step_bigru`` is selected instead of
            ``rnn.n_step_gru``.
        **kwargs: Accepted only so that the removed ``use_cudnn`` argument
            can be rejected with an explanatory message.
    """
    removed_msg = ('use_cudnn argument is not supported anymore. '
                   'Use chainer.using_config')
    argument.check_unexpected_kwargs(kwargs, use_cudnn=removed_msg)
    argument.assert_kwargs_empty(kwargs)

    direction = 2 if use_bi_direction else 1
    weights = []
    for layer in six.moves.range(n_layers):
        # Width seen by w0..w2: the raw input in the first layer,
        # ``out_size * direction`` in every later layer.
        first_width = in_size if layer == 0 else out_size * direction
        # Column count of w0..w8, in creation order.
        widths = ([first_width] * 3 + [out_size] * 3
                  + [out_size, out_size, out_size * 2])
        for _ in six.moves.range(direction):
            weight = link.Link()
            with weight.init_scope():
                for j, w_in in enumerate(widths):
                    w = variable.Parameter(
                        normal.Normal(numpy.sqrt(1. / w_in)),
                        (out_size, w_in))
                    b = variable.Parameter(0, (out_size, ))
                    setattr(weight, 'w%d' % j, w)
                    setattr(weight, 'b%d' % j, b)
            weights.append(weight)

    super(NStepGRUBase, self).__init__(*weights)

    self.n_layers = n_layers
    self.dropout = dropout
    self.out_size = out_size
    self.direction = direction
    if use_bi_direction:
        self.rnn = rnn.n_step_bigru
    else:
        self.rnn = rnn.n_step_gru
def __call__(self, x, layers=None, **kwargs):
    """__call__(self, x, layers=['prob'])

    Computes all the feature maps specified by ``layers``.

    .. warning::

       ``train`` argument is not supported anymore since v2.
       Instead, use ``chainer.using_config('train', train)``.
       See :func:`chainer.using_config`.

    Args:
        x (~chainer.Variable): Input variable. It should be prepared by
            ``prepare`` function.
        layers (list of str): The list of layer names you want to extract.

    Returns:
        Dictionary of ~chainer.Variable: A dictionary in which
        the key contains the layer name and the value contains
        the corresponding feature map variable.

    """
    if layers is None:
        layers = ['prob']

    removed_msg = ('train argument is not supported anymore. '
                   'Use chainer.using_config')
    argument.check_unexpected_kwargs(kwargs, train=removed_msg)
    argument.assert_kwargs_empty(kwargs)

    # Each auxiliary classifier restarts from the saved output of the
    # inception block named here (``None`` until that block has run).
    branch_source = {'loss1_fc2': 'inception_4a',
                     'loss2_fc2': 'inception_4d'}
    saved = {}

    pending = set(layers)
    activations = {}
    h = x
    for key, funcs in self.functions.items():
        if not pending:
            # Every requested layer has been collected.
            break

        if key in branch_source:
            h = saved.get(branch_source[key])

        for func in funcs:
            h = func(h)

        if key in pending:
            activations[key] = h
            pending.remove(key)

        if key in ('inception_4a', 'inception_4d'):
            saved[key] = h

    return activations
def forward(self, x, layers=None, **kwargs):
    """forward(self, x, layers=['prob'])

    Computes all the feature maps specified by ``layers``.

    .. warning::

       ``train`` argument is not supported anymore since v2.
       Instead, use ``chainer.using_config('train', train)``.
       See :func:`chainer.using_config`.

    Args:
        x (~chainer.Variable): Input variable. It should be prepared by
            ``prepare`` function.
        layers (list of str): The list of layer names you want to extract.

    Returns:
        Dictionary of ~chainer.Variable: A dictionary in which
        the key contains the layer name and the value contains
        the corresponding feature map variable.

    """
    if layers is None:
        layers = ['prob']

    if kwargs:
        removed_msg = ('train argument is not supported anymore. '
                       'Use chainer.using_config')
        argument.check_unexpected_kwargs(kwargs, train=removed_msg)
        argument.assert_kwargs_empty(kwargs)

    # Each auxiliary classifier restarts from the saved output of the
    # inception block named here (``None`` until that block has run).
    branch_source = {'loss1_fc2': 'inception_4a',
                     'loss2_fc2': 'inception_4d'}
    saved = {}

    pending = set(layers)
    activations = {}
    h = x
    for key, funcs in self.functions.items():
        if not pending:
            # Every requested layer has been collected.
            break

        if key in branch_source:
            h = saved.get(branch_source[key])

        for func in funcs:
            h = func(h)

        if key in pending:
            activations[key] = h
            pending.remove(key)

        if key in ('inception_4a', 'inception_4d'):
            saved[key] = h

    return activations
def spatial_transformer_grid(theta, output_shape, **kwargs):
    """2D Spatial Transformer grid.

    This function generates coordinates of the points sampled from an image
    to perform warping described in `Spatial Transformer Networks
    <https://arxiv.org/abs/1506.02025>`_.

    Given a coordinate in the warped image :math:`(x_i^t, y_i^t)`, the point
    sampled from the source image :math:`(x_i^s, y_i^s)` is calculated
    by the following equation.

    .. note::

        cuDNN supports SpatialTransformerGrid from version 5.0.0.

    .. math::

        \\left(\\begin{matrix} x_i^s \\\\
            y_i^s \\end{matrix}\\right)
        =
        \\left(\\begin{matrix} \\theta_{11} & \\theta_{12} & \\theta_{13} \\\\
            \\theta_{21} & \\theta_{22} & \\theta_{23} \\end{matrix}\\right)
        \\left(\\begin{matrix} x_i^t \\\\
            y_i^t \\\\
            1 \\end{matrix}\\right)

    Notation: here is a notation for dimensionalities.

    - :math:`n` is the batch size.
    - :math:`h_O` and :math:`w_O` are the height and the width of the output
      image.

    Args:
        theta (:class:`~chainer.Variable` or :ref:`ndarray`):
            An array of shape :math:`(n, 2, 3)`.
            This is a batch of :math:`2 \\times 3` matrix used for
            the warping described above.
        output_shape (tuple): A tuple of 2 elements: :math:`h_O, w_O`.

    Returns:
        ~chainer.Variable: A variable of shape :math:`(n, 2, h_O, w_O)`.
        In the 2nd dimension, the first element is the coordinate along the
        x axis, and the second element is the coordinate along the y axis.
        All the coordinates in the image are scaled to fit range
        :math:`[-1, 1]`.
        This means that the coordinate :math:`(-1, -1)` corresponds to
        the upper-left corner of the input image.

    """
    if kwargs:
        # ``use_cudnn`` is the only keyword that used to be accepted;
        # reject it with a pointer to its replacement, then anything else.
        removed_msg = (
            "The argument \"use_cudnn\" is not supported anymore. "
            "Use chainer.using_config('use_cudnn', value) "
            "context where value can be `always`, `never`, or `auto`.")
        argument.check_unexpected_kwargs(kwargs, use_cudnn=removed_msg)
        argument.assert_kwargs_empty(kwargs)

    grid_function = SpatialTransformerGrid(output_shape)
    return grid_function(theta)
def spatial_transformer_sampler_interp(x, grid, **kwargs):
    """Apply ``SpatialTransformerSamplerInterp`` to ``x`` and ``grid``.

    Args:
        x: First input, forwarded unchanged to the function object.
        grid: Second input, forwarded unchanged to the function object.
        **kwargs: Accepted only so that the removed ``use_cudnn`` argument
            can be rejected with an explanatory message.

    Returns:
        Whatever ``SpatialTransformerSamplerInterp()(x, grid)`` returns.
    """
    removed_msg = (
        "The argument \"use_cudnn\" is not supported anymore. "
        "Use chainer.using_config('use_cudnn', value) "
        "context where value can be `always`, `never`, or `auto`.")
    argument.check_unexpected_kwargs(kwargs, use_cudnn=removed_msg)
    argument.assert_kwargs_empty(kwargs)

    sampler = SpatialTransformerSamplerInterp()
    return sampler(x, grid)
def dropout(x, ratio=.5, **kwargs):
    """dropout(x, ratio=.5)

    Drops elements of input variable randomly.

    Input elements are dropped randomly with probability ``ratio`` and the
    remaining elements are scaled by factor ``1 / (1 - ratio)``. In testing
    mode, it does nothing and just returns ``x``.

    .. warning::

       ``train`` argument is not supported anymore since v2.
       Instead, use ``chainer.using_config('train', boolean)``.
       See :func:`chainer.using_config`.

    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or
        :class:`cupy.ndarray`):
            Input variable. A :math:`(s_1, s_2, ..., s_N)` -shaped float
            array.
        ratio (float):
            Dropout ratio. The ``ratio`` must be ``0.0 <= ratio < 1.0``.

    Returns:
        ~chainer.Variable: Output variable.

    See the paper by G. Hinton: `Improving neural networks by preventing
    co-adaptation of feature detectors <https://arxiv.org/abs/1207.0580>`_.

    .. admonition:: Example

        >>> x = np.array([[-1, 0], [2, -3], [-2, 1]], np.float32)
        >>> with chainer.using_config('train', True):
        ...     y = F.dropout(x)
        >>> y.data
        array([[-2.,  0.],
               [ 4., -6.],
               [-0.,  2.]], dtype=float32)
        >>> with chainer.using_config('train', True):
        ...     # dropout returns original input if ratio=0.0
        ...     y = F.dropout(x, ratio=0.0)
        >>> (x == y.data).all()
        True
        >>> with chainer.using_config('train', False):
        ...     # dropout in test mode returns original input
        ...     y = F.dropout(x)
        >>> (x == y.data).all()
        True

    """
    removed_msg = ('train argument is not supported anymore. '
                   'Use chainer.using_config')
    argument.check_unexpected_kwargs(kwargs, train=removed_msg)
    argument.assert_kwargs_empty(kwargs)

    # Outside training mode the input is only wrapped as a variable.
    if not configuration.config.train:
        return chainer.as_variable(x)
    outputs = Dropout(ratio).apply((x,))
    return outputs[0]
def spatial_transformer_grid(theta, output_shape, **kwargs):
    """2D Spatial Transformer grid.

    This function generates coordinates of the points sampled from an image
    to perform warping described in `Spatial Transformer Networks
    <https://arxiv.org/abs/1506.02025>`_.

    Given a coordinate in the warped image :math:`(x_i^t, y_i^t)`, the point
    sampled from the source image :math:`(x_i^s, y_i^s)` is calculated
    by the following equation.

    .. note::

        cuDNN supports SpatialTransformerGrid from version 5.0.0.

    .. math::

        \\left(\\begin{matrix} x_i^s \\\\
            y_i^s \\end{matrix}\\right)
        =
        \\left(\\begin{matrix} \\theta_{11} & \\theta_{12} & \\theta_{13} \\\\
            \\theta_{21} & \\theta_{22} & \\theta_{23} \\end{matrix}\\right)
        \\left(\\begin{matrix} x_i^t \\\\
            y_i^t \\\\
            1 \\end{matrix}\\right)

    Notation: here is a notation for dimensionalities.

    - :math:`n` is the batch size.
    - :math:`h_O` and :math:`w_O` are the height and the width of the output
      image.

    Args:
        theta (:class:`~chainer.Variable` or :ref:`ndarray`):
            An array of shape :math:`(n, 2, 3)`.
            This is a batch of :math:`2 \\times 3` matrix used for
            the warping described above.
        output_shape (tuple): A tuple of 2 elements: :math:`h_O, w_O`.

    Returns:
        ~chainer.Variable: A variable of shape :math:`(n, 2, h_O, w_O)`.
        In the 2nd dimension, the first element is the coordinate along the
        x axis, and the second element is the coordinate along the y axis.
        All the coordinates in the image are scaled to fit range
        :math:`[-1, 1]`.
        This means that the coordinate :math:`(-1, -1)` corresponds to
        the upper-left corner of the input image.

    """
    if kwargs:
        # ``use_cudnn`` is the only keyword that used to be accepted;
        # reject it with a pointer to its replacement, then anything else.
        removed_msg = (
            'The argument "use_cudnn" is not supported anymore. '
            'Use chainer.using_config(\'use_cudnn\', value) '
            'context where value can be `always`, `never`, or `auto`.')
        argument.check_unexpected_kwargs(kwargs, use_cudnn=removed_msg)
        argument.assert_kwargs_empty(kwargs)

    grid_function = SpatialTransformerGrid(output_shape)
    return grid_function(theta)
def __call__(self, hx, cx, xs, **kwargs):
    """__call__(self, hx, cx, xs)

    Calculate all hidden states and cell states.

    .. warning::

       ``train`` argument is not supported anymore since v2.
       Instead, use ``chainer.using_config('train', train)``.
       See :func:`chainer.using_config`.

    Args:
        hx (~chainer.Variable or None): Initial hidden states. If ``None``
            is specified zero-vector is used.
        cx (~chainer.Variable or None): Initial cell states. If ``None``
            is specified zero-vector is used.
        xs (list of ~chainer.Variable): List of input sequences.
            Each element ``xs[i]`` is a :class:`chainer.Variable` holding
            a sequence.

    Returns:
        tuple: ``(hy, cy, ys)`` as produced by ``self.rnn``, permuted back
        to the order in which ``xs`` was given.

    """
    removed_msg = ('train argument is not supported anymore. '
                   'Use chainer.using_config')
    argument.check_unexpected_kwargs(kwargs, train=removed_msg)
    argument.assert_kwargs_empty(kwargs)

    assert isinstance(xs, (list, tuple))
    xp = cuda.get_array_module(hx, *xs)

    # Reorder the batch with the permutation from ``argsort_list_descent``;
    # it is undone on every output before returning.
    indices = n_step_rnn.argsort_list_descent(xs)
    indices_array = xp.array(indices)
    xs = n_step_rnn.permutate_list(xs, indices, inv=False)

    def _initial_state(state):
        # A missing state is replaced by ``self.init_hx``; a given one is
        # reordered to match the permuted batch.
        if state is None:
            return self.init_hx(xs)
        return permutate.permutate(state, indices_array, axis=1, inv=False)

    hx = _initial_state(hx)
    cx = _initial_state(cx)

    trans_x = transpose_sequence.transpose_sequence(xs)

    ws = [[getattr(w, 'w%d' % k) for k in range(8)] for w in self]
    bs = [[getattr(w, 'b%d' % k) for k in range(8)] for w in self]

    hy, cy, trans_y = self.rnn(
        self.n_layers, self.dropout, hx, cx, ws, bs, trans_x)

    hy = permutate.permutate(hy, indices_array, axis=1, inv=True)
    cy = permutate.permutate(cy, indices_array, axis=1, inv=True)
    ys = n_step_rnn.permutate_list(
        transpose_sequence.transpose_sequence(trans_y), indices, inv=True)

    return hy, cy, ys
def spatial_dropout(x, ratio=.1, **kwargs):
    """spatial_dropout(x, ratio=.1)

    Apply ``SpatialDropout(ratio)`` to ``x`` while in training mode.

    When ``configuration.config.train`` is false, ``x`` is only converted
    with ``chainer.as_variable`` and returned.

    Args:
        x: Input, forwarded to ``SpatialDropout`` or wrapped as a variable.
        ratio (float): Ratio handed to ``SpatialDropout``.
        **kwargs: Accepted only so that the removed ``train`` argument can
            be rejected with an explanatory message.

    Returns:
        The first output of ``SpatialDropout(ratio).apply`` in training
        mode, otherwise ``chainer.as_variable(x)``.
    """
    removed_msg = ('train argument is not supported anymore. '
                   'Use chainer.using_config')
    argument.check_unexpected_kwargs(kwargs, train=removed_msg)
    argument.assert_kwargs_empty(kwargs)

    if not configuration.config.train:
        return chainer.as_variable(x)
    outputs = SpatialDropout(ratio).apply((x,))
    return outputs[0]
def forward(self, inputs, outputs, disable=(), **kwargs):
    """forward(self, inputs, outputs, disable=())

    Executes a sub-network of the network.

    This function acts as an interpreter of the network definition for
    Caffe. On execution, it interprets each layer one by one, and if the
    bottom blobs are already computed, then emulates the layer and stores
    output blobs as :class:`~chainer.Variable` objects.

    .. warning::

       ``train`` argument is not supported anymore since v2.
       Instead, use ``chainer.using_config('train', train)``.
       See :func:`chainer.using_config`.

    Args:
        inputs (dict): A dictionary whose key-value pairs indicate initial
            correspondences between blob names and
            :class:`~chainer.Variable` objects.
        outputs (Iterable): A list of blob names whose corresponding
            :class:`~chainer.Variable` objects are returned.
        disable (Iterable): A list of layer names that will be ignored
            during the forward computation.

    Returns:
        tuple: A tuple of output :class:`~chainer.Variable` objects
        corresponding to elements of the `outputs` argument.

    """
    if kwargs:
        removed_msg = ('train argument is not supported anymore. '
                       'Use chainer.using_config')
        argument.check_unexpected_kwargs(kwargs, train=removed_msg)
        argument.assert_kwargs_empty(kwargs)

    variables = dict(inputs)
    skipped = set(disable)
    for func_name, bottom, top in self.layers:
        # A layer runs only when it is enabled, has an emulation function
        # and all of its bottom blobs have already been computed.
        if func_name in skipped:
            continue
        if func_name not in self.forwards:
            continue
        if not all(blob in variables for blob in bottom):
            continue

        func = self.forwards[func_name]
        output_vars = func(*tuple(variables[blob] for blob in bottom))
        if not isinstance(output_vars, (tuple, list)):
            # Single result: wrap it so it can be zipped with ``top``.
            output_vars = (output_vars,)
        for var, name in zip(output_vars, top):
            variables[name] = var

    self.variables = variables
    return tuple(variables[blob] for blob in outputs)
def __init__(self, stride=1, pad=0, cover_all=False, **kwargs):
    """Store the stride, padding and ``cover_all`` settings.

    Args:
        stride: Stride; ``_pair`` turns it into ``(self.sy, self.sx)``.
        pad: Padding; ``_pair`` turns it into ``(self.ph, self.pw)``.
        cover_all: Stored unchanged as ``self.cover_all``.
        **kwargs: Accepted only so that the removed ``deterministic``
            argument can be rejected with an explanatory message.
    """
    removed_msg = (
        "deterministic argument is not supported anymore. "
        "Use chainer.using_config('cudnn_deterministic', value) "
        "context where value is either `True` or `False`.")
    argument.check_unexpected_kwargs(kwargs, deterministic=removed_msg)
    argument.assert_kwargs_empty(kwargs)

    stride_y, stride_x = _pair(stride)
    self.sy = stride_y
    self.sx = stride_x

    pad_h, pad_w = _pair(pad)
    self.ph = pad_h
    self.pw = pad_w

    self.cover_all = cover_all
def extract(self, images, layers=None, size=(224, 224), **kwargs):
    """extract(self, images, layers=['pool5'], size=(224, 224))

    Extracts all the feature maps of given images.

    The difference of directly executing ``forward`` is that
    it directly accepts images as an input and automatically
    transforms them to a proper variable. That is,
    it is also interpreted as a shortcut method that implicitly calls
    ``prepare`` and ``forward`` functions.

    Unlike ``predict`` method, this method does not override
    ``chainer.config.train`` and ``chainer.config.enable_backprop``
    configuration. If you want to extract features without updating
    model parameters, you need to manually set configuration when
    calling this method as follows:

    .. code-block:: python

        # model is an instance of ResNetLayers (50 or 101 or 152 layers)
        with chainer.using_config('train', False):
            with chainer.using_config('enable_backprop', False):
                feature = model.extract([image])

    Args:
        images (iterable of PIL.Image or numpy.ndarray): Input images.
        layers (list of str): The list of layer names you want to extract.
        size (pair of ints): The resolution of resized images used as
            an input of CNN. All the given images are not resized
            if this argument is ``None``, but the resolutions of
            all the images should be the same.

    Returns:
        Dictionary of ~chainer.Variable: A dictionary in which
        the key contains the layer name and the value contains
        the corresponding feature map variable.

    """
    if layers is None:
        layers = ['pool5']

    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs,
            test=('test argument is not supported anymore. '
                  'Use chainer.using_config'),
            volatile=('volatile argument is not supported anymore. '
                      'Use chainer.using_config'))
        argument.assert_kwargs_empty(kwargs)

    # Preprocess every image, stack them into one batch and move the batch
    # to the array module of this link before running the network.
    batch = concat_examples([prepare(img, size=size) for img in images])
    x = Variable(self.xp.asarray(batch))
    return self(x, layers=layers)
def forward(self, inputs, outputs, disable=(), **kwargs):
    """forward(self, inputs, outputs, disable=())

    Executes a sub-network of the network.

    This function acts as an interpreter of the network definition for
    Caffe. On execution, it interprets each layer one by one, and if the
    bottom blobs are already computed, then emulates the layer and stores
    output blobs as :class:`~chainer.Variable` objects.

    .. warning::

       ``train`` argument is not supported anymore since v2.
       Instead, use ``chainer.using_config('train', train)``.
       See :func:`chainer.using_config`.

    Args:
        inputs (dict): A dictionary whose key-value pairs indicate initial
            correspondences between blob names and
            :class:`~chainer.Variable` objects.
        outputs (Iterable): A list of blob names whose corresponding
            :class:`~chainer.Variable` objects are returned.
        disable (Iterable): A list of layer names that will be ignored
            during the forward computation.

    Returns:
        tuple: A tuple of output :class:`~chainer.Variable` objects
        corresponding to elements of the `outputs` argument.

    """
    if kwargs:
        removed_msg = ('train argument is not supported anymore. '
                       'Use chainer.using_config')
        argument.check_unexpected_kwargs(kwargs, train=removed_msg)
        argument.assert_kwargs_empty(kwargs)

    variables = dict(inputs)
    skipped = set(disable)
    for func_name, bottom, top in self.layers:
        # A layer runs only when it is enabled, has an emulation function
        # and all of its bottom blobs have already been computed.
        if func_name in skipped:
            continue
        if func_name not in self.forwards:
            continue
        if not all(blob in variables for blob in bottom):
            continue

        func = self.forwards[func_name]
        output_vars = func(*tuple(variables[blob] for blob in bottom))
        if not isinstance(output_vars, collections_abc.Iterable):
            # Single result: wrap it so it can be zipped with ``top``.
            output_vars = (output_vars,)
        for var, name in zip(output_vars, top):
            variables[name] = var

    self.variables = variables
    return tuple(variables[blob] for blob in outputs)
def __call__(self, hx, cx, xs, **kwargs):
    """__call__(self, hx, cx, xs)

    Calculate all hidden states and cell states.

    .. warning::

       ``train`` argument is not supported anymore since v2.
       Instead, use ``chainer.using_config('train', train)``.
       See :func:`chainer.using_config`.

    Args:
        hx (~chainer.Variable or None): Initial hidden states. If ``None``
            is specified zero-vector is used.
        cx (~chainer.Variable or None): Initial cell states. If ``None``
            is specified zero-vector is used.
        xs (list of ~chainer.Variable): List of input sequences.
            Each element ``xs[i]`` is a :class:`chainer.Variable` holding
            a sequence.

    Returns:
        tuple: ``(hy, cy, ys)`` as produced by ``self.rnn``, permuted back
        to the order in which ``xs`` was given.

    """
    removed_msg = ('train argument is not supported anymore. '
                   'Use chainer.using_config')
    argument.check_unexpected_kwargs(kwargs, train=removed_msg)
    argument.assert_kwargs_empty(kwargs)

    assert isinstance(xs, (list, tuple))

    # Reorder the batch with the permutation from ``argsort_list_descent``;
    # it is undone on every output before returning.
    indices = n_step_rnn.argsort_list_descent(xs)
    xs = n_step_rnn.permutate_list(xs, indices, inv=False)

    def _initial_state(state):
        # A missing state is replaced by ``self.init_hx``; a given one is
        # reordered to match the permuted batch.
        if state is None:
            return self.init_hx(xs)
        return permutate.permutate(state, indices, axis=1, inv=False)

    hx = _initial_state(hx)
    cx = _initial_state(cx)

    trans_x = transpose_sequence.transpose_sequence(xs)

    ws = [[getattr(w, 'w%d' % k) for k in range(8)] for w in self]
    bs = [[getattr(w, 'b%d' % k) for k in range(8)] for w in self]

    hy, cy, trans_y = self.rnn(
        self.n_layers, self.dropout, hx, cx, ws, bs, trans_x)

    hy = permutate.permutate(hy, indices, axis=1, inv=True)
    cy = permutate.permutate(cy, indices, axis=1, inv=True)
    ys = n_step_rnn.permutate_list(
        transpose_sequence.transpose_sequence(trans_y), indices, inv=True)

    return hy, cy, ys
def extract(self, images, layers=None, size=(224, 224), **kwargs):
    """extract(self, images, layers=['pool5'], size=(224, 224))

    Extracts all the feature maps of given images.

    The difference of directly executing ``forward`` is that
    it directly accepts images as an input and automatically
    transforms them to a proper variable. That is,
    it is also interpreted as a shortcut method that implicitly calls
    ``prepare`` and ``forward`` functions.

    Unlike ``predict`` method, this method does not override
    ``chainer.config.train`` and ``chainer.config.enable_backprop``
    configuration. If you want to extract features without updating
    model parameters, you need to manually set configuration when
    calling this method as follows:

    .. code-block:: python

        # model is an instance of `GoogLeNet`
        with chainer.using_config('train', False):
            with chainer.using_config('enable_backprop', False):
                feature = model.extract([image])

    Args:
        images (iterable of PIL.Image or numpy.ndarray): Input images.
        layers (list of str): The list of layer names you want to extract.
        size (pair of ints): The resolution of resized images used as
            an input of CNN. All the given images are not resized
            if this argument is ``None``, but the resolutions of
            all the images should be the same.

    Returns:
        Dictionary of ~chainer.Variable: A dictionary in which
        the key contains the layer name and the value contains
        the corresponding feature map variable.

    """
    if layers is None:
        layers = ['pool5']

    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs,
            train=('train argument is not supported anymore. '
                   'Use chainer.using_config'),
            volatile=('volatile argument is not supported anymore. '
                      'Use chainer.using_config'))
        argument.assert_kwargs_empty(kwargs)

    # Preprocess every image, stack them into one batch and move the batch
    # to the array module of this link before running the network.
    batch = concat_examples([prepare(img, size=size) for img in images])
    x = Variable(self.xp.asarray(batch))
    return self(x, layers=layers)
def convolution_2d(x, V, g, b=None, stride=1, pad=0, cover_all=False,
                   **kwargs):
    """Apply ``Convolution2DFunction`` to ``x``, ``V``, ``g`` and optional ``b``.

    Args:
        x: Input, forwarded unchanged.
        V: Second function input, forwarded unchanged.
        g: Third function input, forwarded unchanged.
        b: Optional fourth input; omitted from the call when ``None``.
        stride: Passed to the ``Convolution2DFunction`` constructor.
        pad: Passed to the ``Convolution2DFunction`` constructor.
        cover_all: Passed to the ``Convolution2DFunction`` constructor.
        **kwargs: Accepted only so that the removed ``deterministic``
            argument can be rejected with an explanatory message.

    Returns:
        Whatever the function object returns for the given inputs.
    """
    removed_msg = (
        "deterministic argument is not supported anymore. "
        "Use chainer.using_config('cudnn_deterministic', value) "
        "context where value is either `True` or `False`.")
    argument.check_unexpected_kwargs(kwargs, deterministic=removed_msg)
    argument.assert_kwargs_empty(kwargs)

    func = Convolution2DFunction(stride, pad, cover_all)
    # The bias is an optional trailing input.
    inputs = (x, V, g) if b is None else (x, V, g, b)
    return func(*inputs)
def deconvolution_2d(x, V, g, b=None, stride=1, pad=0, outsize=None,
                     **kwargs):
    """Apply ``Deconvolution2DFunction`` to ``x``, ``V``, ``g`` and optional ``b``.

    Args:
        x: Input, forwarded unchanged.
        V: Second function input, forwarded unchanged.
        g: Third function input, forwarded unchanged.
        b: Optional fourth input; omitted from the call when ``None``.
        stride: Passed to the ``Deconvolution2DFunction`` constructor.
        pad: Passed to the ``Deconvolution2DFunction`` constructor.
        outsize: Passed to the ``Deconvolution2DFunction`` constructor.
        **kwargs: Accepted only so that the removed ``deterministic``
            argument can be rejected with an explanatory message.

    Returns:
        Whatever the function object returns for the given inputs.
    """
    removed_msg = (
        "deterministic argument is not supported anymore. "
        "Use chainer.using_config('cudnn_deterministic', value) "
        "context where value is either `True` or `False`.")
    argument.check_unexpected_kwargs(kwargs, deterministic=removed_msg)
    argument.assert_kwargs_empty(kwargs)

    func = Deconvolution2DFunction(stride, pad, outsize)
    # The bias is an optional trailing input.
    inputs = (x, V, g) if b is None else (x, V, g, b)
    return func(*inputs)
def __init__(self, stride=1, pad=0, outsize=None, requires_x_grad=True,
             **kwargs):
    """Store the stride, padding, output size and gradient flag.

    Args:
        stride: Stride; ``_pair`` turns it into ``(self.sy, self.sx)``.
        pad: Padding; ``_pair`` turns it into ``(self.ph, self.pw)``.
        outsize: Two-element output size unpacked into ``self.outh`` and
            ``self.outw``; ``None`` sets both to ``None``.
        requires_x_grad: Stored unchanged as ``self.requires_x_grad``.
        **kwargs: Accepted only so that the removed ``deterministic``
            argument can be rejected with an explanatory message.
    """
    removed_msg = (
        "deterministic argument is not supported anymore. "
        "Use chainer.using_config('cudnn_deterministic', value) "
        "context where value is either `True` or `False`.")
    argument.check_unexpected_kwargs(kwargs, deterministic=removed_msg)
    argument.assert_kwargs_empty(kwargs)

    stride_y, stride_x = _pair(stride)
    self.sy = stride_y
    self.sx = stride_x

    pad_h, pad_w = _pair(pad)
    self.ph = pad_h
    self.pw = pad_w

    if outsize is None:
        outsize = (None, None)
    self.outh, self.outw = outsize

    self.requires_x_grad = requires_x_grad
def _call(self, hs, xs, **kwargs):
    """Calls RNN function.

    Args:
        hs (list of ~chainer.Variable or None): List of hidden states.
            Its length depends on its implementation.
            An element that is ``None`` is replaced by
            ``self.init_hx(xs)``.
        xs (list of ~chainer.Variable): List of input sequences.
            Each element ``xs[i]`` is a :class:`chainer.Variable` holding
            a sequence.

    Returns:
        tuple: ``(hys, ys)`` where ``hys`` is the list of final states and
        ``ys`` the list of output sequences, both permuted back to the
        order in which ``xs`` was given.
    """
    if kwargs:
        removed_msg = ('train argument is not supported anymore. '
                       'Use chainer.using_config')
        argument.check_unexpected_kwargs(kwargs, train=removed_msg)
        argument.assert_kwargs_empty(kwargs)

    assert isinstance(xs, (list, tuple))
    xp = cuda.get_array_module(*(list(hs) + list(xs)))

    # Reorder the batch with the permutation from ``argsort_list_descent``;
    # it is undone on every output before returning.
    indices = argsort_list_descent(xs)
    indices_array = xp.array(indices)
    xs = permutate_list(xs, indices, inv=False)

    hxs = [
        self.init_hx(xs) if hx is None
        else permutate.permutate(hx, indices_array, axis=1, inv=False)
        for hx in hs
    ]

    trans_x = transpose_sequence.transpose_sequence(xs)
    args = [self.n_layers, self.dropout]
    args = args + hxs + [self.ws, self.bs, trans_x]
    result = self.rnn(*args)

    # Every result but the last is a state; the last one is the output.
    states, trans_y = result[:-1], result[-1]
    hys = [
        permutate.permutate(h, indices_array, axis=1, inv=True)
        for h in states
    ]
    ys = permutate_list(
        transpose_sequence.transpose_sequence(trans_y), indices, inv=True)

    return hys, ys
def extract(self, images, layers=None, size=(224, 224), **kwargs):
    """extract(self, images, layers=['pool5'], size=(224, 224))

    Extracts the feature maps of the given images.

    Unlike calling the model directly, this method accepts raw images:
    each one goes through ``prepare``, the results are combined with
    ``concat_examples`` and wrapped in a variable, and the model is then
    called on that variable.

    .. warning::

       ``train`` and ``volatile`` arguments are not supported anymore since
       v2. Instead, use ``chainer.using_config('train', train)`` and
       ``chainer.using_config('enable_backprop', not volatile)``
       respectively.
       See :func:`chainer.using_config`.

    Args:
        images (iterable of PIL.Image or numpy.ndarray): Input images.
        layers (list of str): The list of layer names you want to extract.
            ``None`` means ``['pool5']``.
        size (pair of ints): The resolution passed to ``prepare`` for
            resizing. If ``None``, the images are not resized, but they
            must all have the same resolution.

    Returns:
        Dictionary of ~chainer.Variable: A dictionary in which the key
        contains the layer name and the value contains the corresponding
        feature map variable.

    """
    layers = ['pool5'] if layers is None else layers

    argument.check_unexpected_kwargs(
        kwargs,
        train='train argument is not supported anymore. '
        'Use chainer.using_config',
        volatile='volatile argument is not supported anymore. '
        'Use chainer.using_config')
    argument.assert_kwargs_empty(kwargs)

    prepared = [prepare(image, size=size) for image in images]
    batch = concat_examples(prepared)
    return self(Variable(self.xp.asarray(batch)), layers=layers)
def make_extension(trigger=None, default_name=None, priority=None,
                   finalizer=None, initializer=None, on_error=None, **kwargs):
    """Decorator factory that turns a function into a trainer extension.

    The returned decorator only attaches attributes to the function it
    receives and returns that same function. See :class:`Extension` for
    details of trainer extensions.

    Args:
        trigger: Default trigger of the extension. ``None`` selects
            ``Extension.trigger``.
        default_name: Default name of the extension. When it is falsy the
            ``__name__`` of the decorated function is used.
        priority (int): Default priority of the extension. ``None`` selects
            ``Extension.priority``.
        finalizer: Finalizer function of this extension. It is called at
            the end of the training loop.
        initializer: Initializer function of this extension. It is called
            at the beginning of the training loop.
        on_error: Error handler callback function of this extension. It is
            called after an error is raised during the trainer loop.

    Returns:
        A decorator that sets ``trigger``, ``default_name``, ``priority``,
        ``finalize``, ``on_error`` and ``initialize`` on its argument.

    """
    if kwargs:
        msg = ('invoke_before_training has been removed since Chainer v2.0.0. '
               'Use initializer= instead.')
        argument.check_unexpected_kwargs(kwargs, invoke_before_training=msg)
        argument.assert_kwargs_empty(kwargs)

    # Fall back to the class-level defaults only for omitted values.
    ext_trigger = Extension.trigger if trigger is None else trigger
    ext_priority = Extension.priority if priority is None else priority

    def decorator(ext):
        ext.trigger = ext_trigger
        ext.default_name = default_name if default_name else ext.__name__
        ext.priority = ext_priority
        ext.finalize = finalizer
        ext.on_error = on_error
        ext.initialize = initializer
        return ext

    return decorator
def __init__(self, in_size, out_size, c_ratio=0.5, h_ratio=0.5, **kwargs):
    """Set up the zoneout ratios, the state and the two linear links.

    Args:
        in_size: Input size of the ``upward`` linear link.
        out_size: Stored as ``self.state_size``. Both linear links produce
            ``4 * out_size`` outputs.
        c_ratio: Stored as ``self.c_ratio``.
        h_ratio: Stored as ``self.h_ratio``.
        **kwargs: Only used to reject the removed ``train`` argument with
            an explanatory message.

    """
    argument.check_unexpected_kwargs(
        kwargs, train='train argument is not supported anymore. '
        'Use chainer.using_config')
    argument.assert_kwargs_empty(kwargs)

    super(StatefulZoneoutLSTM, self).__init__()
    self.state_size, self.c_ratio, self.h_ratio = out_size, c_ratio, h_ratio
    self.reset_state()

    gate_units = 4 * out_size
    with self.init_scope():
        self.upward = linear.Linear(in_size, gate_units)
        # The lateral (state-to-gates) link carries no bias of its own.
        self.lateral = linear.Linear(out_size, gate_units, nobias=True)
def __init__(self, stride=1, pad=0, outsize=None, **kwargs):
    """Store the stride, padding and requested output size.

    Args:
        stride: Stride; split into ``self.sy`` and ``self.sx`` by ``_pair``.
        pad: Padding; split into ``self.ph`` and ``self.pw`` by ``_pair``.
        outsize: ``None`` or a two-element sequence unpacked into
            ``self.outh`` and ``self.outw``.
        **kwargs: Only used to reject the removed ``deterministic`` and
            ``requires_x_grad`` arguments with explanatory messages.

    """
    removed_arguments = {
        'deterministic':
            "deterministic argument is not supported anymore. "
            "Use chainer.using_config('cudnn_deterministic', value) context "
            "where value is either `True` or `False`.",
        'requires_x_grad':
            "requires_x_grad argument is not supported "
            "anymore. Just remove the argument. Note that whether to compute "
            "the gradient w.r.t. x is automatically decided during "
            "backpropagation.",
    }
    argument.check_unexpected_kwargs(kwargs, **removed_arguments)
    argument.assert_kwargs_empty(kwargs)

    self.sy, self.sx = _pair(stride)
    self.ph, self.pw = _pair(pad)
    if outsize is None:
        self.outh = self.outw = None
    else:
        self.outh, self.outw = outsize
def __init__(self, n_layers, in_size, out_size, dropout, **kwargs):
    """Create one child link of weights and biases per layer and direction.

    ``self.use_bi_direction`` and ``self.n_weights`` are read before the
    parent constructor runs.
    NOTE(review): presumably both are class attributes supplied by
    subclasses -- confirm.

    Args:
        n_layers: Number of layers.
        in_size: Input width of the first half of the weights in layer 0.
        out_size: Number of rows of every weight and length of every bias.
        dropout: Stored as ``self.dropout``.
        **kwargs: Only used to reject the removed ``use_cudnn``,
            ``use_bi_direction`` and ``activation`` arguments.

    """
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs, use_cudnn='use_cudnn argument is not supported anymore. '
            'Use chainer.using_config',
            use_bi_direction='use_bi_direction is not supported anymore',
            activation='activation is not supported anymore')
        argument.assert_kwargs_empty(kwargs)

    direction = 2 if self.use_bi_direction else 1
    weights = []
    for layer_index in six.moves.range(n_layers):
        for _ in six.moves.range(direction):
            weight = link.Link()
            with weight.init_scope():
                for j in six.moves.range(self.n_weights):
                    if j >= self.n_weights // 2:
                        # Second half of the weights: width is out_size.
                        w_in = out_size
                    elif layer_index == 0:
                        w_in = in_size
                    else:
                        # Deeper layers take out_size per direction.
                        w_in = out_size * direction
                    setattr(weight, 'w%d' % j, variable.Parameter(
                        normal.Normal(numpy.sqrt(1. / w_in)),
                        (out_size, w_in)))
                    setattr(weight, 'b%d' % j,
                            variable.Parameter(0, (out_size,)))
            weights.append(weight)

    super(NStepRNNBase, self).__init__(*weights)

    # Per-child lists of parameters, in index order.
    self.ws = [
        [getattr(layer, 'w%d' % i)
         for i in six.moves.range(self.n_weights)]
        for layer in self]
    self.bs = [
        [getattr(layer, 'b%d' % i)
         for i in six.moves.range(self.n_weights)]
        for layer in self]

    self.n_layers = n_layers
    self.dropout = dropout
    self.out_size = out_size
    self.direction = direction
def extract(self, images, layers=None, size=(224, 224), **kwargs):
    """extract(self, images, layers=['pool5'], size=(224, 224))

    Extracts all the feature maps of given images.

    This is a shortcut that runs ``prepare`` on every image, combines the
    results with ``concat_examples``, wraps the batch in a variable and
    calls the model on it, so raw images can be passed directly.

    .. warning::

       ``train`` and ``volatile`` arguments are not supported anymore since
       v2. Instead, use ``chainer.using_config('train', train)`` and
       ``chainer.using_config('enable_backprop', not volatile)``
       respectively.
       See :func:`chainer.using_config`.

    Args:
        images (iterable of PIL.Image or numpy.ndarray): Input images.
        layers (list of str): The list of layer names you want to extract.
            Defaults to ``['pool5']`` when ``None``.
        size (pair of ints): The resolution of resized images used as
            an input of CNN. All the given images are not resized
            if this argument is ``None``, but the resolutions of
            all the images should be the same.

    Returns:
        Dictionary of ~chainer.Variable: A dictionary in which
        the key contains the layer name and the value contains
        the corresponding feature map variable.

    """
    if layers is None:
        layers = ['pool5']

    removed = {
        'train': 'train argument is not supported anymore. '
                 'Use chainer.using_config',
        'volatile': 'volatile argument is not supported anymore. '
                    'Use chainer.using_config',
    }
    argument.check_unexpected_kwargs(kwargs, **removed)
    argument.assert_kwargs_empty(kwargs)

    batch = concat_examples([prepare(img, size=size) for img in images])
    variable_batch = Variable(self.xp.asarray(batch))
    return self(variable_batch, layers=layers)
def __init__(self, n_layers, in_size, out_size, dropout, **kwargs):
    """Build the per-layer, per-direction parameter links.

    For every layer and direction a child ``link.Link`` is created holding
    ``self.n_weights`` weight matrices ``w0..`` and bias vectors ``b0..``.
    ``self.use_bi_direction`` and ``self.n_weights`` are read from the
    instance before the parent constructor is called.
    NOTE(review): presumably class attributes of subclasses -- confirm.

    Args:
        n_layers: Number of layers.
        in_size: Width of the first half of the weights in the first layer.
        out_size: Row count of every weight and length of every bias.
        dropout: Stored as ``self.dropout``.
        **kwargs: Only used to reject the removed ``use_cudnn``,
            ``use_bi_direction`` and ``activation`` arguments.

    """
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs, use_cudnn='use_cudnn argument is not supported anymore. '
            'Use chainer.using_config',
            use_bi_direction='use_bi_direction is not supported anymore',
            activation='activation is not supported anymore')
        argument.assert_kwargs_empty(kwargs)

    if self.use_bi_direction:
        direction = 2
    else:
        direction = 1

    weights = []
    for depth in six.moves.range(n_layers):
        # Only the first layer sees in_size; later layers see the outputs
        # of every direction of the previous layer.
        first_half_width = in_size if depth == 0 else out_size * direction
        for _unused in six.moves.range(direction):
            weight = link.Link()
            with weight.init_scope():
                for j in six.moves.range(self.n_weights):
                    in_first_half = j < self.n_weights // 2
                    w_in = first_half_width if in_first_half else out_size
                    w = variable.Parameter(
                        normal.Normal(numpy.sqrt(1. / w_in)),
                        (out_size, w_in))
                    b = variable.Parameter(0, (out_size,))
                    setattr(weight, 'w%d' % j, w)
                    setattr(weight, 'b%d' % j, b)
            weights.append(weight)

    super(NStepRNNBase, self).__init__(*weights)

    weight_ids = six.moves.range(self.n_weights)
    self.ws = [[getattr(child, 'w%d' % i) for i in weight_ids]
               for child in self]
    bias_ids = six.moves.range(self.n_weights)
    self.bs = [[getattr(child, 'b%d' % i) for i in bias_ids]
               for child in self]

    self.n_layers = n_layers
    self.dropout = dropout
    self.out_size = out_size
    self.direction = direction
def __init__(self, in_size, out_size, c_ratio=0.5, h_ratio=0.5, **kwargs):
    """Initialize the link's hyperparameters, state and child links.

    Args:
        in_size: Input size of the ``upward`` link.
        out_size: Kept as ``self.state_size``; the child links output
            ``4 * out_size`` values.
        c_ratio: Kept as ``self.c_ratio``.
        h_ratio: Kept as ``self.h_ratio``.
        **kwargs: Only used to reject the removed ``train`` argument.

    """
    train_msg = ('train argument is not supported anymore. '
                 'Use chainer.using_config')
    argument.check_unexpected_kwargs(kwargs, train=train_msg)
    argument.assert_kwargs_empty(kwargs)

    super(StatefulZoneoutLSTM, self).__init__()

    self.state_size = out_size
    self.c_ratio = c_ratio
    self.h_ratio = h_ratio
    self.reset_state()

    with self.init_scope():
        # ``upward`` maps the input, ``lateral`` maps the previous state;
        # only ``upward`` has a bias.
        self.upward = linear.Linear(in_size, out_size * 4)
        self.lateral = linear.Linear(out_size, out_size * 4, nobias=True)
def meprop(x, k=10, **kwargs):
    """meprop(x, k=10)

    Applies ``meProp(k)`` to ``x`` in training mode and returns ``x``
    untouched otherwise.

    Args:
        x (~chainer.Variable): Input variable.
        k (int): Top k parameter.

    Returns:
        ~chainer.Variable: Output variable. This is ``x`` itself when
        ``configuration.config.train`` is false.

    See the paper by X. Sun: `meProp: Sparsified Back Propagation for \
    Accelerated Deep Learning with Reduced Overfitting \
    <https://arxiv.org/abs/1706.06197>`_.

    """
    argument.assert_kwargs_empty(kwargs)

    if not configuration.config.train:
        # Outside training the function is the identity.
        return x
    return meProp(k)(x)
def forward(self, x, layers=None, **kwargs):
    """forward(self, x, layers=['prob'])

    Computes all the feature maps specified by ``layers``.

    The entries of ``self.functions`` are applied in iteration order, each
    one feeding the next, and evaluation stops as soon as every requested
    layer has been collected.

    .. warning::

       ``test`` argument is not supported anymore since v2.
       Instead, use ``chainer.using_config('train', False)``
       to run in test mode.
       See :func:`chainer.using_config`.

    Args:
        x (~chainer.Variable): Input variable. It should be prepared by
            ``prepare`` function.
        layers (list of str): The list of layer names you want to extract.
            If ``None``, 'prob' will be used as layers.

    Returns:
        Dictionary of ~chainer.Variable: A dictionary in which
        the key contains the layer and the value contains the
        corresponding feature map variable.

    """
    if layers is None:
        layers = ['prob']
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs, test='test argument is not supported anymore. '
            'Use chainer.using_config'
        )
        argument.assert_kwargs_empty(kwargs)

    remaining = set(layers)
    outputs = {}
    h = x
    for name, funcs in self.functions.items():
        if not remaining:
            # Everything requested has been produced; skip later layers.
            break
        for f in funcs:
            h = f(h)
        if name in remaining:
            outputs[name] = h
            remaining.discard(name)
    return outputs
def forward(self, x, layers=None, **kwargs):
    """forward(self, x, layers=['prob'])

    Computes all the feature maps specified by ``layers``.

    ``self.functions`` is walked in order; the functions of each entry are
    chained onto the running value, which is recorded whenever the entry's
    key was requested. The walk ends once nothing is left to record.

    .. warning::

       ``test`` argument is not supported anymore since v2.
       Instead, use ``chainer.using_config('train', False)``
       to run in test mode.
       See :func:`chainer.using_config`.

    Args:
        x (~chainer.Variable): Input variable. It should be prepared by
            ``prepare`` function.
        layers (list of str): The list of layer names you want to extract.
            If ``None``, 'prob' will be used as layers.

    Returns:
        Dictionary of ~chainer.Variable: A dictionary in which
        the key contains the layer and the value contains the
        corresponding feature map variable.

    """
    if layers is None:
        layers = ['prob']
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs, test='test argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    wanted = set(layers)
    activations = {}
    value = x
    for key, funcs in self.functions.items():
        if len(wanted) < 1:
            break
        for apply in funcs:
            value = apply(value)
        if key not in wanted:
            continue
        wanted.remove(key)
        activations[key] = value
    return activations
def _call(self, hs, xs, **kwargs):
    """Calls the RNN function stored in ``self.rnn``.

    Inputs are permuted with the indices returned by
    ``argsort_list_descent`` before the call and all results are permuted
    back afterwards, so callers see their own ordering.

    Args:
        hs (list of ~chainer.Variable or None): List of hidden states.
            Its length depends on its implementation.
            If an element is ``None``, ``self.init_hx`` supplies the state.
        xs (list of ~chainer.Variable): List of input sequences.
            Each element ``xs[i]`` is a :class:`chainer.Variable` holding
            a sequence.

    Returns:
        tuple: ``(hys, ys)``: the list of permuted-back states (every
        output of ``self.rnn`` but the last) and the list of output
        sequences derived from the last output.

    """
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs, train='train argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    assert isinstance(xs, (list, tuple))
    arrays = list(hs) + list(xs)
    xp = cuda.get_array_module(*arrays)
    indices = argsort_list_descent(xs)
    indices_array = xp.array(indices)

    xs = permutate_list(xs, indices, inv=False)

    hxs = []
    for hx in hs:
        if hx is not None:
            hx = permutate.permutate(hx, indices_array, axis=1, inv=False)
        else:
            hx = self.init_hx(xs)
        hxs.append(hx)

    trans_x = transpose_sequence.transpose_sequence(xs)
    args = [self.n_layers, self.dropout]
    args += hxs
    args += [self.ws, self.bs, trans_x]
    outputs = self.rnn(*args)

    # The last output holds the sequences; the rest are states.
    states, trans_y = outputs[:-1], outputs[-1]
    hys = [permutate.permutate(state, indices_array, axis=1, inv=True)
           for state in states]
    ys = permutate_list(
        transpose_sequence.transpose_sequence(trans_y), indices, inv=True)
    return hys, ys
def __init__(self, in_channels, out_channels, ksize=None, stride=1, pad=0,
             nobias=False, outsize=None, initialW=None, initial_bias=None,
             group=1, **kwargs):
    """Configure the link and register its ``W`` and ``b`` parameters.

    Args:
        in_channels: Number of input channels. When ``ksize`` is ``None``
            the first two positional arguments are reinterpreted as
            ``(out_channels, ksize)`` and ``in_channels`` becomes ``None``.
        out_channels: Number of output channels (see ``in_channels``).
        ksize: Kernel size, stored as ``self.ksize``.
        stride: Stride, stored as ``_pair(stride)``.
        pad: Padding, stored as ``_pair(pad)``.
        nobias: If true, ``self.b`` is ``None``.
        outsize: Stored as ``self.outsize``; ``None`` becomes
            ``(None, None)``.
        initialW: Passed to ``initializers._get_initializer`` for ``W``.
        initial_bias: Passed to ``initializers._get_initializer`` for
            ``b``; ``None`` means ``0``.
        group: Stored as ``int(group)``.
        **kwargs: Only used to reject the removed ``deterministic``
            argument.

    """
    super(Deconvolution2D, self).__init__()

    deterministic_msg = (
        "deterministic argument is not "
        "supported anymore. "
        "Use chainer.using_config('cudnn_deterministic', value) "
        "context where value is either `True` or `False`.")
    argument.check_unexpected_kwargs(kwargs, deterministic=deterministic_msg)
    argument.assert_kwargs_empty(kwargs)

    if ksize is None:
        # Two-argument form: the values arrived one slot to the left.
        out_channels, ksize, in_channels = in_channels, out_channels, None

    self.ksize = ksize
    self.stride = _pair(stride)
    self.pad = _pair(pad)
    self.outsize = outsize if outsize is not None else (None, None)
    self.out_channels = out_channels
    self.group = int(group)

    with self.init_scope():
        self.W = variable.Parameter(
            initializers._get_initializer(initialW))
        if in_channels is not None:
            self._initialize_params(in_channels)

        if nobias:
            self.b = None
        else:
            if isinstance(initial_bias, (numpy.ndarray, cuda.ndarray)):
                assert initial_bias.shape == (out_channels,)
            if initial_bias is None:
                initial_bias = 0
            self.b = variable.Parameter(
                initializers._get_initializer(initial_bias), out_channels)
def __init__(self, in_channels, out_channels, units, ksize=None, stride=1,
             pad=0, nobias=False, initialW=None, initial_bias=None,
             **kwargs):
    """Configure the link and register its ``W`` and ``b`` parameters.

    Args:
        in_channels: Number of input channels. When ``ksize`` is ``None``
            the first two positional arguments are reinterpreted as
            ``(out_channels, ksize)`` and ``in_channels`` becomes ``None``.
        out_channels: Number of output channels (see ``in_channels``).
        units: Stored as ``self.units``. The bias has shape
            ``(units, out_channels // units)``.
        ksize: Kernel size, stored as ``self.ksize``.
        stride: Stride, stored as ``_pair(stride)``.
        pad: Padding, stored as ``_pair(pad)``.
        nobias: If true, ``self.b`` is ``None``.
        initialW: Passed to ``initializers._get_initializer`` for ``W``.
        initial_bias: Passed to ``initializers._get_initializer`` for
            ``b``; ``None`` means ``0``.
        **kwargs: Only used to reject the removed ``deterministic``
            argument.

    """
    super().__init__()

    deterministic_msg = (
        "deterministic argument is not "
        "supported anymore. "
        "Use chainer.using_config('cudnn_deterministic', value) "
        "context where value is either `True` or `False`.")
    argument.check_unexpected_kwargs(kwargs, deterministic=deterministic_msg)
    argument.assert_kwargs_empty(kwargs)

    if ksize is None:
        # Two-argument form: the values arrived one slot to the left.
        out_channels, ksize, in_channels = in_channels, out_channels, None

    self.units = units
    self.ksize = ksize
    self.stride = _pair(stride)
    self.pad = _pair(pad)
    self.out_channels = out_channels

    with self.init_scope():
        self.W = variable.Parameter(
            initializers._get_initializer(initialW))
        if in_channels is not None:
            self._initialize_params(in_channels)

        if nobias:
            self.b = None
        else:
            if initial_bias is None:
                initial_bias = 0
            bias_shape = (units, out_channels // units)
            self.b = variable.Parameter(
                initializers._get_initializer(initial_bias), bias_shape)
def calc_activation(self, x, layers=None, **kwargs):
    """calc_activation(self, x, layers=['fc6'])

    Computes all the feature maps specified by ``layers``.

    The mapping returned by ``self.base.functions()`` is walked in order;
    each entry's functions are chained onto the running value and the walk
    stops once every requested layer has been collected.

    .. warning::

       ``test`` argument is not supported anymore since v2.
       Instead, use ``chainer.using_config('train', train)``.
       See :func:`chainer.using_config`.

    Args:
        x (~chainer.Variable): Input variable. It should be prepared by
            ``prepare`` function.
        layers (list of str): The list of layer names you want to extract.
            ``None`` means ``['fc6']``.

    Returns:
        Dictionary of ~chainer.Variable: A dictionary in which
        the key contains the layer name and the value contains
        the corresponding feature map variable.

    """
    layers = ['fc6'] if layers is None else layers

    argument.check_unexpected_kwargs(
        kwargs, test='test argument is not supported anymore. '
        'Use chainer.using_config')
    argument.assert_kwargs_empty(kwargs)

    h = x
    collected = {}
    pending = set(layers)
    for key, funcs in self.base.functions().items():
        if not pending:
            break
        for func in funcs:
            h = func(h)
        if key in pending:
            collected[key] = h
            pending.discard(key)
    return collected
def __init__(self, stride=1, pad=0, outsize=None, requires_x_grad=True,
             **kwargs):
    """Record the deconvolution geometry and the gradient flag.

    Args:
        stride: Converted by ``_pair`` into ``(self.sy, self.sx)``.
        pad: Converted by ``_pair`` into ``(self.ph, self.pw)``.
        outsize: Two-element sequence giving ``(self.outh, self.outw)``,
            or ``None`` to leave both as ``None``.
        requires_x_grad: Kept as ``self.requires_x_grad``.
        **kwargs: Only used to reject the removed ``deterministic``
            argument.

    """
    argument.check_unexpected_kwargs(
        kwargs,
        deterministic="deterministic argument is not "
                      "supported anymore. "
                      "Use chainer.using_config('cudnn_deterministic', value) "
                      "context where value is either `True` or `False`.")
    argument.assert_kwargs_empty(kwargs)

    stride_y, stride_x = _pair(stride)
    self.sy, self.sx = stride_y, stride_x
    pad_h, pad_w = _pair(pad)
    self.ph, self.pw = pad_h, pad_w

    requested = (None, None) if outsize is None else outsize
    self.outh, self.outw = requested
    self.requires_x_grad = requires_x_grad
def make_extension(trigger=None, default_name=None, priority=None,
                   finalizer=None, initializer=None, on_error=None, **kwargs):
    """Decorator to make given functions into trainer extensions.

    The decorator does not wrap the function: it sets a handful of
    attributes on it and hands the very same object back. See
    :class:`Extension` for details of trainer extensions; omitted
    ``trigger`` and ``priority`` values are taken from that class.

    Args:
        trigger: Default trigger of the extension.
        default_name: Default name of the extension. The name of a given
            function is used by default.
        priority (int): Default priority of the extension.
        finalizer: Finalizer function of this extension. It is
            called at the end of the training loop.
        initializer: Initializer function of this extension. It is
            called at the beginning of the training loop.
        on_error: Error handler callback function of this extension. It is
            called after an error is raised during the trainer loop.

    Returns:
        The decorator described above.

    """
    if kwargs:
        msg = ('invoke_before_training has been removed since Chainer v2.0.0. '
               'Use initializer= instead.')
        argument.check_unexpected_kwargs(kwargs, invoke_before_training=msg)
        argument.assert_kwargs_empty(kwargs)

    if trigger is None:
        trigger = Extension.trigger
    if priority is None:
        priority = Extension.priority

    def attach(func):
        func.trigger = trigger
        # A falsy default name falls back to the function's own name.
        func.default_name = default_name or func.__name__
        func.priority = priority
        func.finalize, func.on_error, func.initialize = (
            finalizer, on_error, initializer)
        return func

    return attach
def snapshot_object(target, filename, savefun=npz.save_npz, **kwargs):
    """Returns a trainer extension to take snapshots of a given object.

    This extension serializes the given object and saves it to the output
    directory.

    This extension is called once per epoch by default. To take a
    snapshot at a different interval, a trigger object specifying the
    required interval can be passed along with this extension
    to the `extend()` method of the trainer.

    The default priority is -100, which is lower than that of most
    built-in extensions.

    Args:
        target: Object to serialize.
        filename (str): Name of the file into which the object is serialized.
            It can be a format string, where the trainer object is passed to
            the :meth:`str.format` method. For example,
            ``'snapshot_{.updater.iteration}'`` is converted to
            ``'snapshot_10000'`` at the 10,000th iteration.
        savefun: Function to save the object. It takes two arguments: the
            output file path and the object to serialize.
        snapshot_on_error (bool): Whether to take a snapshot in case trainer
            loop has been failed. Defaults to ``False``.

    Returns:
        Snapshot extension object.

    .. seealso::

        - :meth:`chainer.training.extensions.snapshot`

    """
    # ``argument.parse_kwargs`` returns one value per requested name as a
    # tuple. Unpack the single element: binding the tuple itself would hand
    # ``(False,)`` -- which is truthy -- to ``_Snapshot`` and silently turn
    # the option on for every caller.
    snapshot_on_error, = argument.parse_kwargs(
        kwargs, ('snapshot_on_error', False))
    argument.assert_kwargs_empty(kwargs)

    return _Snapshot(
        target=target,
        writer=snapshot_writers.SimpleWriter(savefun=savefun),
        filename=filename,
        snapshot_on_error=snapshot_on_error)
def snapshot_object(target, filename, savefun=npz.save_npz, **kwargs):
    """Returns a trainer extension to take snapshots of a given object.

    This extension serializes the given object and saves it to the output
    directory.

    This extension is called once per epoch by default. To take a
    snapshot at a different interval, a trigger object specifying the
    required interval can be passed along with this extension
    to the `extend()` method of the trainer.

    The default priority is -100, which is lower than that of most
    built-in extensions.

    Args:
        target: Object to serialize.
        filename (str): Name of the file into which the object is serialized.
            It can be a format string, where the trainer object is passed to
            the :meth:`str.format` method. For example,
            ``'snapshot_{.updater.iteration}'`` is converted to
            ``'snapshot_10000'`` at the 10,000th iteration.
        savefun: Function to save the object. It takes two arguments: the
            output file path and the object to serialize.
        snapshot_on_error (bool): Whether to take a snapshot in case trainer
            loop has been failed. Defaults to ``False``.

    Returns:
        Snapshot extension object.

    .. seealso::

        - :meth:`chainer.training.extensions.snapshot`

    """
    # ``argument.parse_kwargs`` yields a tuple with one entry per requested
    # name, so the lone entry has to be unpacked. Keeping the tuple would
    # pass the always-truthy ``(False,)`` on as the flag.
    (snapshot_on_error,) = argument.parse_kwargs(
        kwargs, ('snapshot_on_error', False))
    argument.assert_kwargs_empty(kwargs)

    writer = snapshot_writers.SimpleWriter(savefun=savefun)
    return _Snapshot(target=target, writer=writer, filename=filename,
                     snapshot_on_error=snapshot_on_error)
def __init__(self, n_layers, states, rnn_dir, rnn_mode, **kwargs):
    """Validate the RNN direction and mode and store derived settings.

    Args:
        n_layers: Stored as ``self.n_layers``.
        states: Stored as ``self.states``.
        rnn_dir: Key of ``_rnn_dirs`` selecting the direction.
        rnn_mode: Key of ``_rnn_modes`` selecting the mode.
        **kwargs: Only used to reject the removed ``train`` argument.

    Raises:
        ValueError: If ``rnn_dir`` is not a key of ``_rnn_dirs`` or
            ``rnn_mode`` is not a key of ``_rnn_modes``.

    """
    argument.check_unexpected_kwargs(
        kwargs, train='train argument is not supported anymore. '
        'Use chainer.using_config')
    argument.assert_kwargs_empty(kwargs)

    if rnn_dir not in _rnn_dirs:
        raise ValueError(
            'Invalid rnn_dir: "%s". Please select from [%s]'
            % (rnn_dir, ','.join(_rnn_dirs.keys())))
    if rnn_mode not in _rnn_modes:
        raise ValueError(
            'Invalid rnn_mode: "%s". Please select from [%s]'
            % (rnn_mode, ','.join(_rnn_modes.keys())))

    # Translate the user-facing names through the lookup tables, then
    # derive the remaining settings from the translated values.
    self.rnn_dir = _rnn_dirs[rnn_dir]
    self.rnn_mode = _rnn_modes[rnn_mode]
    self.rnn_direction = _rnn_params_direction[self.rnn_dir]
    self.n_layers, self.states = n_layers, states
    self.use_cell = _rnn_params_use_cell[self.rnn_mode]
    self.n_W = _rnn_n_params[self.rnn_mode]
def __init__(self, n_layers, in_size, out_size, dropout, initialW,
             initial_bias, use_bi_direction, **kwargs):
    """Create one child link of eight weights and biases per layer/direction.

    Args:
        n_layers: Number of layers.
        in_size: Input width of weights ``w0``-``w3`` in the first layer.
        out_size: Row count of every weight and length of every bias.
        dropout: Stored as ``self.dropout``.
        initialW: Passed through ``initializers._get_initializer`` and used
            for every weight.
        initial_bias: Initializer for every bias; ``None`` selects
            ``initializers.constant.Zero()``.
        use_bi_direction: If true, two directions are created per layer
            and ``rnn.n_step_bilstm`` is used instead of
            ``rnn.n_step_lstm``.
        **kwargs: Only used to reject the removed ``use_cudnn`` argument.

    """
    argument.check_unexpected_kwargs(
        kwargs, use_cudnn='use_cudnn argument is not supported anymore. '
        'Use chainer.using_config')
    argument.assert_kwargs_empty(kwargs)

    if initial_bias is None:
        initial_bias = initializers.constant.Zero()
    initialW = initializers._get_initializer(initialW)

    direction = 2 if use_bi_direction else 1
    weights = []
    for layer_index in six.moves.range(n_layers):
        for _ in six.moves.range(direction):
            weight = link.Link()
            with weight.init_scope():
                for j in six.moves.range(8):
                    if j >= 4:
                        # w4-w7 always have width out_size.
                        w_in = out_size
                    elif layer_index == 0:
                        w_in = in_size
                    else:
                        w_in = out_size * direction
                    w = variable.Parameter(initialW, (out_size, w_in))
                    b = variable.Parameter(initial_bias, (out_size,))
                    setattr(weight, 'w{}'.format(j), w)
                    setattr(weight, 'b{}'.format(j), b)
            weights.append(weight)

    super(NStepLSTMBase, self).__init__(*weights)

    self.n_layers = n_layers
    self.dropout = dropout
    self.out_size = out_size
    self.direction = direction
    if use_bi_direction:
        self.rnn = rnn.n_step_bilstm
    else:
        self.rnn = rnn.n_step_lstm
def __init__(self, n_layers, in_size, out_size, dropout, initialW,
             initial_bias, use_bi_direction, **kwargs):
    """Build the LSTM parameter links and select the RNN function.

    Each layer/direction pair gets a child ``link.Link`` with parameters
    ``w0``-``w7`` and ``b0``-``b7``.

    Args:
        n_layers: Number of layers.
        in_size: Width of ``w0``-``w3`` in the first layer.
        out_size: Row count of every weight and length of every bias.
        dropout: Stored as ``self.dropout``.
        initialW: Weight initializer, normalized with
            ``initializers._get_initializer``.
        initial_bias: Bias initializer; ``None`` selects
            ``initializers.constant.Zero()``.
        use_bi_direction: Chooses two directions and
            ``rnn.n_step_bilstm`` when true, otherwise one direction and
            ``rnn.n_step_lstm``.
        **kwargs: Only used to reject the removed ``use_cudnn`` argument.

    """
    use_cudnn_msg = ('use_cudnn argument is not supported anymore. '
                     'Use chainer.using_config')
    argument.check_unexpected_kwargs(kwargs, use_cudnn=use_cudnn_msg)
    argument.assert_kwargs_empty(kwargs)

    if initial_bias is None:
        initial_bias = initializers.constant.Zero()
    initialW = initializers._get_initializer(initialW)

    if use_bi_direction:
        direction = 2
    else:
        direction = 1

    weights = []
    for depth in six.moves.range(n_layers):
        # Layers after the first receive out_size values per direction.
        input_width = in_size if depth == 0 else out_size * direction
        for _unused in six.moves.range(direction):
            weight = link.Link()
            with weight.init_scope():
                for j in six.moves.range(8):
                    w_in = input_width if j < 4 else out_size
                    name_w = 'w{}'.format(j)
                    name_b = 'b{}'.format(j)
                    w = variable.Parameter(initialW, (out_size, w_in))
                    b = variable.Parameter(initial_bias, (out_size,))
                    setattr(weight, name_w, w)
                    setattr(weight, name_b, b)
            weights.append(weight)

    super(NStepLSTMBase, self).__init__(*weights)

    self.n_layers = n_layers
    self.dropout = dropout
    self.out_size = out_size
    self.direction = direction
    self.rnn = rnn.n_step_bilstm if use_bi_direction else rnn.n_step_lstm
def __call__(self, x, layers=None, **kwargs):
    """__call__(self, x, layers=['prob'])

    Computes all the feature maps specified by ``layers``.

    The entries of ``self.functions`` are applied in iteration order, each
    one feeding the next, and evaluation stops as soon as every requested
    layer has been collected.

    .. warning::

       ``test`` argument is not supported anymore since v2.
       Instead, use ``chainer.using_config('train', train)``.
       See :func:`chainer.using_config`.

    Args:
        x (~chainer.Variable): Input variable.
        layers (list of str): The list of layer names you want to extract.
            ``None`` (the default) means ``['prob']``.

    Returns:
        Dictionary of ~chainer.Variable: A dictionary in which
        the key contains the layer name and the value contains
        the corresponding feature map variable.

    """
    # A ``None`` sentinel replaces the former list default so that no
    # mutable object is shared between calls.
    if layers is None:
        layers = ['prob']

    argument.check_unexpected_kwargs(
        kwargs, test='test argument is not supported anymore. '
        'Use chainer.using_config')
    argument.assert_kwargs_empty(kwargs)

    h = x
    activations = {}
    target_layers = set(layers)
    for key, funcs in self.functions.items():
        if not target_layers:
            break
        for func in funcs:
            h = func(h)
        if key in target_layers:
            activations[key] = h
            target_layers.remove(key)
    return activations
def __init__(self, in_channels, out_channels, ksize=None, stride=1, pad=0,
             nobias=False, outsize=None, initialV=None, **kwargs):
    """Configure the link and register its ``V``, ``b`` and ``g`` parameters.

    Args:
        in_channels: Number of input channels. When ``ksize`` is ``None``
            the first two positional arguments are reinterpreted as
            ``(out_channels, ksize)`` and ``in_channels`` becomes ``None``,
            in which case ``V`` is left uninitialized here.
        out_channels: Number of output channels (see ``in_channels``).
        ksize: Kernel size, stored as ``self.ksize``.
        stride: Stride, stored as ``_pair(stride)``.
        pad: Padding, stored as ``_pair(pad)``.
        nobias: Stored as ``self.nobias``; if true, ``self.b`` is ``None``.
        outsize: Stored as ``self.outsize``; ``None`` becomes
            ``(None, None)``.
        initialV: Passed to ``initializers._get_initializer`` for ``V``.
        **kwargs: Only used to reject the removed ``deterministic``
            argument.

    """
    super(Deconvolution2D, self).__init__()

    deterministic_msg = (
        "deterministic argument is not "
        "supported anymore. "
        "Use chainer.using_config('cudnn_deterministic', value) "
        "context where value is either `True` or `False`.")
    argument.check_unexpected_kwargs(kwargs, deterministic=deterministic_msg)
    argument.assert_kwargs_empty(kwargs)

    if ksize is None:
        # Two-argument form: the values arrived one slot to the left.
        out_channels, ksize, in_channels = in_channels, out_channels, None

    self.ksize = ksize
    self.stride = _pair(stride)
    self.pad = _pair(pad)
    self.outsize = outsize if outsize is not None else (None, None)
    self.out_channels = out_channels
    self.nobias = nobias

    with self.init_scope():
        self.V = variable.Parameter(
            initializers._get_initializer(initialV))
        if in_channels is not None:
            kh, kw = _pair(self.ksize)
            self.V.initialize((in_channels, self.out_channels, kh, kw))

        # ``b`` and ``g`` are created without an initializer or shape.
        if nobias:
            self.b = None
        else:
            self.b = variable.Parameter(None)
        self.g = variable.Parameter(None)
def __call__(self, x, layers=None, **kwargs):
    """__call__(self, x, layers=['prob'])

    Computes all the feature maps specified by ``layers``.

    ``self.functions`` is walked in order; the functions of each entry are
    chained onto the running value, which is recorded whenever the entry's
    key was requested. The walk ends once nothing is left to record.

    .. warning::

       ``test`` argument is not supported anymore since v2.
       Instead, use ``chainer.using_config('train', train)``.
       See :func:`chainer.using_config`.

    Args:
        x (~chainer.Variable): Input variable.
        layers (list of str): The list of layer names you want to extract.
            ``None`` (the default) means ``['prob']``.

    Returns:
        Dictionary of ~chainer.Variable: A dictionary in which
        the key contains the layer name and the value contains
        the corresponding feature map variable.

    """
    # The default is resolved here rather than in the signature so that a
    # single list object is never shared across calls.
    layers = ['prob'] if layers is None else layers

    argument.check_unexpected_kwargs(
        kwargs, test='test argument is not supported anymore. '
        'Use chainer.using_config')
    argument.assert_kwargs_empty(kwargs)

    h = x
    activations = {}
    target_layers = set(layers)
    for key, funcs in self.functions.items():
        if not target_layers:
            break
        for func in funcs:
            h = func(h)
        if key in target_layers:
            activations[key] = h
            target_layers.remove(key)
    return activations
def forward(self, x, layers=None, **kwargs):
    """forward(self, x, layers=['prob'])

    Computes all the feature maps specified by ``layers``.

    The entries of ``self.functions`` are applied in iteration order, each
    one feeding the next, and evaluation stops as soon as every requested
    layer has been collected.

    Args:
        x (~chainer.Variable): Input variable. It should be prepared by
            ``prepare`` function.
        layers (list of str): The list of layer names you want to extract.
            ``None`` means ``['prob']``.

    Returns:
        Dictionary of ~chainer.Variable: A dictionary in which
        the key contains the layer name and the value contains
        the corresponding feature map variable.

    """
    if layers is None:
        layers = ['prob']
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs, test='test argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    pending = set(layers)
    activations = {}
    h = x
    for key, funcs in self.functions.items():
        if len(pending) == 0:
            break
        for func in funcs:
            h = func(h)
        if key not in pending:
            continue
        pending.remove(key)
        activations[key] = h
    return activations
def forward(self, x, layers=None, **kwargs):
    """forward(self, x, layers=['prob'])

    Computes all the feature maps specified by ``layers``.

    ``self.functions`` is walked in order; the functions of each entry are
    chained onto the running value, which is recorded whenever the entry's
    key was requested. The walk ends once nothing is left to record.

    Args:
        x (~chainer.Variable): Input variable. It should be prepared by
            ``prepare`` function.
        layers (list of str): The list of layer names you want to extract.
            Defaults to ``['prob']`` when ``None``.

    Returns:
        Dictionary of ~chainer.Variable: A dictionary in which
        the key contains the layer name and the value contains
        the corresponding feature map variable.

    """
    layers = ['prob'] if layers is None else layers
    if kwargs:
        test_msg = ('test argument is not supported anymore. '
                    'Use chainer.using_config')
        argument.check_unexpected_kwargs(kwargs, test=test_msg)
        argument.assert_kwargs_empty(kwargs)

    result = {}
    still_needed = set(layers)
    current = x
    for layer_name, layer_funcs in self.functions.items():
        if len(still_needed) == 0:
            break
        for layer_func in layer_funcs:
            current = layer_func(current)
        if layer_name in still_needed:
            still_needed.discard(layer_name)
            result[layer_name] = current
    return result
def forward(self, x, layers=None, **kwargs):
    """forward(self, x, layers=['prob'])

    Computes all the feature maps specified by ``layers``.

    The entries of ``self.functions`` are applied in iteration order, each
    one feeding the next, and evaluation stops as soon as every requested
    layer has been collected.

    Args:
        x: Input handed to the first function.
        layers (list of str): The list of layer names you want to extract.
            If ``None``, ``['prob']`` is used.
        **kwargs: Only used to reject the removed ``test`` argument.

    Returns:
        dict: Maps each requested layer name that was reached to the value
        produced after that layer's functions were applied.

    """
    if layers is None:
        layers = ['prob']
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs, test='test argument is not supported anymore. '
            'Use chainer.using_config'
        )
        argument.assert_kwargs_empty(kwargs)

    outstanding = set(layers)
    found = {}
    h = x
    for key, funcs in self.functions.items():
        if not outstanding:
            # Nothing left to collect; later layers are never evaluated.
            break
        for func in funcs:
            h = func(h)
        if key in outstanding:
            found[key] = h
            outstanding.discard(key)
    return found
def __init__(self, in_channels, out_channels, ksize=None, stride=1, pad=0,
             nobias=False, outsize=None, initialW=None, initial_bias=None,
             group=1, **kwargs):
    """Set up the deconvolution link's settings and parameters.

    Args:
        in_channels: Number of input channels. If ``ksize`` is ``None``,
            the call is treated as ``(out_channels, ksize)`` and
            ``in_channels`` is set to ``None``, which skips
            ``self._initialize_params`` here.
        out_channels: Number of output channels (see ``in_channels``).
        ksize: Kernel size, kept as ``self.ksize``.
        stride: Kept as ``_pair(stride)``.
        pad: Kept as ``_pair(pad)``.
        nobias: When true no bias parameter is created (``self.b`` is
            ``None``).
        outsize: Kept as ``self.outsize``; ``None`` becomes
            ``(None, None)``.
        initialW: Source of the initializer for ``W``.
        initial_bias: Source of the initializer for ``b``; ``None`` is
            replaced by ``0``. An array value must have shape
            ``(out_channels,)``.
        group: Kept as ``int(group)``.
        **kwargs: Only used to reject the removed ``deterministic``
            argument.

    """
    super(Deconvolution2D, self).__init__()

    argument.check_unexpected_kwargs(
        kwargs,
        deterministic="deterministic argument is not "
                      "supported anymore. "
                      "Use chainer.using_config('cudnn_deterministic', value) "
                      "context where value is either `True` or `False`.")
    argument.assert_kwargs_empty(kwargs)

    if ksize is None:
        out_channels, ksize, in_channels = in_channels, out_channels, None

    self.ksize = ksize
    self.stride = _pair(stride)
    self.pad = _pair(pad)
    if outsize is None:
        self.outsize = (None, None)
    else:
        self.outsize = outsize
    self.out_channels = out_channels
    self.group = int(group)

    with self.init_scope():
        W_initializer = initializers._get_initializer(initialW)
        self.W = variable.Parameter(W_initializer)
        if in_channels is not None:
            self._initialize_params(in_channels)

        if not nobias:
            array_types = (numpy.ndarray, cuda.ndarray)
            if isinstance(initial_bias, array_types):
                assert initial_bias.shape == (out_channels,)
            if initial_bias is None:
                initial_bias = 0
            bias_initializer = initializers._get_initializer(initial_bias)
            self.b = variable.Parameter(bias_initializer, out_channels)
        else:
            self.b = None
def deconvolution_2d(x, W, b=None, stride=1, pad=0, outsize=None, **kwargs):
    """deconvolution_2d(x, W, b=None, stride=1, pad=0, outsize=None)

    Two dimensional deconvolution function.

    This is an implementation of two-dimensional deconvolution. In most deep
    learning frameworks and papers, this function is called
    **transposed convolution**. For historical reasons (e.g. the paper by
    Zeiler, `Deconvolutional Networks`_) and for backward compatibility, it
    is called **deconvolution** in Chainer.

    .. _Deconvolutional Networks: \
http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf

    It takes three variables: the input image ``x``, the filter weight ``W``,
    and the bias vector ``b``.

    Notation: here is a notation for dimensionalities.

    - :math:`n` is the batch size.
    - :math:`c_I` and :math:`c_O` are the number of the input and output
      channels, respectively.
    - :math:`h_I` and :math:`w_I` are the height and width of the input
      image, respectively.
    - :math:`h_K` and :math:`w_K` are the height and width of the filters,
      respectively.
    - :math:`h_P` and :math:`w_P` are the height and width of the spatial
      padding size, respectively.

    Let :math:`(s_Y, s_X)` be the stride of filter application. Then, the
    output size :math:`(h_O, w_O)` is estimated by the following equations:

    .. math::

       h_O &= s_Y (h_I - 1) + h_K - 2h_P,\\\\
       w_O &= s_X (w_I - 1) + w_K - 2w_P.

    The output of this function can be non-deterministic when it uses cuDNN.
    If the ``cudnn_deterministic`` configuration is ``True`` and cuDNN
    version is >= v3, it forces cuDNN to use a deterministic algorithm.

    .. warning::

        ``deterministic`` argument is not supported anymore since v2.
        Instead, use ``chainer.using_config('cudnn_deterministic', value)``
        (value is either ``True`` or ``False``).
        See :func:`chainer.using_config`.

    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`):
            Input variable of shape :math:`(n, c_I, h_I, w_I)`.
        W (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`):
            Weight variable of shape :math:`(c_I, c_O, h_K, w_K)`.
        b (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`):
            Bias variable of length :math:`c_O` (optional).
        stride (:class:`int` or pair of :class:`int` s):
            Stride of filter applications. ``stride=s`` and
            ``stride=(s, s)`` are equivalent.
        pad (:class:`int` or pair of :class:`int` s):
            Spatial padding width for input arrays.
            ``pad=p`` and ``pad=(p, p)`` are equivalent.
        outsize (:class:`tuple` of :class:`int`):
            Expected output size of the deconvolution. It should be a pair
            of height and width :math:`(h_O, w_O)`. The default value is
            ``None``, in which case the output size is estimated from the
            input size, stride and pad.

    Returns:
        ~chainer.Variable:
        Output variable of shape :math:`(n, c_O, h_O, w_O)`.

    .. admonition:: Example

        >>> n = 10
        >>> c_i, c_o = 1, 3
        >>> h_i, w_i = 5, 10
        >>> h_k, w_k = 10, 10
        >>> h_p, w_p = 5, 5
        >>> x = np.random.uniform(0, 1, (n, c_i, h_i, w_i)).astype('f')
        >>> x.shape
        (10, 1, 5, 10)
        >>> W = np.random.uniform(0, 1, (c_i, c_o, h_k, w_k)).astype('f')
        >>> W.shape
        (1, 3, 10, 10)
        >>> b = np.random.uniform(0, 1, c_o).astype('f')
        >>> b.shape
        (3,)
        >>> s_y, s_x = 5, 5
        >>> y = F.deconvolution_2d(x, W, b, stride=(s_y, s_x), pad=(h_p, w_p))
        >>> y.shape
        (10, 3, 20, 45)
        >>> h_o = s_y * (h_i - 1) + h_k - 2 * h_p
        >>> w_o = s_x * (w_i - 1) + w_k - 2 * w_p
        >>> y.shape == (n, c_o, h_o, w_o)
        True

    """
    argument.check_unexpected_kwargs(
        kwargs, deterministic="deterministic argument is not "
        "supported anymore. "
        "Use chainer.using_config('cudnn_deterministic', value) "
        "context where value is either `True` or `False`.")
    argument.assert_kwargs_empty(kwargs)

    # Only a Variable that asks for gradients needs the gradient w.r.t. x.
    requires_x_grad = isinstance(x, variable.Variable) and x.requires_grad
    deconv = Deconvolution2DFunction(stride, pad, outsize, requires_x_grad)

    # The bias is passed on only when it was given.
    inputs = (x, W) if b is None else (x, W, b)
    return deconv(*inputs)
def extract(self, images, layers=None, size=(224, 224), **kwargs):
    """extract(self, images, layers=['pool5'], size=(224, 224))

    Extracts all the feature maps of given images.

    The difference from directly executing ``forward`` is that this method
    accepts images as an input and automatically transforms them into a
    proper variable. That is, it can be seen as a shortcut method that
    implicitly calls the ``prepare`` and ``forward`` functions.

    Unlike ``predict`` method, this method does not override
    ``chainer.config.train`` and ``chainer.config.enable_backprop``
    configuration. If you want to extract features without updating
    model parameters, you need to manually set configuration when
    calling this method as follows:

     .. code-block:: python

        # model is an instance of ResNetLayers (50 or 101 or 152 layers)
        with chainer.using_config('train', False):
            with chainer.using_config('enable_backprop', False):
                feature = model.extract([image])

    .. warning::

       ``test`` and ``volatile`` arguments are not supported anymore since
       v2. Instead, users should configure training and volatile modes with
       ``train`` and ``enable_backprop``, respectively.

       Note that default behavior of this method is different between v1
       and later versions. Specifically, the default value of ``test`` in
       v1 was ``True`` (test mode), whereas that of
       ``chainer.config.train`` is also ``True`` (train mode). Therefore,
       users need to explicitly switch ``train`` to ``False`` to run the
       code in test mode and ``enable_backprop`` to ``False`` to turn off
       computational graph construction.

       See the `upgrade guide <https://docs.chainer.org/en/stable\
       /upgrade_v2.html#training-mode-is-configured-by-a-thread-local-flag>`_.

    Args:
        images (iterable of PIL.Image or numpy.ndarray): Input images.
        layers (list of str): The list of layer names you want to extract.
            ``None`` is treated as ``['pool5']``.
        size (pair of ints): The resolution of resized images used as
            an input of CNN. No image is resized if this argument is
            ``None``, but then the resolutions of all the images should
            be the same.

    Returns:
        Dictionary of ~chainer.Variable: A dictionary in which
        the key contains the layer name and the value contains
        the corresponding feature map variable.

    """
    if kwargs:
        # Removed arguments get a dedicated message before the generic check.
        argument.check_unexpected_kwargs(
            kwargs, test='test argument is not supported anymore. '
            'Use chainer.using_config',
            volatile='volatile argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    if layers is None:
        layers = ['pool5']

    # Preprocess every image, batch them, and move the batch to the array
    # module of this link before wrapping it in a Variable.
    batch = concat_examples([prepare(image, size=size) for image in images])
    inputs = Variable(self.xp.asarray(batch))
    return self(inputs, layers=layers)
def n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs,
                    use_bi_direction, **kwargs):
    """n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs, use_bi_direction)

    Base function for Stack GRU/BiGRU functions.

    This function is used at :func:`chainer.functions.n_step_bigru` and
    :func:`chainer.functions.n_step_gru`.
    This function's behavior depends on argument ``use_bi_direction``.

    When ``hx`` lives on a non-NumPy array module and cuDNN may be used, the
    computation is delegated to ``NStepGRU`` / ``NStepBiGRU``; otherwise it
    is delegated to ``n_step_rnn.n_step_rnn_impl``.

    .. warning::

       ``train`` and ``use_cudnn`` arguments are not supported anymore since
       v2.
       Instead, use ``chainer.using_config('train', train)`` and
       ``chainer.using_config('use_cudnn', use_cudnn)`` respectively.
       See :func:`chainer.using_config`.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is number of layers and is
            equal to ``n_layers``, ``B`` is mini-batch size, and ``N`` is
            dimension of hidden units. For a bi-directional GRU, the first
            dimension length is ``2S``.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents weights for i-th layer.
            Each ``ws[i]`` is a list containing six matrices.
            ``ws[i][j]`` corresponds to ``W_j`` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 3`` has ``(N, I)`` shape as
            they are multiplied with input variables. All other matrices
            have ``(N, N)`` shape.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represents biases for i-th layer.
            Each ``bs[i]`` is a list containing six vectors.
            ``bs[i][j]`` corresponds to ``b_j`` in the equation.
            Shape of each vector is ``(N,)`` where ``N`` is dimension of
            hidden units.
        xs (list of chainer.Variable): A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is
            mini-batch size for time ``t``, and ``I`` is size of input units.
            Note that this function supports variable length sequences.
            When sequences have different lengths, sort sequences in
            descending order by length, and transpose the sorted sequence.
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :func:`~chainer.Variable` holding sequence.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-direction GRU.

    Returns:
        tuple: A pair ``(hy, ys)`` as produced by the selected
        implementation. On the cuDNN path ``ys`` is the concatenated output
        split back into one piece per element of ``xs``.

    .. seealso::
       :func:`chainer.functions.n_step_rnn`
       :func:`chainer.functions.n_step_birnn`

    """  # NOQA

    argument.check_unexpected_kwargs(
        kwargs, train='train argument is not supported anymore. '
        'Use chainer.using_config',
        use_cudnn='use_cudnn argument is not supported anymore. '
        'Use chainer.using_config')
    argument.assert_kwargs_empty(kwargs)

    xp = cuda.get_array_module(hx, hx.data)
    # ``should_use_cudnn`` is consulted only for non-NumPy arrays.
    use_cudnn = xp is not numpy and chainer.should_use_cudnn('>=auto', 5000)

    if not use_cudnn:
        hy, _, ys = n_step_rnn.n_step_rnn_impl(
            _gru, n_layers, dropout_ratio, hx, None, ws, bs, xs,
            use_bi_direction)
        return hy, ys

    states = get_random_state().create_dropout_states(dropout_ratio)
    # Remember the per-step batch sizes so that the concatenated output can
    # be cut back into per-step pieces afterwards.
    lengths = [len(x) for x in xs]
    stacked_xs = chainer.functions.concat(xs, axis=0)

    w = n_step_rnn.cudnn_rnn_weight_concat(
        n_layers, states, use_bi_direction, 'gru', ws, bs)

    rnn_class = NStepBiGRU if use_bi_direction else NStepGRU
    hy, stacked_ys = rnn_class(n_layers, states, lengths)(hx, w, stacked_xs)

    sections = numpy.cumsum(lengths[:-1])
    ys = chainer.functions.split_axis(stacked_ys, sections, 0)
    return hy, ys
def n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs,
                    use_bi_direction, **kwargs):
    """n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs, use_bi_direction)

    Base function for Stack GRU/BiGRU functions.

    This function is used at :func:`chainer.functions.n_step_bigru` and
    :func:`chainer.functions.n_step_gru`.
    This function's behavior depends on argument ``use_bi_direction``.

    When ``hx`` lives on a non-NumPy array module and cuDNN may be used, the
    computation is delegated to ``NStepGRU`` / ``NStepBiGRU``. Otherwise the
    GRU is unrolled step by step with ordinary functions.

    .. warning::

       ``train`` and ``use_cudnn`` arguments are not supported anymore since
       v2.
       Instead, use ``chainer.using_config('train', train)`` and
       ``chainer.using_config('use_cudnn', use_cudnn)`` respectively.
       See :func:`chainer.using_config`.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is number of layers and is
            equal to ``n_layers``, ``B`` is mini-batch size, and ``N`` is
            dimension of hidden units. For a bi-directional GRU, the first
            dimension length is ``2S``.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents weights for i-th layer.
            Each ``ws[i]`` is a list containing six matrices.
            ``ws[i][j]`` corresponds to ``W_j`` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 3`` has ``(N, I)`` shape as
            they are multiplied with input variables. All other matrices
            have ``(N, N)`` shape.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represents biases for i-th layer.
            Each ``bs[i]`` is a list containing six vectors.
            ``bs[i][j]`` corresponds to ``b_j`` in the equation.
            Shape of each vector is ``(N,)`` where ``N`` is dimension of
            hidden units.
        xs (list of chainer.Variable): A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is
            mini-batch size for time ``t``, and ``I`` is size of input units.
            Note that this function supports variable length sequences.
            When sequences have different lengths, sort sequences in
            descending order by length, and transpose the sorted sequence.
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :func:`~chainer.Variable` holding sequence.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-direction GRU.

    Returns:
        tuple: A pair ``(hy, ys)``. On the non-cuDNN path ``hy`` stacks the
        final hidden state of every layer and direction, and ``ys`` is a
        tuple holding the last layer's output for each element of ``xs``.

    .. seealso::
       :func:`chainer.functions.n_step_rnn`
       :func:`chainer.functions.n_step_birnn`

    """  # NOQA

    # Removed arguments get a dedicated message before the generic check.
    argument.check_unexpected_kwargs(
        kwargs, train='train argument is not supported anymore. '
        'Use chainer.using_config',
        use_cudnn='use_cudnn argument is not supported anymore. '
        'Use chainer.using_config')
    argument.assert_kwargs_empty(kwargs)

    xp = cuda.get_array_module(hx, hx.data)

    if xp is not numpy and chainer.should_use_cudnn('>=auto', 5000):
        states = get_random_state().create_dropout_states(dropout_ratio)
        # flatten all input variables into one argument list, in the order
        # hx, every weight, every bias, every input
        inputs = tuple(itertools.chain(
            (hx, ),
            itertools.chain.from_iterable(ws),
            itertools.chain.from_iterable(bs),
            xs))
        if use_bi_direction:
            rnn = NStepBiGRU(n_layers, states)
        else:
            rnn = NStepGRU(n_layers, states)

        # The first returned item is taken as the final hidden state and all
        # the remaining items as the outputs.
        ret = rnn(*inputs)
        hy, = ret[:1]
        ys = ret[1:]
        return hy, ys

    else:
        direction = 2 if use_bi_direction else 1
        # Cut hx into one piece per (layer, direction) and drop the leading
        # axis of each piece.
        hx = split_axis.split_axis(hx, n_layers * direction, axis=0,
                                   force_tuple=True)
        hx = [reshape.reshape(h, h.shape[1:]) for h in hx]

        # Stack the three input-side (indices 0-2) and the three hidden-side
        # (indices 3-5) weights and biases along axis 0, so that a single
        # linear call yields all three parts, which are split again along
        # axis 1 inside the loop.
        xws = [concat.concat([w[0], w[1], w[2]], axis=0) for w in ws]
        hws = [concat.concat([w[3], w[4], w[5]], axis=0) for w in ws]
        xbs = [concat.concat([b[0], b[1], b[2]], axis=0) for b in bs]
        hbs = [concat.concat([b[3], b[4], b[5]], axis=0) for b in bs]

        xs_next = xs
        hy = []
        for layer in six.moves.range(n_layers):

            # NOTE: this closure reads ``layer`` and ``xs_next`` from the
            # enclosing scope; it is only called within the same iteration
            # in which it is defined.
            def _one_directional_loop(di):
                # di=0, forward GRU
                # di=1, backward GRU
                xs_list = xs_next if di == 0 else reversed(xs_next)
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                h_list = []
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        # This step has fewer rows than the hidden state:
                        # update only the first ``batch`` rows and carry the
                        # remaining rows over unchanged.
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    else:
                        h_rest = None

                    # Dropout is applied to the inputs of every layer except
                    # the first one.
                    if layer > 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)

                    gru_x = linear.linear(x, xws[layer_idx], xbs[layer_idx])
                    gru_h = linear.linear(h, hws[layer_idx], hbs[layer_idx])

                    W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
                    U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

                    # r scales the hidden-side term of the candidate; z
                    # blends the candidate with the previous hidden state.
                    r = sigmoid.sigmoid(W_r_x + U_r_h)
                    z = sigmoid.sigmoid(W_z_x + U_z_h)
                    h_bar = tanh.tanh(W_x + r * U_x)
                    h_bar = (1 - z) * h_bar + z * h
                    if h_rest is not None:
                        # Re-attach the rows that were not updated.
                        h = concat.concat([h_bar, h_rest], axis=0)
                    else:
                        h = h_bar
                    # Only the updated rows are emitted as this step's output.
                    h_list.append(h_bar)
                return h, h_list

            # Forward GRU
            h, h_forward = _one_directional_loop(di=0)
            hy.append(h)

            if use_bi_direction:
                # Backward GRU
                h, h_backward = _one_directional_loop(di=1)
                # Outputs were produced over the reversed inputs; put them
                # back in the order of the inputs.
                h_backward.reverse()
                # Concat
                xs_next = [concat.concat([hfi, hbi], axis=1) for (hfi, hbi) in
                           six.moves.zip(h_forward, h_backward)]
                hy.append(h)
            else:
                # Uni-directional GRU
                xs_next = h_forward

        # The outputs of the last layer are the outputs of the whole stack.
        ys = xs_next
        hy = stack.stack(hy)
        return hy, tuple(ys)