Example #1
def argmax_crf1d(cost, xs):
    alpha = xs[0]
    alphas = []
    max_inds = []
    for x in xs[1:]:
        batch = x.shape[0]
        if alpha.shape[0] > batch:
            alpha, alpha_rest = split_axis.split_axis(alpha, [batch], axis=0)
            alphas.append(alpha_rest)
        else:
            alphas.append(None)
        b_alpha, b_cost = broadcast.broadcast(alpha[..., None], cost)
        scores = b_alpha + b_cost
        max_ind = minmax.argmax(scores, axis=1)
        max_inds.append(max_ind)
        alpha = minmax.max(scores, axis=1) + x

    inds = minmax.argmax(alpha, axis=1)
    path = [inds.data]
    for m, a in zip(max_inds[::-1], alphas[::-1]):
        inds = select_item.select_item(m, inds)
        if a is not None:
            inds = concat.concat([inds, minmax.argmax(a, axis=1)], axis=0)
        path.append(inds.data)
    path.reverse()

    score = minmax.max(alpha, axis=1)
    for a in alphas[::-1]:
        if a is None:
            continue
        score = concat.concat([score, minmax.max(a, axis=1)], axis=0)

    return score, path
Example #2
def argmax_crf1d(cost, xs):
    """Computes a state that maximizes a joint probability of the given CRF.

    Args:
        cost (Variable): A :math:`K \\times K` matrix which holds transition
            cost between two labels, where :math:`K` is the number of labels.
        xs (list of Variable): Input vector for each label.
            ``len(xs)`` denotes the length of the sequence,
            and each :class:`~chainer.Variable` holds a :math:`B \\times K`
            matrix, where :math:`B` is mini-batch size, :math:`K` is the number
            of labels.
            Note that the :math:`B`\\ s in all the variables are not
            necessarily the same, i.e., input sequences with different
            lengths are accepted.

    Returns:
        tuple: A tuple of :class:`~chainer.Variable` object ``s`` and a
        :class:`list` ``ps``.
        The shape of ``s`` is ``(B,)``, where ``B`` is the mini-batch size.
        The i-th element of ``s``, ``s[i]``, represents the log-likelihood of
        the i-th data.
        ``ps`` is a list of :class:`numpy.ndarray` or
        :class:`cupy.ndarray`, and denotes the state that maximizes the
        point probability.
        ``len(ps)`` is equal to ``len(xs)``, and shape of each ``ps[i]`` is
        the mini-batch size of the corresponding ``xs[i]``. That means,
        ``ps[i].shape == xs[i].shape[0:1]``.
    """
    alpha = xs[0]
    alphas = []
    max_inds = []
    for x in xs[1:]:
        batch = x.shape[0]
        if alpha.shape[0] > batch:
            alpha, alpha_rest = split_axis.split_axis(alpha, [batch], axis=0)
            alphas.append(alpha_rest)
        else:
            alphas.append(None)
        b_alpha, b_cost = broadcast.broadcast(alpha[..., None], cost)
        scores = b_alpha + b_cost
        max_ind = minmax.argmax(scores, axis=1)
        max_inds.append(max_ind)
        alpha = minmax.max(scores, axis=1) + x

    inds = minmax.argmax(alpha, axis=1)
    path = [inds.data]
    for m, a in zip(max_inds[::-1], alphas[::-1]):
        inds = select_item.select_item(m, inds)
        if a is not None:
            inds = concat.concat([inds, minmax.argmax(a, axis=1)], axis=0)
        path.append(inds.data)
    path.reverse()

    score = minmax.max(alpha, axis=1)
    for a in alphas[::-1]:
        if a is None:
            continue
        score = concat.concat([score, minmax.max(a, axis=1)], axis=0)

    return score, path
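A minimal usage sketch for the public wrapper around this routine, chainer.functions.argmax_crf1d. The label count, batch sizes, and random inputs below are illustrative assumptions, not taken from any of the projects above.

import numpy as np
import chainer.functions as F

K = 3  # number of labels (toy value)
cost = np.random.randn(K, K).astype(np.float32)  # K x K transition costs
# Three time steps; batch sizes must be non-increasing: (4, 3, 1).
xs = [np.random.randn(b, K).astype(np.float32) for b in (4, 3, 1)]

score, path = F.argmax_crf1d(cost, xs)
print(score.shape)              # (4,): one Viterbi score per sequence
print([p.shape for p in path])  # [(4,), (3,), (1,)]: best label per step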
Example #3
    def __call__(self, *cshsx):
        """Returns new cell state and output of Child-Sum TreeLSTM.

        Args:
            cshsx (list of :class:`~chainer.Variable`): Variable arguments
                which include all cell vectors and all output vectors of
                variable children, and an input vector.

        Returns:
            tuple of ~chainer.Variable: Returns
            :math:`(c_{new}, h_{new})`, where :math:`c_{new}` represents
            new cell state vector, and :math:`h_{new}` is new output
            vector.

        """

        cs = cshsx[:len(cshsx) // 2]
        hs = cshsx[len(cshsx) // 2:-1]
        x = cshsx[-1]
        assert (len(cshsx) % 2 == 1)
        assert (len(cs) == len(hs))

        if x is None:
            if any(c is not None for c in cs):
                base = [c for c in cs if c is not None][0]
            elif any(h is not None for h in hs):
                base = [h for h in hs if h is not None][0]
            else:
                raise ValueError('All inputs (cs, hs, x) are None.')
            batchsize, dtype = base.shape[0], base.dtype
            x = self.xp.zeros((batchsize, self.in_size), dtype=dtype)

        W_x_in = self.W_x(x)
        W_x_aio_in, W_x_f_in = split_axis.split_axis(W_x_in,
                                                     [3 * self.state_size],
                                                     axis=1)

        if len(hs) == 0:
            aio_in = W_x_aio_in
            a, i, o = split_axis.split_axis(aio_in, 3, axis=1)
            c = sigmoid.sigmoid(i) * tanh.tanh(a)
            h = sigmoid.sigmoid(o) * tanh.tanh(c)
            return c, h

        hs = self._pad_zero_nodes(hs, (x.shape[0], self.state_size),
                                  dtype=x.dtype)
        cs = self._pad_zero_nodes(cs, (x.shape[0], self.state_size),
                                  dtype=x.dtype)

        aio_in = self.W_h_aio(sum(hs)) + W_x_aio_in
        W_h_fs_in = concat.concat(split_axis.split_axis(self.W_h_f(
            concat.concat(hs, axis=0)),
                                                        len(hs),
                                                        axis=0),
                                  axis=1)
        f_in = W_h_fs_in + \
            concat.concat([W_x_f_in] * len(hs), axis=1)
        tree_lstm_in = concat.concat([aio_in, f_in], axis=1)

        return tree_lstm.tree_lstm(*(cs + (tree_lstm_in, )))
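The link that wraps this forward pass is chainer.links.ChildSumTreeLSTM. A minimal sketch of calling it with two children, assuming the (in_size, out_size) constructor and toy sizes:

import numpy as np
import chainer.links as L

in_size, out_size, batch = 4, 5, 2  # toy sizes (assumption)
link = L.ChildSumTreeLSTM(in_size, out_size)

# Arguments are ordered as (c1, ..., cN, h1, ..., hN, x).
c1 = np.zeros((batch, out_size), dtype=np.float32)
c2 = np.zeros((batch, out_size), dtype=np.float32)
h1 = np.zeros((batch, out_size), dtype=np.float32)
h2 = np.zeros((batch, out_size), dtype=np.float32)
x = np.random.randn(batch, in_size).astype(np.float32)

c_new, h_new = link(c1, c2, h1, h2, x)
print(c_new.shape, h_new.shape)  # (2, 5) (2, 5)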
Example #4
    def __call__(self, c, h):
        """Updates the internal state and returns the Cell outputs.

           Remember to treat this Grid cell as if it's an LSTM and pass
           ``c`` as well as ``h``. Only parts of ``c`` will be used
           depending on whether there is an LSTM or not.

        Args:
            c (~chainer.Variable): The previous memory information.
            h (~chainer.Variable): The previous state information.

        Returns:
            tuple of ~chainer.Variable: Returns ``(c_new, h_new)``, where
            ``c_new`` represents new cell state, and ``h_new`` is updated
            output. Parts of ``c_new`` will be useless.

        """
        assert h is not None
        assert c is not None
        c = split_axis.split_axis(c, self.out_indices, 1, True)
        h_list = []
        h_curr = None
        for layer_id, layer in enumerate(self):
            layer_params = inspect.getargspec(layer)[0]
            if 'c' in layer_params:
                h_curr = layer(c[layer_id], h)
            else:
                h_curr = (c[layer_id], layer(h))
            h_list.append(h_curr)
        h_new = concat.concat([x[1] for x in h_list], 1)
        c_new = concat.concat([x[0] for x in h_list], 1)
        return c_new, h_new
Example #5
def argmax_crf1d(cost, xs):
    """Computes a state that maximizes a joint probability of the given CRF.

    Args:
        cost (Variable): A :math:`K \\times K` matrix which holds transition
            cost between two labels, where :math:`K` is the number of labels.
        xs (list of Variable): Input vector for each label.
            ``len(xs)`` denotes the length of the sequence,
            and each :class:`~chainer.Variable` holds a :math:`B \\times K`
            matrix, where :math:`B` is mini-batch size, :math:`K` is the number
            of labels.
            Note that the :math:`B`\\ s in all the variables are not
            necessarily the same, i.e., input sequences with different
            lengths are accepted.

    Returns:
        tuple: A tuple of :class:`~chainer.Variable` object ``s`` and a
        :class:`list` ``ps``.
        The shape of ``s`` is ``(B,)``, where ``B`` is the mini-batch size.
        The i-th element of ``s``, ``s[i]``, represents the log-likelihood of
        the i-th data.
        ``ps`` is a list of :class:`numpy.ndarray` or
        :class:`cupy.ndarray`, and denotes the state that maximizes the
        point probability.
        ``len(ps)`` is equal to ``len(xs)``, and shape of each ``ps[i]`` is
        the mini-batch size of the corresponding ``xs[i]``. That means,
        ``ps[i].shape == xs[i].shape[0:1]``.
    """
    alpha = xs[0]
    alphas = []
    max_inds = []
    for x in xs[1:]:
        batch = x.shape[0]
        if alpha.shape[0] > batch:
            alpha, alpha_rest = split_axis.split_axis(alpha, [batch], axis=0)
            alphas.append(alpha_rest)
        else:
            alphas.append(None)
        b_alpha, b_cost = broadcast.broadcast(alpha[..., None], cost)
        scores = b_alpha + b_cost
        max_ind = minmax.argmax(scores, axis=1)
        max_inds.append(max_ind)
        alpha = minmax.max(scores, axis=1) + x

    inds = minmax.argmax(alpha, axis=1)
    path = [inds.data]
    for m, a in zip(max_inds[::-1], alphas[::-1]):
        inds = select_item.select_item(m, inds)
        if a is not None:
            inds = concat.concat([inds, minmax.argmax(a, axis=1)], axis=0)
        path.append(inds.data)
    path.reverse()

    score = minmax.max(alpha, axis=1)
    for a in alphas[::-1]:
        if a is None:
            continue
        score = concat.concat([score, minmax.max(a, axis=1)], axis=0)

    return score, path
Example #7
    def __call__(self, x1, x2, eps=0.001, test=None):
        h = x1
        h_d = x2

        h = F.relu(self.bn1(self.conv1(h)))
        h = F.max_pooling_2d(h, 3, stride=2)
        # Res Blocks
        h = self.res2(h, test=test)  # 1/4
        h = self.res3(h, test=test)  # 1/8
        pool1_8 = h
        h = self.res4(h, test=test)  # 1/16
        pool1_16 = h
        h = self.res5(h, test=test)  # 1/32
        pool1_32 = h

        # upscore 1/32 -> 1/1
        h = self.upscore32(pool1_32)
        upscore1 = h  # 1/1

        # upscore 1/16 -> 1/1
        h = self.upscore16(pool1_16)
        upscore2 = h  # 1/1

        # concat conv
        h = concat.concat((upscore1, upscore2, pool1_8), axis=1)
        h = F.relu(self.concat_conv(h))
        concat_pool = h

        # score
        h = F.relu(self.score_pool(concat_pool))
        score1_8 = h
        h = F.relu(self.upscore_final(h))
        score = h  # 1/1
        self.score = score  # XXX: for backward compatibility

        h = F.relu(self.cls_pool(concat_pool))
        cls_pool1_8 = h

        h_d = F.relu(self.bn_d1_1(self.conv_d1_1(h_d)))
        h_d = F.relu(self.bn_d1_2(self.conv_d1_2(h_d)))
        h_d = F.max_pooling_2d(h_d, 2, stride=2, pad=0)  # 1/2
        h_d = F.relu(self.bn_d2(self.conv_d2(h_d)))
        h_d = F.max_pooling_2d(h_d, 2, stride=2, pad=0)  # 1/4
        h_d = F.relu(self.bn_d3(self.conv_d3(h_d)))
        h_d = F.max_pooling_2d(h_d, 2, stride=2, pad=0)  # 1/8

        h_cp = concat.concat((h_d, pool1_8, cls_pool1_8), axis=1)
        h_cp = F.relu(self.bn_cp_1(self.conv_cp_1(h_cp)))
        h_cp = F.relu(self.bn_cp_2(self.conv_cp_2(h_cp)))
        h_cp = F.relu(self.bn_cp_3(self.upscore_cp1(h_cp)))
        cp_score = F.arctan(self.upscore_cp2(h_cp))

        h_rot = concat.concat((h_d, pool1_8, cls_pool1_8), axis=1)
        h_rot = F.relu(self.bn_rot_1(self.conv_rot_1(h_rot)))
        h_rot = F.relu(self.bn_rot_2(self.conv_rot_2(h_rot)))
        h_rot = F.relu(self.bn_rot_3(self.upscore_rot1(h_rot)))
        rot_score = F.tanh(self.upscore_rot2(h_rot))

        return score, cp_score, rot_score
Example #8
    def forward(self, *cshsx):
        """Returns new cell state and output of Child-Sum TreeLSTM.

        Args:
            cshsx (list of :class:`~chainer.Variable`): Variable arguments
                which include all cell vectors and all output vectors of
                variable children, and an input vector.

        Returns:
            tuple of ~chainer.Variable: Returns
            :math:`(c_{new}, h_{new})`, where :math:`c_{new}` represents
            new cell state vector, and :math:`h_{new}` is new output
            vector.

        """

        cs = cshsx[:len(cshsx) // 2]
        hs = cshsx[len(cshsx) // 2:-1]
        x = cshsx[-1]
        assert(len(cshsx) % 2 == 1)
        assert(len(cs) == len(hs))

        if x is None:
            if any(c is not None for c in cs):
                base = [c for c in cs if c is not None][0]
            elif any(h is not None for h in hs):
                base = [h for h in hs if h is not None][0]
            else:
                raise ValueError('All inputs (cs, hs, x) are None.')
            batchsize, dtype = base.shape[0], base.dtype
            x = self.xp.zeros(
                (batchsize, self.in_size), dtype=dtype)

        W_x_in = self.W_x(x)
        W_x_aio_in, W_x_f_in = split_axis.split_axis(
            W_x_in, [3 * self.state_size], axis=1)

        if len(hs) == 0:
            aio_in = W_x_aio_in
            a, i, o = split_axis.split_axis(aio_in, 3, axis=1)
            c = sigmoid.sigmoid(i) * tanh.tanh(a)
            h = sigmoid.sigmoid(o) * tanh.tanh(c)
            return c, h

        hs = self._pad_zero_nodes(
            hs, (x.shape[0], self.state_size), dtype=x.dtype)
        cs = self._pad_zero_nodes(
            cs, (x.shape[0], self.state_size), dtype=x.dtype)

        aio_in = self.W_h_aio(sum(hs)) + W_x_aio_in
        W_h_fs_in = concat.concat(split_axis.split_axis(
            self.W_h_f(concat.concat(hs, axis=0)), len(hs), axis=0),
            axis=1)
        f_in = W_h_fs_in + \
            concat.concat([W_x_f_in] * len(hs), axis=1)
        tree_lstm_in = concat.concat([aio_in, f_in], axis=1)

        return tree_lstm.tree_lstm(*(cs + (tree_lstm_in, )))
Example #9
    def forward(self, x):
        outs = []

        if self.out1 > 0:
            h1 = self.conv1(x)
            h1 = self.conv1n(h1)
            h1 = relu.relu(h1)
            outs.append(h1)

        h3 = relu.relu(self.proj3n(self.proj3(x)))
        h3 = relu.relu(self.conv3n(self.conv3(h3)))
        outs.append(h3)

        h33 = relu.relu(self.proj33n(self.proj33(x)))
        h33 = relu.relu(self.conv33an(self.conv33a(h33)))
        h33 = relu.relu(self.conv33bn(self.conv33b(h33)))
        outs.append(h33)

        if self.pooltype == 'max':
            p = max_pooling_2d.max_pooling_2d(x, 3, stride=self.stride, pad=1,
                                              cover_all=False)
        else:
            p = average_pooling_2d.average_pooling_2d(x, 3, stride=self.stride,
                                                      pad=1)
        if self.proj_pool is not None:
            p = relu.relu(self.poolpn(self.poolp(p)))
        outs.append(p)

        y = concat.concat(outs, axis=1)
        return y
Example #10
            def _one_directional_loop(di):
                # di=0, forward RNN
                # di=1, backward RNN
                xs_list = xs_next if di == 0 else reversed(xs_next)
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                h_list = []
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    else:
                        h_rest = None

                    if layer > 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)

                    rnn_in = (
                        linear.linear(x, xws[layer_idx], xbs[layer_idx]) +
                        linear.linear(h, hws[layer_idx], hbs[layer_idx]))
                    if activation == 'tanh':
                        h_bar = tanh.tanh(rnn_in)
                    elif activation == 'relu':
                        h_bar = relu.relu(rnn_in)

                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                    else:
                        h = h_bar
                    h_list.append(h_bar)
                return h, h_list
Example #11
def _gru(x, h, c, w, b):
    xw = concat.concat([w[0], w[1], w[2]], axis=0)
    hw = concat.concat([w[3], w[4], w[5]], axis=0)
    xb = concat.concat([b[0], b[1], b[2]], axis=0)
    hb = concat.concat([b[3], b[4], b[5]], axis=0)

    gru_x = linear.linear(x, xw, xb)
    gru_h = linear.linear(h, hw, hb)

    W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
    U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

    r = sigmoid.sigmoid(W_r_x + U_r_h)
    z = sigmoid.sigmoid(W_z_x + U_z_h)
    h_bar = tanh.tanh(W_x + r * U_x)
    return (1 - z) * h_bar + z * h, None
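In equation form the helper computes r = sigmoid(W_r x + U_r h), z = sigmoid(W_z x + U_z h), h_bar = tanh(W x + r * (U h)), and returns (1 - z) * h_bar + z * h. A plain NumPy restatement of the same step; the weight and bias names here are illustrative, not the w/b packing used above.

import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def gru_step(x, h, W_r, W_z, W, U_r, U_z, U,
             b_rx, b_zx, b_x, b_rh, b_zh, b_h):
    r = sigmoid(x @ W_r.T + b_rx + h @ U_r.T + b_rh)       # reset gate
    z = sigmoid(x @ W_z.T + b_zx + h @ U_z.T + b_zh)       # update gate
    h_bar = np.tanh(x @ W.T + b_x + r * (h @ U.T + b_h))   # candidate state
    return (1.0 - z) * h_bar + z * h                       # interpolated new state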
Example #12
 def __call__(self, x, **kwargs):
     lstm_fw = self.lstm_fw(x, **kwargs)
     lstm_bw = self.lstm_bw(x, **kwargs)
     if self.concat:
         return concat([lstm_fw, lstm_bw], axis=2)
     else:
         return lstm_fw + lstm_bw
Example #14
    def __call__(self, x):
        outs = []

        if self.out1 > 0:
            h1 = self.f.conv1(x)
            h1 = self.f.conv1n(h1)
            h1 = relu.relu(h1)
            outs.append(h1)

        h3 = relu.relu(self.f.proj3n(self.f.proj3(x)))
        h3 = relu.relu(self.f.conv3n(self.f.conv3(h3)))
        outs.append(h3)

        h33 = relu.relu(self.f.proj33n(self.f.proj33(x)))
        h33 = relu.relu(self.f.conv33an(self.f.conv33a(h33)))
        h33 = relu.relu(self.f.conv33bn(self.f.conv33b(h33)))
        outs.append(h33)

        p = self.f.pool(x)
        if self.proj_pool is not None:
            p = relu.relu(self.f.poolpn(self.f.poolp(p)))
        outs.append(p)

        y = concat.concat(outs, axis=1)
        return y
Example #15
def _offset2grid(offset, kh, kw, sy, sx, ph, pw, h, w):
    n, khkw2, out_h, out_w = offset.shape
    khkw = int(khkw2 / 2)
    xp = cuda.get_array_module(offset)

    ys, xs = xp.meshgrid(
        xp.arange(0, sy * out_h, sy, dtype=numpy.float32),
        xp.arange(0, sx * out_w, sx, dtype=numpy.float32), indexing='ij',
        copy=False
    )
    filter_offset_x = xp.tile(xp.arange(kw, dtype=numpy.float32), kh)
    filter_offset_y = xp.repeat(xp.arange(kh, dtype=numpy.float32), kw)
    x_coord = (offset[:, :khkw] + xs[None, None] +
               filter_offset_x[None, :, None, None])
    y_coord = (offset[:, khkw:] + ys[None, None] +
               filter_offset_y[None, :, None, None])

    # The values of this variable are clipped to the range [-1, 1].
    # The coordinate (-1, -1) corresponds to the upper-left
    # corner of the input image.
    x_coord = (x_coord / (w + 2 * pw - 1) - 0.5) * 2
    y_coord = (y_coord / (h + 2 * ph - 1) - 0.5) * 2

    # Shape of `coord` is (n, 2 * kh * kw, out_h, out_w)
    coord = concat.concat([x_coord, y_coord], axis=1)
    return coord
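The last two assignments map pixel coordinates in the padded input, which range over [0, w + 2*pw - 1] horizontally, onto the [-1, 1] convention stated in the comment. A quick sketch with toy sizes (assumed values) that checks the two corner columns:

import numpy as np

w, pw = 8, 1  # toy input width and horizontal padding (assumption)
x_pix = np.array([0.0, w + 2 * pw - 1], dtype=np.float32)  # leftmost, rightmost columns
x_norm = (x_pix / (w + 2 * pw - 1) - 0.5) * 2
print(x_norm)  # [-1.  1.], matching the comment above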
Example #16
    def __call__(self, x):
        test = not self.train
        outs = []

        if self.out1 > 0:
            h1 = self.conv1(x)
            h1 = self.conv1n(h1, test=test)
            h1 = relu.relu(h1)
            outs.append(h1)

        h3 = relu.relu(self.proj3n(self.proj3(x), test=test))
        h3 = relu.relu(self.conv3n(self.conv3(h3), test=test))
        outs.append(h3)

        h33 = relu.relu(self.proj33n(self.proj33(x), test=test))
        h33 = relu.relu(self.conv33an(self.conv33a(h33), test=test))
        h33 = relu.relu(self.conv33bn(self.conv33b(h33), test=test))
        outs.append(h33)

        if self.pooltype == 'max':
            p = max_pooling_2d.max_pooling_2d(x, 3, stride=self.stride, pad=1)
        else:
            p = average_pooling_2d.average_pooling_2d(x, 3, stride=self.stride,
                                                      pad=1)
        if self.proj_pool is not None:
            p = relu.relu(self.poolpn(self.poolp(p), test=test))
        outs.append(p)

        y = concat.concat(outs, axis=1)
        return y
Example #18
    def __call__(self, x1, x2):
        h = x1
        ksizes = x2

        h = F.relu(self.bn1(self.conv1(h)))
        h = F.max_pooling_2d(h, 3, stride=2)
        # Res Blocks
        h = self.res2(h)  # 1/4
        pool1_4 = h
        h = self.res3(h)  # 1/8
        # upscore 1/8 -> 1/4
        pool1_8 = self.upscore2(h)

        h = self.res4(h)  # 1/16
        # upscore 1/16 -> 1/4
        h = self.upscore1(h)

        # concat 1 / 4
        h = F.leaky_relu(
            self.bn_upscore(concat.concat((h, pool1_8, pool1_4), axis=1)))
        h = F.relu(self.bn_concat(self.concat_conv(h)))

        ksizes = F.ceil(
            F.resize_images((ksizes * 5), (h.data.shape[2], h.data.shape[3])))
        h = convolutional_roi_pooling(h, ksizes, out_ksize=self.out_ksize)
        h = F.relu(self.bn_croip1(self.pool_roi_conv(h)))
        h = F.relu(self.bn_croip2(self.conv_after_croip1(h)))
        h = F.relu(self.bn_croip3(self.conv_after_croip2(h)))

        # score #1 / 4
        h = F.relu(self.score_conv(h))
        score = h  # 1/4

        return score
Example #19
            def _one_directional_loop(di):
                # di=0, forward RNN
                # di=1, backward RNN
                xs_list = xs_next if di == 0 else reversed(xs_next)
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                h_list = []
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    else:
                        h_rest = None

                    if layer > 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)

                    rnn_in = (linear.linear(x, xws[layer_idx],
                                            xbs[layer_idx]) +
                              linear.linear(h, hws[layer_idx], hbs[layer_idx]))
                    if activation == 'tanh':
                        h_bar = tanh.tanh(rnn_in)
                    elif activation == 'relu':
                        h_bar = relu.relu(rnn_in)

                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                    else:
                        h = h_bar
                    h_list.append(h_bar)
                return h, h_list
Example #20
            def _one_directional_loop(di):
                # di=0, forward GRU
                # di=1, backward GRU
                xs_list = xs_next if di == 0 else reversed(xs_next)
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                h_list = []
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    else:
                        h_rest = None

                    if layer > 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)

                    gru_x = linear.linear(x, xws[layer_idx], xbs[layer_idx])
                    gru_h = linear.linear(h, hws[layer_idx], hbs[layer_idx])

                    W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
                    U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

                    r = sigmoid.sigmoid(W_r_x + U_r_h)
                    z = sigmoid.sigmoid(W_z_x + U_z_h)
                    h_bar = tanh.tanh(W_x + r * U_x)
                    h_bar = (1 - z) * h_bar + z * h
                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                    else:
                        h = h_bar
                    h_list.append(h_bar)
                return h, h_list
Example #21
def _offset2grid(offset, kh, kw, sy, sx, ph, pw, h, w):
    n, khkw2, out_h, out_w = offset.shape
    khkw = int(khkw2 / 2)
    xp = cuda.get_array_module(offset)

    ys, xs = xp.meshgrid(xp.arange(0, sy * out_h, sy, dtype=numpy.float32),
                         xp.arange(0, sx * out_w, sx, dtype=numpy.float32),
                         indexing='ij',
                         copy=False)
    filter_offset_x = xp.tile(xp.arange(kw, dtype=numpy.float32), kh)
    filter_offset_y = xp.repeat(xp.arange(kh, dtype=numpy.float32), kw)
    x_coord = (offset[:, :khkw] + xs[None, None] +
               filter_offset_x[None, :, None, None])
    y_coord = (offset[:, khkw:] + ys[None, None] +
               filter_offset_y[None, :, None, None])

    # The values of this variable are clipped to the range [-1, 1].
    # The coordinate (-1, -1) corresponds to the upper-left
    # corner of the input image.
    x_coord = (x_coord / (w + 2 * pw - 1) - 0.5) * 2
    y_coord = (y_coord / (h + 2 * ph - 1) - 0.5) * 2

    # Shape of `coord` is (n, 2 * kh * kw, out_h, out_w)
    coord = concat.concat([x_coord, y_coord], axis=1)
    return coord
Example #23
    def forward(self, x):
        outs = []

        if self.out1 > 0:
            h1 = self.conv1(x)
            h1 = self.conv1n(h1)
            h1 = relu.relu(h1)
            outs.append(h1)

        h3 = relu.relu(self.proj3n(self.proj3(x)))
        h3 = relu.relu(self.conv3n(self.conv3(h3)))
        outs.append(h3)

        h33 = relu.relu(self.proj33n(self.proj33(x)))
        h33 = relu.relu(self.conv33an(self.conv33a(h33)))
        h33 = relu.relu(self.conv33bn(self.conv33b(h33)))
        outs.append(h33)

        if self.pooltype == 'max':
            p = max_pooling_2d.max_pooling_2d(x, 3, stride=self.stride, pad=1,
                                              cover_all=False)
        else:
            p = average_pooling_2d.average_pooling_2d(x, 3, stride=self.stride,
                                                      pad=1)
        if self.proj_pool is not None:
            p = relu.relu(self.poolpn(self.poolp(p)))
        outs.append(p)

        y = concat.concat(outs, axis=1)
        return y
Example #25
 def __call__(self, x):
     out1 = self.f.conv1(x)
     out3 = self.f.conv3(relu.relu(self.f.proj3(x)))
     out5 = self.f.conv5(relu.relu(self.f.proj5(x)))
     pool = self.f.projp(max_pooling_2d.max_pooling_2d(
         x, 3, stride=1, pad=1))
     y = relu.relu(concat.concat((out1, out3, out5, pool), axis=1))
     return y
Example #26
 def __call__(self, x):
     out1 = self.f.conv1(x)
     out3 = self.f.conv3(relu.relu(self.f.proj3(x)))
     out5 = self.f.conv5(relu.relu(self.f.proj5(x)))
     pool = self.f.projp(
         max_pooling_2d.max_pooling_2d(x, 3, stride=1, pad=1))
     y = relu.relu(concat.concat((out1, out3, out5, pool), axis=1))
     return y
Example #27
def _one_directional_loop(f, xs, h, c, w, b):
    h_list = []
    for x in xs:
        batch = len(x)
        need_split = len(h) > batch
        if need_split:
            h, h_rest = split_axis.split_axis(h, [batch], axis=0)
            if c is not None:
                c, c_rest = split_axis.split_axis(c, [batch], axis=0)

        h, c = f(x, h, c, w, b)
        h_list.append(h)

        if need_split:
            h = concat.concat([h, h_rest], axis=0)
            if c is not None:
                c = concat.concat([c, c_rest], axis=0)
    return h, c, h_list
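The split/concat bookkeeping above assumes the mini-batch is sorted by sequence length, so at each step only the leading rows of the state are still active. A standalone NumPy sketch of just that pattern (toy shapes, stand-in update):

import numpy as np

h = np.arange(4 * 2, dtype=np.float32).reshape(4, 2)  # state for 4 sequences
batch = 3                                             # 3 sequences still active

h_active, h_rest = h[:batch], h[batch:]         # split_axis along axis 0
h_active = h_active + 1.0                       # stand-in for the recurrent update
h = np.concatenate([h_active, h_rest], axis=0)  # concat restores the full state
print(h)  # rows 0-2 updated, row 3 untouched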
Example #29
            def _one_directional_loop(di):
                # di=0, forward LSTM
                # di=1, backward LSTM
                h_list = []
                c_list = []
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                c = cx[layer_idx]
                if di == 0:
                    xs_list = xs_next
                else:
                    xs_list = reversed(xs_next)
                counter = 0
                for x in xs_list:
                    counter += 1
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                        c, c_rest = split_axis.split_axis(c, [batch], axis=0)
                    else:
                        h_rest = None
                        c_rest = None

                    if layer != 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)
                    if counter == 4:
                        lstm_in = linear.linear(x, xws[layer_idx],
                                                xbs[layer_idx])
                    else:
                        lstm_in = linear.linear(
                            x, xws[layer_idx], xbs[layer_idx]) + linear.linear(
                                h, hws[layer_idx], hbs[layer_idx])

                    c_bar, h_bar = lstm.lstm(c, lstm_in)
                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                        c = concat.concat([c_bar, c_rest], axis=0)
                    else:
                        h = h_bar
                        c = c_bar
                    h_list.append(h_bar)
                    c_list.append(c_bar)
                return h, c, h_list, c_list
Example #30
def n_step_rnn_impl(f, n_layers, dropout_ratio, hx, cx, ws, bs, xs,
                    use_bi_direction):
    direction = 2 if use_bi_direction else 1
    hx = chainer.functions.separate(hx)
    use_cell = cx is not None
    if use_cell:
        cx = chainer.functions.separate(cx)
    else:
        cx = [None] * len(hx)

    xs_next = xs
    hy = []
    cy = []
    for layer in six.moves.range(n_layers):

        # Forward RNN
        if layer == 0:
            xs = xs_next
        else:
            xs = _dropout_sequence(xs_next, dropout_ratio)
        idx = direction * layer
        h, c, h_forward = _one_directional_loop(f, xs, hx[idx], cx[idx],
                                                ws[idx], bs[idx])
        hy.append(h)
        cy.append(c)

        if use_bi_direction:
            # Backward RNN
            idx = direction * layer + 1
            if layer == 0:
                xs = xs_next
            else:
                xs = _dropout_sequence(xs_next, dropout_ratio)
            h, c, h_backward = _one_directional_loop(f, reversed(xs), hx[idx],
                                                     cx[idx], ws[idx], bs[idx])
            h_backward.reverse()
            # Concat
            xs_next = [
                concat.concat([hfi, hbi], axis=1)
                for hfi, hbi in six.moves.zip(h_forward, h_backward)
            ]
            hy.append(h)
            cy.append(c)
        else:
            # Uni-directional RNN
            xs_next = h_forward

    ys = xs_next
    hy = stack.stack(hy)
    if use_cell:
        cy = stack.stack(cy)
    else:
        cy = None
    return hy, cy, tuple(ys)
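This implementation backs the n-step RNN functions; the simplest way to exercise the same code path is through a link such as chainer.links.NStepRNNTanh. A minimal sketch with toy sizes (all values below are assumptions):

import numpy as np
import chainer.links as L

n_layers, in_size, out_size = 1, 3, 4
rnn = L.NStepRNNTanh(n_layers, in_size, out_size, dropout=0.0)

# Three sequences of different lengths, each of shape (length, in_size).
xs = [np.random.randn(t, in_size).astype(np.float32) for t in (5, 3, 2)]
hy, ys = rnn(None, xs)           # hx=None starts from a zero initial state
print(hy.shape)                  # (1, 3, 4): (layers, batch, units)
print([y.shape for y in ys])     # [(5, 4), (3, 4), (2, 4)]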
Example #31
 def __call__(self, x):
     x = self.embed(x)
     xs = split_axis.split_axis(x, x.data.shape[1], 1)
     ret = []
     for x in xs:
         x = self.rnn1(x)
         x = self.rnn2(x)
         x = self.linear(x)
         x = reshape.reshape(x, x.data.shape + (-1, ))
         ret.append(x)
     ret = concat.concat(ret, axis=2)
     return ret
Example #32
def n_step_rnn_impl(
        f, n_layers, dropout_ratio, hx, cx, ws, bs, xs, use_bi_direction):
    direction = 2 if use_bi_direction else 1
    hx = chainer.functions.separate(hx)
    use_cell = cx is not None
    if use_cell:
        cx = chainer.functions.separate(cx)
    else:
        cx = [None] * len(hx)

    xs_next = xs
    hy = []
    cy = []
    for layer in six.moves.range(n_layers):

        # Forward RNN
        if layer == 0:
            xs = xs_next
        else:
            xs = _dropout_sequence(xs_next, dropout_ratio)
        idx = direction * layer
        h, c, h_forward = _one_directional_loop(
            f, xs, hx[idx], cx[idx], ws[idx], bs[idx])
        hy.append(h)
        cy.append(c)

        if use_bi_direction:
            # Backward RNN
            idx = direction * layer + 1
            if layer == 0:
                xs = xs_next
            else:
                xs = _dropout_sequence(xs_next, dropout_ratio)
            h, c, h_backward = _one_directional_loop(
                f, reversed(xs), hx[idx], cx[idx], ws[idx], bs[idx])
            h_backward.reverse()
            # Concat
            xs_next = [concat.concat([hfi, hbi], axis=1) for hfi, hbi in
                       six.moves.zip(h_forward, h_backward)]
            hy.append(h)
            cy.append(c)
        else:
            # Uni-directional RNN
            xs_next = h_forward

    ys = xs_next
    hy = stack.stack(hy)
    if use_cell:
        cy = stack.stack(cy)
    else:
        cy = None
    return hy, cy, tuple(ys)
Example #33
def stack(xs, axis=0):
    """Concatenate variables along a new axis.

    Args:
        xs (list of chainer.Variable): Variables to be concatenated.
        axis (int): Axis of result along which variables are stacked.

    Returns:
        ~chainer.Variable: Output variable.

    """
    xs = [expand_dims.expand_dims(x, axis=axis) for x in xs]
    return concat.concat(xs, axis=axis)
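A short usage sketch of this function (toy arrays, nothing project-specific): stacking two length-3 vectors along a new leading axis yields shape (2, 3), which is exactly expand_dims followed by concat.

import numpy as np
import chainer.functions as F

a = np.arange(3, dtype=np.float32)
b = np.arange(3, 6, dtype=np.float32)

print(F.stack([a, b], axis=0).shape)  # (2, 3)
print(F.stack([a, b], axis=1).shape)  # (3, 2)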
Example #34
def black_out(x, t, W, samples):
    """BlackOut loss function.

    BlackOut loss function is defined as

    .. math::

      -\\log(p(t)) - \\sum_{s \\in S} \\log(1 - p(s)),

    where :math:`t` is the correct label, :math:`S` is a set of negative
    examples and :math:`p(\\cdot)` is the likelihood of a given label.
    And, :math:`p` is defined as

    .. math::

       p(y) = \\frac{\\exp(W_y^\\top x)}{
       \\sum_{s \\in samples} \\exp(W_s^\\top x)}.

    Args:
        x (~chainer.Variable): Batch of input vectors.
        t (~chainer.Variable): Vector of ground truth labels.
        W (~chainer.Variable): Weight matrix.
        samples (~chainer.Variable): Negative samples.

    Returns:
        ~chainer.Variable: Loss value.

    See: `BlackOut: Speeding up Recurrent Neural Network Language Models With \
         Very Large Vocabularies <https://arxiv.org/abs/1511.06909>`_

    .. seealso:: :class:`~chainer.links.BlackOut`.

    """

    batch_size = x.shape[0]

    neg_emb = embed_id.embed_id(samples, W)
    neg_y = matmul.batch_matmul(neg_emb, x)
    neg_y = reshape.reshape(neg_y, neg_y.shape[:-1])

    pos_emb = expand_dims.expand_dims(embed_id.embed_id(t, W), 1)
    pos_y = matmul.batch_matmul(pos_emb, x)
    pos_y = reshape.reshape(pos_y, pos_y.shape[:-1])

    logz = logsumexp.logsumexp(concat.concat([pos_y, neg_y]), axis=1)
    blogz, bneg_y = broadcast.broadcast(
        reshape.reshape(logz, (batch_size, 1)), neg_y)
    ny = exponential.log(1 - exponential.exp(bneg_y - blogz))
    py = reshape.reshape(pos_y, (batch_size,))
    loss = py - logz + _sum.sum(ny, axis=1)
    return -_sum.sum(loss) / batch_size
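A minimal sketch of calling chainer.functions.black_out with random toy data; the sizes below are assumptions chosen only to make the shapes line up (x: (B, units), t: (B,), W: (vocab, units), samples: (B, n_samples)).

import numpy as np
import chainer.functions as F

batch_size, n_units, n_vocab, n_samples = 4, 8, 50, 5
x = np.random.randn(batch_size, n_units).astype(np.float32)
t = np.random.randint(0, n_vocab, size=batch_size).astype(np.int32)
W = np.random.randn(n_vocab, n_units).astype(np.float32)
samples = np.random.randint(
    0, n_vocab, size=(batch_size, n_samples)).astype(np.int32)

loss = F.black_out(x, t, W, samples)
print(loss.shape)  # (): a scalar loss averaged over the mini-batch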
Example #35
def black_out(x, t, W, samples):
    """BlackOut loss function.

    BlackOut loss function is defined as

    .. math::

      -\\log(p(t)) - \\sum_{s \\in S} \\log(1 - p(s)),

    where :math:`t` is the correct label, :math:`S` is a set of negative
    examples and :math:`p(\\cdot)` is the likelihood of a given label.
    And, :math:`p` is defined as

    .. math::

       p(y) = \\frac{\\exp(W_y^\\top x)}{
       \\sum_{s \\in samples} \\exp(W_s^\\top x)}.

    Args:
        x (~chainer.Variable): Batch of input vectors.
        t (~chainer.Variable): Vector of ground truth labels.
        W (~chainer.Variable): Weight matrix.
        samples (~chainer.Variable): Negative samples.

    Returns:
        ~chainer.Variable: Loss value.

    See: `BlackOut: Speeding up Recurrent Neural Network Language Models With \
         Very Large Vocabularies <https://arxiv.org/abs/1511.06909>`_

    .. seealso:: :class:`~chainer.links.BlackOut`.

    """

    batch_size = x.shape[0]

    neg_emb = embed_id.embed_id(samples, W)
    neg_y = matmul.batch_matmul(neg_emb, x)
    neg_y = reshape.reshape(neg_y, neg_y.shape[:-1])

    pos_emb = expand_dims.expand_dims(embed_id.embed_id(t, W), 1)
    pos_y = matmul.batch_matmul(pos_emb, x)
    pos_y = reshape.reshape(pos_y, pos_y.shape[:-1])

    logz = logsumexp.logsumexp(concat.concat([pos_y, neg_y]), axis=1)
    blogz, bneg_y = broadcast.broadcast(reshape.reshape(logz, (batch_size, 1)),
                                        neg_y)
    ny = exponential.log(1 - exponential.exp(bneg_y - blogz))
    py = reshape.reshape(pos_y, (batch_size, ))
    loss = py - logz + _sum.sum(ny, axis=1)
    return -_sum.sum(loss) / batch_size
Example #36
            def _one_directional_loop(di):
                # di=0, forward LSTM
                # di=1, backward LSTM
                h_list = []
                c_list = []
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                c = cx[layer_idx]
                if di == 0:
                    xs_list = xs_next
                else:
                    xs_list = reversed(xs_next)
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                        c, c_rest = split_axis.split_axis(c, [batch], axis=0)
                    else:
                        h_rest = None
                        c_rest = None

                    if layer != 0:
                        x = dropout.dropout(x, ratio=dropout_ratio,
                                            train=train)
                    lstm_in = linear.linear(x, xws[layer_idx],
                                            xbs[layer_idx]) + \
                        linear.linear(h, hws[layer_idx], hbs[layer_idx])

                    c_bar, h_bar = lstm.lstm(c, lstm_in)
                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                        c = concat.concat([c_bar, c_rest], axis=0)
                    else:
                        h = h_bar
                        c = c_bar
                    h_list.append(h_bar)
                    c_list.append(c_bar)
                return h, c, h_list, c_list
Example #37
 def __call__(self, x):
     x = self.embed(x)
     xs = split_axis.split_axis(x, x.data.shape[1], 1)
     ret = []
     for x in xs:
         for l in self.rnns:
             x = l(x)
             x = dropout.dropout(x, 0.25, self.train)
         for l in self.linears:
             x = l(x)
         x = reshape.reshape(x, x.data.shape + (-1, ))
         ret.append(x)
     ret = concat.concat(ret, axis=2)
     return ret
Example #38
    def __call__(self, x):
        """Updates the internal state and returns the LSTM outputs.
        Args:
            x (~chainer.Variable): A new batch from the input sequence.
        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.
        """
        if self.upward.has_uninitialized_params:
            in_size = x.size // x.shape[0]
            self.upward._initialize_params(in_size)
            self._initialize_params()

        batch = x.shape[0]
        lstm_in = self.upward(x)
        h_rest = None
        if self.h is not None:
            h_size = self.h.shape[0]
            if batch == 0:
                h_rest = self.h
            elif h_size < batch:
                msg = ('The batch size of x must be equal to or less than the '
                       'size of the previous state h.')
                raise TypeError(msg)
            elif h_size > batch:
                h_update, h_rest = split_axis.split_axis(self.h, [batch],
                                                         axis=0)
                lstm_in += self.lateral(h_update)
            else:
                lstm_in += self.lateral(self.h)
        if self.c is None:
            xp = self.xp
            self.c = variable.Variable(xp.zeros((batch, self.state_size),
                                                dtype=x.dtype),
                                       volatile='auto')
        # self.c, y = lstm.lstm(self.c, lstm_in)

        c, y = lstm.lstm(self.c, lstm_in)
        enable = (x.data != -1)
        self.c = where(enable, c, self.c)
        if self.h is not None:
            y = where(enable, y, self.h)

        if h_rest is None:
            self.h = y
        elif len(y.data) == 0:
            self.h = h_rest
        else:
            self.h = concat.concat([y, h_rest], axis=0)

        return y
Example #39
    def __call__(self, x):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        if self.upward.has_uninitialized_params:
            with cuda.get_device_from_id(self._device_id):
                in_size = x.size // x.shape[0]
                self.upward._initialize_params(in_size)
                self._initialize_params()

        batch = x.shape[0]
        lstm_in = self.upward(x)
        h_rest = None
        if self.h is not None:
            h_size = self.h.shape[0]
            if batch == 0:
                h_rest = self.h
            elif h_size < batch:
                msg = ('The batch size of x must be equal to or less than '
                       'the size of the previous state h.')
                raise TypeError(msg)
            elif h_size > batch:
                h_update, h_rest = split_axis.split_axis(
                    self.h, [batch], axis=0)
                lstm_in += self.lateral(h_update)
            else:
                lstm_in += self.lateral(self.h)
        if self.c is None:
            xp = self.xp
            with cuda.get_device_from_id(self._device_id):
                self.c = variable.Variable(
                    xp.zeros((batch, self.state_size), dtype=x.dtype),
                    volatile='auto')
        self.c, y = lstm.lstm(self.c, lstm_in)

        if h_rest is None:
            self.h = y
        elif len(y.data) == 0:
            self.h = h_rest
        else:
            self.h = concat.concat([y, h_rest], axis=0)

        return y
Example #40
    def __call__(self, c, h):
        """Updates the internal state and returns the Cell outputs.

        Args:
            c (~chainer.Variable): The previous memory of the Grid cell.
            h (~chainer.Variable): The batched form of the previous state.

        Returns:
            tuple of ~chainer.Variable: Returns ``(c_new, h_new)``, where
            ``c_new`` represents new cell state, and ``h_new`` is updated
            output of LSTM units.

        """
        assert h is not None
        assert c is not None
        c = split_axis.split_axis(c, self.out_indices, 1, True)
        h_list = []
        h_curr = None
        for layer_id, layer in enumerate(self):
            h_curr = layer(c[layer_id], h)
            h_list.append(h_curr)
        h_new = concat.concat([x[1] for x in h_list], 1)
        c_new = concat.concat([x[0] for x in h_list], 1)
        return c_new, h_new
Example #41
    def __call__(self, x):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        if self.upward.W.data is None:
            with cuda.get_device_from_id(self._device_id):
                in_size = functools.reduce(operator.mul, x.shape[1:], 1)
                self.upward._initialize_params(in_size)
                self._initialize_params()

        batch = x.shape[0]
        lstm_in = self.upward(x)
        h_rest = None
        if self.h is not None:
            h_size = self.h.shape[0]
            if batch == 0:
                h_rest = self.h
            elif h_size < batch:
                msg = ('The batch size of x must be equal to or less than '
                       'the size of the previous state h.')
                raise TypeError(msg)
            elif h_size > batch:
                h_update, h_rest = split_axis.split_axis(self.h, [batch],
                                                         axis=0)
                lstm_in += self.lateral(h_update)
            else:
                lstm_in += self.lateral(self.h)
        if self.c is None:
            xp = self.xp
            with cuda.get_device_from_id(self._device_id):
                self.c = variable.Variable(
                    xp.zeros((batch, self.state_size), dtype=x.dtype))
        self.c, y = lstm.lstm(self.c, lstm_in)

        if h_rest is None:
            self.h = y
        elif len(y.data) == 0:
            self.h = h_rest
        else:
            self.h = concat.concat([y, h_rest], axis=0)

        return y
Example #42
    def forward(self, x):
        """Computes the output of the Inception module.
        Args:
            x (~chainer.Variable): Input variable.
        Returns:
            Variable: Output variable. Its array has the same spatial size and
            the same minibatch size as the input array. The channel dimension
            has size ``out1 + out3 + out5 + proj_pool``.
        """
        out1 = self.conv1(x)
        out3 = self.conv3(relu.relu(self.proj3(x)))
        out5 = self.conv5(relu.relu(self.proj5(x)))
        pool = self.projp(max_pooling_2d.max_pooling_2d(x, 3, stride=1, pad=1))

        y = relu.relu(concat.concat((out1, out3, out5, pool), axis=1))
        return y
Example #43
    def forward(self, x):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        if self.upward.W.array is None:
            with chainer.using_device(self.device):
                in_size = utils.size_of_shape(x.shape[1:])
                self.upward._initialize_params(in_size)
                self._initialize_params()

        batch = x.shape[0]
        lstm_in = self.upward(x)
        h_rest = None
        if self.h is not None:
            h_size = self.h.shape[0]
            if batch == 0:
                h_rest = self.h
            elif h_size < batch:
                msg = ('The batch size of x must be equal to or less than '
                       'the size of the previous state h.')
                raise TypeError(msg)
            elif h_size > batch:
                h_update, h_rest = split_axis.split_axis(
                    self.h, [batch], axis=0)
                lstm_in += self.lateral(h_update)
            else:
                lstm_in += self.lateral(self.h)
        if self.c is None:
            with chainer.using_device(self.device):
                self.c = variable.Variable(
                    self.xp.zeros((batch, self.state_size), dtype=x.dtype))
        self.c, y = lstm.lstm(self.c, lstm_in)

        if h_rest is None:
            self.h = y
        elif len(y.array) == 0:
            self.h = h_rest
        else:
            self.h = concat.concat([y, h_rest], axis=0)

        return y
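A minimal sketch of driving the stateful chainer.links.LSTM link with a shrinking mini-batch, which is the case the h_rest bookkeeping above handles; sizes are toy assumptions.

import numpy as np
import chainer.links as L

lstm = L.LSTM(3, 4)  # in_size=3, out_size=4 (toy sizes)
lstm.reset_state()

x1 = np.random.randn(2, 3).astype(np.float32)  # full mini-batch of 2
x2 = np.random.randn(1, 3).astype(np.float32)  # only one sequence remains

y1 = lstm(x1)  # internal h and c now have batch size 2
y2 = lstm(x2)  # batch shrinks; the inactive row is kept aside as h_rest
print(y1.shape, y2.shape)  # (2, 4) (1, 4)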
Example #44
    def __call__(self, x, test=None):
        """Computes the output of the InceptionBN module.

        Args:
            x (Variable): An input variable.
            test (bool): If ``True``, batch normalization layers run in testing
                mode; if ``test`` is omitted, ``not self.train`` is used as
                ``test``.

        """
        if test is None:
            test = not self.train
        outs = []

        if self.out1 > 0:
            h1 = self.conv1(x)
            h1 = self.conv1n(h1, test=test)
            h1 = relu.relu(h1)
            outs.append(h1)

        h3 = relu.relu(self.proj3n(self.proj3(x), test=test))
        h3 = relu.relu(self.conv3n(self.conv3(h3), test=test))
        outs.append(h3)

        h33 = relu.relu(self.proj33n(self.proj33(x), test=test))
        h33 = relu.relu(self.conv33an(self.conv33a(h33), test=test))
        h33 = relu.relu(self.conv33bn(self.conv33b(h33), test=test))
        outs.append(h33)

        if self.pooltype == 'max':
            p = max_pooling_2d.max_pooling_2d(x,
                                              3,
                                              stride=self.stride,
                                              pad=1,
                                              cover_all=False)
        else:
            p = average_pooling_2d.average_pooling_2d(x,
                                                      3,
                                                      stride=self.stride,
                                                      pad=1)
        if self.proj_pool is not None:
            p = relu.relu(self.poolpn(self.poolp(p), test=test))
        outs.append(p)

        y = concat.concat(outs, axis=1)
        return y
Example #45
    def __call__(self, h):
        """Updates the internal state and returns the Cell outputs.

        Args:
            h (~chainer.Variable): The batched form of the previous state.

        Returns:
            ~chainer.Variable: Returns ``h_new``, where
            ``h_new`` is the updated output of LSTM units.

        """
        assert h is not None
        h_list = []
        h_curr = None
        for layer in self:
            h_curr = layer(h)
            h_list.append(h_curr)
        h_new = concat.concat(h_list, 1)
        return h_new
Example #46
    def __call__(self, x1, eps=0.001):
        h = x1
        h = F.relu(self.bn1(self.conv1(h)))
        h = F.max_pooling_2d(h, 3, stride=2)
        # Res Blocks
        h = self.res2(h)  # 1/4
        h = self.res3(h)  # 1/8
        pool1_8 = h
        h = self.res4(h)  # 1/16
        pool1_16 = h
        h = self.res5(h)  # 1/32

        # upscore 1/32 -> 1/8
        h = F.elu(self.bn_up32(self.upscore32(h)))
        upscore1 = h  # 1/8
        # upscore 1/16 -> 1/8
        upscore2 = F.elu(self.bn_up16(self.upscore16(pool1_16)))

        # concat conv
        h = concat.concat((upscore1, upscore2, pool1_8), axis=1)
        h = F.relu(self.bn_concat(self.concat_conv(h)))
        concat_pool = h

        # score
        h = F.elu(self.score_pool(concat_pool))
        score1_8 = h
        h = F.relu(self.upscore_final(h))
        score = h  # 1/1

        h_cp = F.relu(self.bn_cp1(self.conv_cp1(concat_pool)))
        h_ocp = h_cp

        h_cp = F.relu(self.bn_cp2(self.conv_cp2(h_cp)))
        h_cp = F.elu(self.bn_cp3(self.upscore_cp1(h_cp)))
        h_cp = self.upscore_cp2(h_cp)

        h_ocp = F.relu(self.bn_ocp2(self.conv_ocp2(h_ocp)))
        h_ocp = F.elu(self.bn_ocp3(self.upscore_ocp1(h_ocp)))
        h_ocp = self.upscore_ocp2(h_ocp)

        cp_score = F.tanh(h_cp) * self.output_scale
        ocp_score = F.tanh(h_ocp) * self.output_scale

        return score, cp_score, ocp_score
Example #47
    def __call__(self, x):
        """Computes the output of the Inception module.

        Args:
            x (~chainer.Variable): Input variable.

        Returns:
            Variable: Output variable. Its array has the same spatial size and
            the same minibatch size as the input array. The channel dimension
            has size ``out1 + out3 + out5 + proj_pool``.

        """
        out1 = self.conv1(x)
        out3 = self.conv3(relu.relu(self.proj3(x)))
        out5 = self.conv5(relu.relu(self.proj5(x)))
        pool = self.projp(max_pooling_2d.max_pooling_2d(
            x, 3, stride=1, pad=1))
        y = relu.relu(concat.concat((out1, out3, out5, pool), axis=1))
        return y
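A minimal sketch of using the corresponding link, chainer.links.Inception, with toy channel counts (assumed values): the output keeps the spatial size and has out1 + out3 + out5 + proj_pool channels.

import numpy as np
import chainer.links as L

# Positional arguments: in_channels, out1, proj3, out3, proj5, out5, proj_pool.
inc = L.Inception(32, 16, 8, 16, 8, 16, 16)

x = np.random.randn(2, 32, 14, 14).astype(np.float32)
y = inc(x)
print(y.shape)  # (2, 64, 14, 14): 64 = 16 + 16 + 16 + 16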
Example #48
    def __call__(self, h, x):
        """Updates the internal state and returns the  LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.
            h (~chainer.Variable): The list of the previous cell outputs.

        Returns:
            ~chainer.Variable: A concatenation of the outputs (h) of the
                updated LSTM units over all the layers.

        """
        h_list = []
        h = split_axis.split_axis(h, self.num_layers, 1, True)
        h_curr = x
        for layer, h in six.moves.zip(self, h):
            h_curr = layer(h, h_curr)
            h_list.append(h_curr)
        return concat.concat(h_list, 1)
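Here the packed state ``h`` is cut into one slice per layer with ``split_axis`` and the per-layer outputs are packed back with ``concat``. A round-trip sketch under assumed sizes (3 layers, 4 units, batch 2; all hypothetical):

import numpy as np
import chainer.functions as F

num_layers, units, batch = 3, 4, 2
h = np.arange(batch * num_layers * units,
              dtype=np.float32).reshape(batch, num_layers * units)
h_slices = F.split_axis(h, num_layers, axis=1)   # one (batch, units) slice per layer
assert all(s.shape == (batch, units) for s in h_slices)
h_packed = F.concat(h_slices, axis=1)            # pack the slices back
assert np.array_equal(h_packed.data, h)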
Example #49
0
    def forward_one_step(self, hps, x_data, sensor_data, y_data, train=True):
        x_r = Variable(x_data[:, 0:3, :, :], volatile=not train)
        x_s = Variable(sensor_data, volatile=not train)
        t = Variable(y_data, volatile=not train)

        cn1_r = F.max_pooling_2d(self.prelu1_r(
            self.bn1_r(self.conv1_r(x_r), test=not train)),
                                 ksize=2,
                                 stride=2,
                                 pad=0)
        cn2_r = F.max_pooling_2d(self.prelu2_r(
            self.bn2_r(self.conv2_r(cn1_r), test=not train)),
                                 ksize=2,
                                 stride=2,
                                 pad=0)
        cn3_r = F.max_pooling_2d(self.prelu3_r(
            self.bn3_r(self.conv3_r(cn2_r), test=not train)),
                                 ksize=2,
                                 stride=2,
                                 pad=0)
        cn5_rm = F.max_pooling_2d(self.prelu5(
            self.bn5(self.conv5(cn3_r), test=not train)),
                                  ksize=2,
                                  stride=2,
                                  pad=0)
        sen6 = F.dropout(self.prelu6(self.bn6(self.fc6(x_s), test=not train)),
                         ratio=hps.dropout,
                         train=train)
        cs7 = self.cccp7(
            concat.concat(
                (reshape.reshape(cn5_rm,
                                 (cn5_rm.shape[0], 1, cn5_rm.shape[1] *
                                  cn5_rm.shape[2] * cn5_rm.shape[3], 1)),
                 reshape.reshape(sen6, (sen6.shape[0], 1, sen6.shape[1], 1))),
                axis=1))
        cs8 = F.dropout(self.prelu8(self.bn8(self.fc8(cs7), test=not train)),
                        ratio=hps.dropout,
                        train=train)

        y = self.fc9(cs8)

        return y, F.mean_squared_error(y, t)
Example #50
0
    def __call__(self, x, test=None):
        """Computes the output of the InceptionBN module.

        Args:
            x (Variable): An input variable.
            test (bool): If ``True``, batch normalization layers run in testing
                mode; if ``test`` is omitted, ``not self.train`` is used as
                ``test``.

        """
        if test is None:
            test = not self.train
        outs = []

        if self.out1 > 0:
            h1 = self.conv1(x)
            h1 = self.conv1n(h1, test=test)
            h1 = relu.relu(h1)
            outs.append(h1)

        h3 = relu.relu(self.proj3n(self.proj3(x), test=test))
        h3 = relu.relu(self.conv3n(self.conv3(h3), test=test))
        outs.append(h3)

        h33 = relu.relu(self.proj33n(self.proj33(x), test=test))
        h33 = relu.relu(self.conv33an(self.conv33a(h33), test=test))
        h33 = relu.relu(self.conv33bn(self.conv33b(h33), test=test))
        outs.append(h33)

        if self.pooltype == 'max':
            p = max_pooling_2d.max_pooling_2d(x, 3, stride=self.stride, pad=1,
                                              cover_all=False)
        else:
            p = average_pooling_2d.average_pooling_2d(x, 3, stride=self.stride,
                                                      pad=1)
        if self.proj_pool is not None:
            p = relu.relu(self.poolpn(self.poolp(p), test=test))
        outs.append(p)

        y = concat.concat(outs, axis=1)
        return y
Example #51
0
    def __call__(self, x1, x2, eps=0.001):
        h = x1
        ksizes = x2

        h = F.relu(self.bn1(self.conv1(h)))
        h = F.max_pooling_2d(h, 3, stride=2)
        # Res Blocks
        h = self.res2(h)  # 1/4
        h = self.res3(h)  # 1/8
        pool1_8 = h
        h = self.res4(h)  # 1/16
        pool1_16 = h
        h = self.res5(h)  # 1/32
        pool1_32 = h

        # upscore 1/32 -> 1/4
        pool1_32 = self.upscore1(pool1_32)
        # upscore 1/16 -> 1/4
        pool1_16 = self.upscore2(pool1_16)
        # upscore 1/8 -> 1/4
        pool1_8 = self.upscore3(pool1_8)

        # concat 1 / 4
        h = F.leaky_relu(
            self.bn_upscore(
                concat.concat((pool1_32, pool1_16, pool1_8), axis=1)))
        h = F.relu(self.concat_conv(h))

        ksizes = F.ceil(
            F.resize_images((ksizes * 3), (h.data.shape[2], h.data.shape[3])))
        h = convolutional_roi_pooling(h, ksizes, out_ksize=5)
        h = F.relu(self.bn_croip1(self.pool_roi_conv(h)))

        h = F.relu(self.bn_croip2(self.conv_after_croip(h)))

        # score
        h = F.relu(self.score_pool(h))
        h = F.relu(self.upscore_final(h))
        score = h  # 1/1

        return score
Example #52
0
    def __call__(self, x, top_n=None):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.
            top_n (int): The number of LSTMs from the top whose outputs
                you want (default: outputs of all LSTMs are returned).

        Returns:
            ~chainer.Variable: A concatenation of the outputs (h) of the
            updated LSTM units over the top N layers; by default all
            layers are considered.

        """
        if top_n is None:
            top_n = self.num_layers

        h_list = []
        h_curr = x
        for layer in self:
            h_curr = layer(h_curr)
            h_list.append(h_curr)
        return concat.concat(h_list[-top_n:], 1)
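Because ``h_list`` is ordered bottom-to-top, slicing with ``h_list[-top_n:]`` before concatenation keeps only the outputs of the ``top_n`` highest layers. A small sketch with assumed sizes (4 layers, 5 units, batch 2, ``top_n=2``; all hypothetical):

import numpy as np
import chainer.functions as F

units, batch, top_n = 5, 2, 2
h_list = [np.random.rand(batch, units).astype(np.float32) for _ in range(4)]
y = F.concat(h_list[-top_n:], axis=1)   # only the two topmost layers
assert y.shape == (batch, top_n * units)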
Example #53
0
    def get_weights(self, hps, x_data, sensor_data, train=False):
        x_r = Variable(x_data[:, 0:3, :, :], volatile=not train)
        x_s = Variable(sensor_data, volatile=not train)

        cn1_r = F.max_pooling_2d(self.prelu1_r(
            self.bn1_r(self.conv1_r(x_r), test=not train)),
                                 ksize=2,
                                 stride=2,
                                 pad=0)
        cn2_r = F.max_pooling_2d(self.prelu2_r(
            self.bn2_r(self.conv2_r(cn1_r), test=not train)),
                                 ksize=2,
                                 stride=2,
                                 pad=0)
        cn3_r = F.max_pooling_2d(self.prelu3_r(
            self.bn3_r(self.conv3_r(cn2_r), test=not train)),
                                 ksize=2,
                                 stride=2,
                                 pad=0)
        cn5_rm = F.max_pooling_2d(self.prelu5(
            self.bn5(self.conv5(cn3_r), test=not train)),
                                  ksize=2,
                                  stride=2,
                                  pad=0)
        sen6 = F.dropout(self.prelu6(self.bn6(self.fc6(x_s), test=not train)),
                         ratio=hps.dropout,
                         train=train)
        cs7 = self.cccp7(
            concat.concat(
                (reshape.reshape(cn5_rm,
                                 (cn5_rm.shape[0], 1, cn5_rm.shape[1] *
                                  cn5_rm.shape[2] * cn5_rm.shape[3], 1)),
                 reshape.reshape(sen6, (sen6.shape[0], 1, sen6.shape[1], 1))),
                axis=1))
        cs8 = F.dropout(self.prelu8(self.bn8(self.fc8(cs7), test=not train)),
                        ratio=hps.dropout,
                        train=train)

        return cuda.to_cpu(cs8.data)
Example #54
0
    def __call__(self, x1):
        h = x1
        h = F.relu(self.bn1(self.conv1(h)))
        h = F.max_pooling_2d(h, 3, stride=2)
        # Res Blocks
        h = self.res2(h)  # 1/4
        h = self.res3(h)  # 1/8
        h = self.res4(h)  # 1/16
        pool1_16 = h
        h = self.res5(h)  # 1/32

        # upscore 1/32 -> 1/8
        h = F.elu(self.bn_up32(self.upscore32(h)))
        upscore1 = h  # 1/8
        # upscore 1/16 -> 1/8
        upscore2 = F.relu(self.bn_up16(self.upscore16(pool1_16)))

        # concat conv
        h = concat.concat((upscore1, upscore2), axis=1)
        h = F.relu(self.bn_concat(self.concat_conv(h)))
        concat_pool = F.dropout(h, ratio=0.1)

        # score
        h_cls = F.relu(self.bn_cls1(self.score_conv(F.dropout(h, ratio=0.1))))
        h_cls = F.relu(self.bn_cls2(self.upscore_final1(h_cls)))
        score = self.upscore_final2(h_cls)

        h_cp = F.relu(self.bn_cp2(self.conv_cp2(concat_pool)))
        h_cp = F.relu(self.bn_cp3(self.upscore_cp1(h_cp)))
        h_cp = self.upscore_cp2(h_cp)

        h_ocp = F.relu(self.bn_ocp2(self.conv_ocp2(concat_pool)))
        h_ocp = F.relu(self.bn_ocp3(self.upscore_ocp1(h_ocp)))
        h_ocp = self.upscore_ocp2(h_ocp)

        cp_score = F.tanh(h_cp)
        ocp_score = F.tanh(h_ocp)

        return score, cp_score, ocp_score
Example #55
0
    def __call__(self, x, top_n=None):
        """Updates the internal state and returns the GRU outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.
            top_n (int): The number of GRUs from the top whose outputs
                you want (default: outputs of all GRUs are returned).

        Returns:
            ~chainer.Variable: A concatenation of the outputs (h) of
            the updated GRU units over the top N layers; by default
            all layers are considered.

        """
        if top_n is None:
            top_n = self.num_layers

        h_list = []
        h_curr = x
        for layer in self:
            h_curr = layer(h_curr)
            h_list.append(h_curr)
        return concat.concat(h_list[-top_n:], 1)
Example #56
0
def n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs,
                    use_bi_direction, **kwargs):
    """n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs, use_bi_direction)

    Base function for Stack GRU/BiGRU functions.

    This function is used by :func:`chainer.functions.n_step_bigru` and
    :func:`chainer.functions.n_step_gru`.
    This function's behavior depends on the argument ``use_bi_direction``.

    .. warning::

       ``train`` and ``use_cudnn`` arguments are not supported anymore since
       v2.
       Instead, use ``chainer.using_config('train', train)`` and
       ``chainer.using_config('use_cudnn', use_cudnn)`` respectively.
       See :func:`chainer.using_config`.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers
            and is equal to ``n_layers``, ``B`` is the mini-batch size, and
            ``N`` is the dimension of the hidden units. When
            ``use_bi_direction`` is ``True``, the first dimension length is
            ``2S``.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents weights for i-th layer.
            Each ``ws[i]`` is a list containing six matrices.
            ``ws[i][j]`` is corresponding with ``W_j`` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 3`` has ``(I, N)`` shape, as
            these matrices are multiplied with the input variables. All other
            matrices have ``(N, N)`` shape.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represents biases for the i-th layer.
            Each ``bs[i]`` is a list containing six vectors.
            ``bs[i][j]`` is corresponding with ``b_j`` in the equation.
            The shape of each vector is ``(N,)`` where ``N`` is the dimension
            of the hidden units.
        xs (list of chainer.Variable): A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is
            mini-batch size for time ``t``, and ``I`` is size of input units.
            Note that this function supports variable-length sequences.
            When sequences have different lengths, sort the sequences in
            descending order by length and transpose the sorted sequences.
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :class:`~chainer.Variable` objects holding sequences.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-direction GRU.

    .. seealso::
       :func:`chainer.functions.n_step_rnn`
       :func:`chainer.functions.n_step_birnn`

    """  # NOQA
    argument.check_unexpected_kwargs(
        kwargs, train='train argument is not supported anymore. '
        'Use chainer.using_config',
        use_cudnn='use_cudnn argument is not supported anymore. '
        'Use chainer.using_config')
    argument.assert_kwargs_empty(kwargs)

    xp = cuda.get_array_module(hx, hx.data)

    if xp is not numpy and chainer.should_use_cudnn('>=auto', 5000):
        states = get_random_state().create_dropout_states(dropout_ratio)
        # flatten all input variables
        inputs = tuple(itertools.chain(
            (hx, ),
            itertools.chain.from_iterable(ws),
            itertools.chain.from_iterable(bs),
            xs))
        if use_bi_direction:
            rnn = NStepBiGRU(n_layers, states)
        else:
            rnn = NStepGRU(n_layers, states)

        ret = rnn(*inputs)
        hy, = ret[:1]
        ys = ret[1:]
        return hy, ys

    else:
        direction = 2 if use_bi_direction else 1
        hx = split_axis.split_axis(hx, n_layers * direction, axis=0,
                                   force_tuple=True)
        hx = [reshape.reshape(h, h.shape[1:]) for h in hx]

        xws = [concat.concat([w[0], w[1], w[2]], axis=0) for w in ws]
        hws = [concat.concat([w[3], w[4], w[5]], axis=0) for w in ws]
        xbs = [concat.concat([b[0], b[1], b[2]], axis=0) for b in bs]
        hbs = [concat.concat([b[3], b[4], b[5]], axis=0) for b in bs]

        xs_next = xs
        hy = []
        for layer in six.moves.range(n_layers):

            def _one_directional_loop(di):
                # di=0, forward GRU
                # di=1, backward GRU
                xs_list = xs_next if di == 0 else reversed(xs_next)
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                h_list = []
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    else:
                        h_rest = None

                    if layer > 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)

                    gru_x = linear.linear(x, xws[layer_idx], xbs[layer_idx])
                    gru_h = linear.linear(h, hws[layer_idx], hbs[layer_idx])

                    W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
                    U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

                    r = sigmoid.sigmoid(W_r_x + U_r_h)
                    z = sigmoid.sigmoid(W_z_x + U_z_h)
                    h_bar = tanh.tanh(W_x + r * U_x)
                    h_bar = (1 - z) * h_bar + z * h
                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                    else:
                        h = h_bar
                    h_list.append(h_bar)
                return h, h_list

            # Forward GRU
            h, h_forward = _one_directional_loop(di=0)
            hy.append(h)

            if use_bi_direction:
                # Backward GRU
                h, h_backward = _one_directional_loop(di=1)
                h_backward.reverse()
                # Concat
                xs_next = [concat.concat([hfi, hbi], axis=1) for (hfi, hbi) in
                           six.moves.zip(h_forward, h_backward)]
                hy.append(h)
            else:
                # Uni-directional GRU
                xs_next = h_forward

        ys = xs_next
        hy = stack.stack(hy)
        return hy, tuple(ys)
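The inner loop above implements the standard GRU update: a reset gate ``r``, an update gate ``z``, and a candidate state ``h_bar`` that is interpolated with the previous state. A NumPy-only sketch of a single step with made-up sizes (biases omitted; 4 input units, 3 hidden units, batch 2 are assumptions):

import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

rng = np.random.RandomState(0)
batch, in_size, units = 2, 4, 3
x = rng.randn(batch, in_size).astype(np.float32)   # input at time t
h = rng.randn(batch, units).astype(np.float32)     # previous hidden state
W_r, W_z, W = (rng.randn(in_size, units).astype(np.float32) for _ in range(3))
U_r, U_z, U = (rng.randn(units, units).astype(np.float32) for _ in range(3))

r = sigmoid(x.dot(W_r) + h.dot(U_r))       # reset gate
z = sigmoid(x.dot(W_z) + h.dot(U_z))       # update gate
h_bar = np.tanh(x.dot(W) + r * h.dot(U))   # candidate state
h_new = (1 - z) * h_bar + z * h            # same interpolation as in the loop
assert h_new.shape == (batch, units)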
Example #57
0
def crf1d(cost, xs, ys, reduce='mean'):
    """Calculates negative log-likelihood of linear-chain CRF.

    It takes a transition cost matrix, a sequence of costs, and a sequence of
    labels. Let :math:`c_{st}` be a transition cost from a label :math:`s` to
    a label :math:`t`, :math:`x_{it}` be a cost of a label :math:`t` at
    position :math:`i`, and :math:`y_i` be an expected label at position
    :math:`i`. The negative log-likelihood of linear-chain CRF is defined as

    .. math::
        L = -\\left( \\sum_{i=1}^l x_{iy_i} + \\
             \\sum_{i=1}^{l-1} c_{y_i y_{i+1}} - {\\log(Z)} \\right) ,

    where :math:`l` is the length of the input sequence and :math:`Z` is the
    normalizing constant called partition function.

    .. note::

       When you want to calculate the negative log-likelihood of sequences
       which have different lengths, sort the sequences in descending order of
       lengths and transpose the sequences.
       For example, you have three input sequences:

       >>> a1 = a2 = a3 = a4 = np.random.uniform(-1, 1, 3).astype(np.float32)
       >>> b1 = b2 = b3 = np.random.uniform(-1, 1, 3).astype(np.float32)
       >>> c1 = c2 = np.random.uniform(-1, 1, 3).astype(np.float32)

       >>> a = [a1, a2, a3, a4]
       >>> b = [b1, b2, b3]
       >>> c = [c1, c2]

       where ``a1`` and all other variables are arrays with ``(K,)`` shape.
       Make a transpose of the sequences:

       >>> x1 = np.stack([a1, b1, c1])
       >>> x2 = np.stack([a2, b2, c2])
       >>> x3 = np.stack([a3, b3])
       >>> x4 = np.stack([a4])

       and make a list of the arrays:

       >>> xs = [x1, x2, x3, x4]

       You need to make label sequences in the same fashion.
       And then, call the function:

       >>> cost = chainer.Variable(
       ...     np.random.uniform(-1, 1, (3, 3)).astype(np.float32))
       >>> ys = [np.zeros(x.shape[0:1], dtype=np.int32) for x in xs]
       >>> loss = F.crf1d(cost, xs, ys)

       It calculates mean of the negative log-likelihood of the three
       sequences.

       The output is a variable whose value depends on the value of
       the option ``reduce``. If it is ``'no'``, it holds the elementwise
       loss values. If it is ``'mean'``, it holds mean of the loss values.


    Args:
        cost (Variable): A :math:`K \\times K` matrix which holds transition
            cost between two labels, where :math:`K` is the number of labels.
        xs (list of Variable): Input vector for each label.
            ``len(xs)`` denotes the length of the sequence,
            and each :class:`~chainer.Variable` holds a :math:`B \\times K`
            matrix, where :math:`B` is mini-batch size, :math:`K` is the number
            of labels.
            Note that the :math:`B`\\ s in all the variables are not
            necessarily the same, i.e., it accepts input sequences with
            different lengths.
        ys (list of Variable): Expected output labels. It needs to have the
            same length as ``xs``. Each :class:`~chainer.Variable` holds a
            :math:`B` integer vector.
            When ``x`` in ``xs`` has a different :math:`B`, the corresponding
            ``y`` has the same :math:`B`. In other words, ``ys`` must satisfy
            ``ys[i].shape == xs[i].shape[0:1]`` for all ``i``.
        reduce (str): Reduction option. Its value must be either
            ``'mean'`` or ``'no'``. Otherwise, :class:`ValueError` is raised.

    Returns:
        ~chainer.Variable: A variable holding the average negative
        log-likelihood of the input sequences.

    .. note::

        See detail in the original paper: `Conditional Random Fields:
        Probabilistic Models for Segmenting and Labeling Sequence Data
        <https://repository.upenn.edu/cis_papers/159/>`_.

    """
    if reduce not in ('mean', 'no'):
        raise ValueError(
            "only 'mean' and 'no' are valid for 'reduce', but '%s' is "
            'given' % reduce)

    assert xs[0].shape[1] == cost.shape[0]

    n_label = cost.shape[0]
    n_batch = xs[0].shape[0]

    alpha = xs[0]
    alphas = []
    for x in xs[1:]:
        batch = x.shape[0]
        if alpha.shape[0] > batch:
            alpha, alpha_rest = split_axis.split_axis(alpha, [batch], axis=0)
            alphas.append(alpha_rest)
        b_alpha, b_cost = broadcast.broadcast(alpha[..., None], cost)
        alpha = logsumexp.logsumexp(b_alpha + b_cost, axis=1) + x

    if len(alphas) > 0:
        alphas.append(alpha)
        alpha = concat.concat(alphas[::-1], axis=0)

    logz = logsumexp.logsumexp(alpha, axis=1)

    cost = reshape.reshape(cost, (cost.size, 1))
    score = select_item.select_item(xs[0], ys[0])
    scores = []
    for x, y, y_prev in zip(xs[1:], ys[1:], ys[:-1]):
        batch = x.shape[0]
        if score.shape[0] > batch:
            y_prev, _ = split_axis.split_axis(y_prev, [batch], axis=0)
            score, score_rest = split_axis.split_axis(score, [batch], axis=0)
            scores.append(score_rest)
        score += (select_item.select_item(x, y) + reshape.reshape(
            embed_id.embed_id(y_prev * n_label + y, cost), (batch,)))

    if len(scores) > 0:
        scores.append(score)
        score = concat.concat(scores[::-1], axis=0)

    loss = logz - score
    if reduce == 'mean':
        return _sum.sum(loss) / n_batch
    else:
        return loss
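For a single short sequence the loss can be checked by brute force: the unnormalized score of a label path is the sum of its emission terms :math:`x_{iy_i}` and transition terms :math:`c_{y_i y_{i+1}}`, and :math:`\log(Z)` sums ``exp(score)`` over all possible label paths. A sketch with assumed sizes (K=2 labels, length 3, batch size 1; the labels in ``ys`` are arbitrary):

import itertools
import numpy as np
import chainer
import chainer.functions as F

K, length = 2, 3
rng = np.random.RandomState(0)
cost = rng.uniform(-1, 1, (K, K)).astype(np.float32)
xs = [rng.uniform(-1, 1, (1, K)).astype(np.float32) for _ in range(length)]
ys = [np.array([1], dtype=np.int32),
      np.array([0], dtype=np.int32),
      np.array([1], dtype=np.int32)]

def path_score(labels):
    emit = sum(xs[i][0, labels[i]] for i in range(length))
    trans = sum(cost[labels[i], labels[i + 1]] for i in range(length - 1))
    return emit + trans

log_z = np.log(sum(np.exp(path_score(p))
                   for p in itertools.product(range(K), repeat=length)))
expected = log_z - path_score([int(y[0]) for y in ys])

loss = F.crf1d(chainer.Variable(cost), xs, ys)  # B=1, so 'mean' is the loss itself
assert np.allclose(loss.data, expected, atol=1e-4)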
Example #58
0
def n_step_lstm_base(
        n_layers, dropout_ratio, hx, cx, ws, bs, xs, train, use_cudnn,
        use_bi_direction):
    """Base function for Stack LSTM/BiLSTM functions.

    This function is used by :func:`chainer.functions.n_step_lstm` and
    :func:`chainer.functions.n_step_bilstm`.
    This function's behavior depends on the argument ``use_bi_direction``.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers
            and is equal to ``n_layers``, ``B`` is the mini-batch size, and
            ``N`` is the dimension of the hidden units.
        cx (chainer.Variable): Variable holding stacked cell states.
            It has the same shape as ``hx``.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents weights for i-th layer.
            Each ``ws[i]`` is a list containing eight matrices.
            ``ws[i][j]`` is corresponding with ``W_j`` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 4`` has ``(I, N)`` shape, as
            these matrices are multiplied with the input variables. All other
            matrices have ``(N, N)`` shape.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represents biases for the i-th layer.
            Each ``bs[i]`` is a list containing eight vectors.
            ``bs[i][j]`` is corresponding with ``b_j`` in the equation.
            The shape of each vector is ``(N,)`` where ``N`` is the dimension
            of the hidden units.
        xs (list of chainer.Variable): A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is
            mini-batch size for time ``t``, and ``I`` is size of input units.
            Note that this function supports variable-length sequences.
            When sequences have different lengths, sort the sequences in
            descending order by length and transpose the sorted sequences.
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :class:`~chainer.Variable` objects holding sequences.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        train (bool): If ``True``, this function executes dropout.
        use_cudnn (bool): If ``True``, this function uses cuDNN if available.
        use_bi_direction (bool): If ``True``, this function uses Bi-directional
            LSTM.

    Returns:
        tuple: This function returns a tuple containing three elements,
            ``hy``, ``cy`` and ``ys``.
            - ``hy`` is the updated hidden states whose shape is the same as
              ``hx``.
            - ``cy`` is the updated cell states whose shape is the same as
              ``cx``.
            - ``ys`` is a list of :class:`~chainer.Variable`. Each element
              ``ys[t]`` holds the hidden states of the last layer
              corresponding to the input ``xs[t]``. Its shape is ``(B_t, N)``
              where ``B_t`` is the mini-batch size for time ``t``, and ``N``
              is the size of the hidden units. Note that ``B_t`` equals
              ``xs[t].shape[0]``.

    .. seealso::

       :func:`chainer.functions.n_step_lstm`
       :func:`chainer.functions.n_step_bilstm`

    """

    xp = cuda.get_array_module(hx, hx.data)

    if use_cudnn and xp is not numpy and cuda.cudnn_enabled and \
       _cudnn_version >= 5000:
        states = get_random_state().create_dropout_states(dropout_ratio)
        # flatten all input variables
        inputs = tuple(itertools.chain(
            (hx, cx),
            itertools.chain.from_iterable(ws),
            itertools.chain.from_iterable(bs),
            xs))
        if use_bi_direction:
            rnn = NStepBiLSTM(n_layers, states, train=train)
        else:
            rnn = NStepLSTM(n_layers, states, train=train)

        ret = rnn(*inputs)
        hy, cy = ret[:2]
        ys = ret[2:]
        return hy, cy, ys

    else:
        direction = 2 if use_bi_direction else 1
        split_size = n_layers * direction
        hx = split_axis.split_axis(hx, split_size, axis=0, force_tuple=True)
        hx = [reshape.reshape(h, h.shape[1:]) for h in hx]
        cx = split_axis.split_axis(cx, split_size, axis=0, force_tuple=True)
        cx = [reshape.reshape(c, c.shape[1:]) for c in cx]

        xws = [_stack_weight([w[2], w[0], w[1], w[3]]) for w in ws]
        hws = [_stack_weight([w[6], w[4], w[5], w[7]]) for w in ws]
        xbs = [_stack_weight([b[2], b[0], b[1], b[3]]) for b in bs]
        hbs = [_stack_weight([b[6], b[4], b[5], b[7]]) for b in bs]

        xs_next = xs
        hy = []
        cy = []
        for layer in six.moves.range(n_layers):

            def _one_directional_loop(di):
                # di=0, forward LSTM
                # di=1, backward LSTM
                h_list = []
                c_list = []
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                c = cx[layer_idx]
                if di == 0:
                    xs_list = xs_next
                else:
                    xs_list = reversed(xs_next)
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                        c, c_rest = split_axis.split_axis(c, [batch], axis=0)
                    else:
                        h_rest = None
                        c_rest = None

                    if layer != 0:
                        x = dropout.dropout(x, ratio=dropout_ratio,
                                            train=train)
                    lstm_in = linear.linear(x, xws[layer_idx],
                                            xbs[layer_idx]) + \
                        linear.linear(h, hws[layer_idx], hbs[layer_idx])

                    c_bar, h_bar = lstm.lstm(c, lstm_in)
                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                        c = concat.concat([c_bar, c_rest], axis=0)
                    else:
                        h = h_bar
                        c = c_bar
                    h_list.append(h_bar)
                    c_list.append(c_bar)
                return h, c, h_list, c_list

            h, c, h_forward, c_forward = _one_directional_loop(di=0)
            hy.append(h)
            cy.append(c)

            if use_bi_direction:
                # BiLSTM
                h, c, h_backward, c_backward = _one_directional_loop(di=1)
                hy.append(h)
                cy.append(c)

                h_backward.reverse()
                # concat
                xs_next = [concat.concat([hfi, hbi], axis=1) for (hfi, hbi) in
                           zip(h_forward, h_backward)]
            else:
                # Uni-directional RNN
                xs_next = h_forward

        ys = xs_next
        hy = stack.stack(hy)
        cy = stack.stack(cy)
        return hy, cy, tuple(ys)
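At each step the pre-activation ``lstm_in`` packs the four LSTM gates, and ``lstm.lstm`` turns them into the new cell and hidden states. A math-only NumPy sketch of one step with assumed sizes (batch 2, 3 hidden units); it illustrates the update equations only and does not reproduce chainer.functions.lstm's interleaved gate memory layout:

import numpy as np

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

rng = np.random.RandomState(0)
batch, units = 2, 3
c = rng.randn(batch, units).astype(np.float32)   # previous cell state
# a: candidate input; i, f, o: input, forget and output gate pre-activations
a, i, f, o = (rng.randn(batch, units).astype(np.float32) for _ in range(4))

c_new = sigmoid(f) * c + sigmoid(i) * np.tanh(a)   # updated cell state
h_new = sigmoid(o) * np.tanh(c_new)                # updated hidden state
assert c_new.shape == h_new.shape == (batch, units)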