def argmax_crf1d(cost, xs): """Computes a state that maximizes a joint probability of the given CRF. Args: cost (Variable): A :math:`K \\times K` matrix which holds transition cost between two labels, where :math:`K` is the number of labels. xs (list of Variable): Input vector for each label. ``len(xs)`` denotes the length of the sequence, and each :class:`~chainer.Variable` holds a :math:`B \\times K` matrix, where :math:`B` is mini-batch size, :math:`K` is the number of labels. Note that :math:`B` s in all the variables are not necessary the same, i.e., it accepts the input sequences with different lengths. Returns: tuple: A tuple of :class:`~chainer.Variable` object ``s`` and a :class:`list` ``ps``. The shape of ``s`` is ``(B,)``, where ``B`` is the mini-batch size. i-th element of ``s``, ``s[i]``, represents log-likelihood of i-th data. ``ps`` is a list of :class:`numpy.ndarray` or :class:`cupy.ndarray`, and denotes the state that maximizes the point probability. ``len(ps)`` is equal to ``len(xs)``, and shape of each ``ps[i]`` is the mini-batch size of the corresponding ``xs[i]``. That means, ``ps[i].shape == xs[i].shape[0:1]``. """ alpha = xs[0] alphas = [] max_inds = [] for x in xs[1:]: batch = x.shape[0] if alpha.shape[0] > batch: alpha, alpha_rest = split_axis.split_axis(alpha, [batch], axis=0) alphas.append(alpha_rest) else: alphas.append(None) b_alpha, b_cost = broadcast.broadcast(alpha[..., None], cost) scores = b_alpha + b_cost max_ind = minmax.argmax(scores, axis=1) max_inds.append(max_ind) alpha = minmax.max(scores, axis=1) + x inds = minmax.argmax(alpha, axis=1) path = [inds.data] for m, a in zip(max_inds[::-1], alphas[::-1]): inds = select_item.select_item(m, inds) if a is not None: inds = concat.concat([inds, minmax.argmax(a, axis=1)], axis=0) path.append(inds.data) path.reverse() score = minmax.max(alpha, axis=1) for a in alphas[::-1]: if a is None: continue score = concat.concat([score, minmax.max(a, axis=1)], axis=0) return score, path
def __call__(self, *cshsx):
    """Returns new cell state and output of Child-Sum TreeLSTM.

    Args:
        cshsx (list of :class:`~chainer.Variable`): Variable arguments
            which include all cell vectors and all output vectors of
            variable children, and an input vector.

    Returns:
        tuple of ~chainer.Variable: Returns :math:`(c_{new}, h_{new})`,
        where :math:`c_{new}` represents new cell state vector, and
        :math:`h_{new}` is new output vector.

    """
    cs = cshsx[:len(cshsx) // 2]
    hs = cshsx[len(cshsx) // 2:-1]
    x = cshsx[-1]
    assert (len(cshsx) % 2 == 1)
    assert (len(cs) == len(hs))

    if x is None:
        if any(c is not None for c in cs):
            base = [c for c in cs if c is not None][0]
        elif any(h is not None for h in hs):
            base = [h for h in hs if h is not None][0]
        else:
            raise ValueError('All inputs (cs, hs, x) are None.')
        batchsize, dtype = base.shape[0], base.dtype
        x = self.xp.zeros((batchsize, self.in_size), dtype=dtype)

    W_x_in = self.W_x(x)
    W_x_aio_in, W_x_f_in = split_axis.split_axis(
        W_x_in, [3 * self.state_size], axis=1)

    if len(hs) == 0:
        aio_in = W_x_aio_in
        a, i, o = split_axis.split_axis(aio_in, 3, axis=1)
        c = sigmoid.sigmoid(i) * tanh.tanh(a)
        h = sigmoid.sigmoid(o) * tanh.tanh(c)
        return c, h

    hs = self._pad_zero_nodes(
        hs, (x.shape[0], self.state_size), dtype=x.dtype)
    cs = self._pad_zero_nodes(
        cs, (x.shape[0], self.state_size), dtype=x.dtype)

    aio_in = self.W_h_aio(sum(hs)) + W_x_aio_in
    W_h_fs_in = concat.concat(split_axis.split_axis(
        self.W_h_f(concat.concat(hs, axis=0)), len(hs), axis=0), axis=1)
    f_in = W_h_fs_in + \
        concat.concat([W_x_f_in] * len(hs), axis=1)
    tree_lstm_in = concat.concat([aio_in, f_in], axis=1)

    return tree_lstm.tree_lstm(*(cs + (tree_lstm_in, )))
def __call__(self, c, h): """Updates the internal state and returns the Cell outputs. Remember to treat this Grid cell as if its an LSTM and pass ``c`` as well as ``h``. Only parts of ``c`` will be used depending on whether there is a LSTM or not. Args: c (~chainer.Variable): The previous memory information. h (~chainer.Variable): The previous state information. Returns: tuple of ~chainer.Variable: Returns ``(c_new, h_new)``, where ``c_new`` represents new cell state, and ``h_new`` is updated output. Parts of ``c_new`` will be useless. """ assert h is not None assert c is not None c = split_axis.split_axis(c, self.out_indices, 1, True) h_list = [] h_curr = None for layer_id, layer in enumerate(self): layer_params = inspect.getargspec(layer)[0] if 'c' in layer_params: h_curr = layer(c[layer_id], h) else: h_curr = (c[layer_id], layer(h)) h_list.append(h_curr) h_new = concat.concat([x[1] for x in h_list], 1) c_new = concat.concat([x[0] for x in h_list], 1) return c_new, h_new
def argmax_crf1d(cost, xs): """Computes a state that maximizes a joint probability of the given CRF. Args: cost (Variable): A :math:`K \\times K` matrix which holds transition cost between two labels, where :math:`K` is the number of labels. xs (list of Variable): Input vector for each label. ``len(xs)`` denotes the length of the sequence, and each :class:`~chainer.Variable` holds a :math:`B \\times K` matrix, where :math:`B` is mini-batch size, :math:`K` is the number of labels. Note that :math:`B`\\ s in all the variables are not necessary the same, i.e., it accepts the input sequences with different lengths. Returns: tuple: A tuple of :class:`~chainer.Variable` object ``s`` and a :class:`list` ``ps``. The shape of ``s`` is ``(B,)``, where ``B`` is the mini-batch size. i-th element of ``s``, ``s[i]``, represents log-likelihood of i-th data. ``ps`` is a list of :class:`numpy.ndarray` or :class:`cupy.ndarray`, and denotes the state that maximizes the point probability. ``len(ps)`` is equal to ``len(xs)``, and shape of each ``ps[i]`` is the mini-batch size of the corresponding ``xs[i]``. That means, ``ps[i].shape == xs[i].shape[0:1]``. """ alpha = xs[0] alphas = [] max_inds = [] for x in xs[1:]: batch = x.shape[0] if alpha.shape[0] > batch: alpha, alpha_rest = split_axis.split_axis(alpha, [batch], axis=0) alphas.append(alpha_rest) else: alphas.append(None) b_alpha, b_cost = broadcast.broadcast(alpha[..., None], cost) scores = b_alpha + b_cost max_ind = minmax.argmax(scores, axis=1) max_inds.append(max_ind) alpha = minmax.max(scores, axis=1) + x inds = minmax.argmax(alpha, axis=1) path = [inds.data] for m, a in zip(max_inds[::-1], alphas[::-1]): inds = select_item.select_item(m, inds) if a is not None: inds = concat.concat([inds, minmax.argmax(a, axis=1)], axis=0) path.append(inds.data) path.reverse() score = minmax.max(alpha, axis=1) for a in alphas[::-1]: if a is None: continue score = concat.concat([score, minmax.max(a, axis=1)], axis=0) return score, path
def __call__(self, x1, x2, eps=0.001, test=None): h = x1 h_d = x2 h = F.relu(self.bn1(self.conv1(h))) h = F.max_pooling_2d(h, 3, stride=2) # Res Blocks h = self.res2(h, test=test) # 1/4 h = self.res3(h, test=test) # 1/8 pool1_8 = h h = self.res4(h, test=test) # 1/16 pool1_16 = h h = self.res5(h, test=test) # 1/32 pool1_32 = h # upscore 1/32 -> 1/1 h = self.upscore32(pool1_32) upscore1 = h # 1/1 # upscore 1/16 -> 1/1 h = self.upscore16(pool1_16) upscore2 = h # 1/1 # concat conv h = concat.concat((upscore1, upscore2, pool1_8), axis=1) h = F.relu(self.concat_conv(h)) concat_pool = h # score h = F.relu(self.score_pool(concat_pool)) score1_8 = h h = F.relu(self.upscore_final(h)) score = h # 1/1 self.score = score # XXX: for backward compatibility h = F.relu(self.cls_pool(concat_pool)) cls_pool1_8 = h h_d = F.relu(self.bn_d1_1(self.conv_d1_1(h_d))) h_d = F.relu(self.bn_d1_2(self.conv_d1_2(h_d))) h_d = F.max_pooling_2d(h_d, 2, stride=2, pad=0) # 1/2 h_d = F.relu(self.bn_d2(self.conv_d2(h_d))) h_d = F.max_pooling_2d(h_d, 2, stride=2, pad=0) # 1/4 h_d = F.relu(self.bn_d3(self.conv_d3(h_d))) h_d = F.max_pooling_2d(h_d, 2, stride=2, pad=0) # 1/8 h_cp = concat.concat((h_d, pool1_8, cls_pool1_8), axis=1) h_cp = F.relu(self.bn_cp_1(self.conv_cp_1(h_cp))) h_cp = F.relu(self.bn_cp_2(self.conv_cp_2(h_cp))) h_cp = F.relu(self.bn_cp_3(self.upscore_cp1(h_cp))) cp_score = F.arctan(self.upscore_cp2(h_cp)) h_rot = concat.concat((h_d, pool1_8, cls_pool1_8), axis=1) h_rot = F.relu(self.bn_rot_1(self.conv_rot_1(h_rot))) h_rot = F.relu(self.bn_rot_2(self.conv_rot_2(h_rot))) h_rot = F.relu(self.bn_rot_3(self.upscore_rot1(h_rot))) rot_score = F.tanh(self.upscore_rot2(h_rot)) return score, cp_score, rot_score
def forward(self, *cshsx): """Returns new cell state and output of Child-Sum TreeLSTM. Args: cshsx (list of :class:`~chainer.Variable`): Variable arguments which include all cell vectors and all output vectors of variable children, and an input vector. Returns: tuple of ~chainer.Variable: Returns :math:`(c_{new}, h_{new})`, where :math:`c_{new}` represents new cell state vector, and :math:`h_{new}` is new output vector. """ cs = cshsx[:len(cshsx) // 2] hs = cshsx[len(cshsx) // 2:-1] x = cshsx[-1] assert(len(cshsx) % 2 == 1) assert(len(cs) == len(hs)) if x is None: if any(c is not None for c in cs): base = [c for c in cs if c is not None][0] elif any(h is not None for h in hs): base = [h for h in hs if h is not None][0] else: raise ValueError('All inputs (cs, hs, x) are None.') batchsize, dtype = base.shape[0], base.dtype x = self.xp.zeros( (batchsize, self.in_size), dtype=dtype) W_x_in = self.W_x(x) W_x_aio_in, W_x_f_in = split_axis.split_axis( W_x_in, [3 * self.state_size], axis=1) if len(hs) == 0: aio_in = W_x_aio_in a, i, o = split_axis.split_axis(aio_in, 3, axis=1) c = sigmoid.sigmoid(i) * tanh.tanh(a) h = sigmoid.sigmoid(o) * tanh.tanh(c) return c, h hs = self._pad_zero_nodes( hs, (x.shape[0], self.state_size), dtype=x.dtype) cs = self._pad_zero_nodes( cs, (x.shape[0], self.state_size), dtype=x.dtype) aio_in = self.W_h_aio(sum(hs)) + W_x_aio_in W_h_fs_in = concat.concat(split_axis.split_axis( self.W_h_f(concat.concat(hs, axis=0)), len(hs), axis=0), axis=1) f_in = W_h_fs_in + \ concat.concat([W_x_f_in] * len(hs), axis=1) tree_lstm_in = concat.concat([aio_in, f_in], axis=1) return tree_lstm.tree_lstm(*(cs + (tree_lstm_in, )))
def forward(self, x):
    outs = []

    if self.out1 > 0:
        h1 = self.conv1(x)
        h1 = self.conv1n(h1)
        h1 = relu.relu(h1)
        outs.append(h1)

    h3 = relu.relu(self.proj3n(self.proj3(x)))
    h3 = relu.relu(self.conv3n(self.conv3(h3)))
    outs.append(h3)

    h33 = relu.relu(self.proj33n(self.proj33(x)))
    h33 = relu.relu(self.conv33an(self.conv33a(h33)))
    h33 = relu.relu(self.conv33bn(self.conv33b(h33)))
    outs.append(h33)

    if self.pooltype == 'max':
        p = max_pooling_nd.max_pooling_2d(x, 3, stride=self.stride, pad=1,
                                          cover_all=False)
    else:
        p = average_pooling_2d.average_pooling_2d(x, 3, stride=self.stride,
                                                  pad=1)
    if self.proj_pool is not None:
        p = relu.relu(self.poolpn(self.poolp(p)))
    outs.append(p)

    y = concat.concat(outs, axis=1)
    return y
def _one_directional_loop(di):
    # di=0, forward RNN
    # di=1, backward RNN
    xs_list = xs_next if di == 0 else reversed(xs_next)
    layer_idx = direction * layer + di
    h = hx[layer_idx]
    h_list = []
    for x in xs_list:
        batch = x.shape[0]
        if h.shape[0] > batch:
            h, h_rest = split_axis.split_axis(h, [batch], axis=0)
        else:
            h_rest = None

        if layer > 0:
            x = dropout.dropout(x, ratio=dropout_ratio)

        rnn_in = (
            linear.linear(x, xws[layer_idx], xbs[layer_idx]) +
            linear.linear(h, hws[layer_idx], hbs[layer_idx]))
        if activation == 'tanh':
            h_bar = tanh.tanh(rnn_in)
        elif activation == 'relu':
            h_bar = relu.relu(rnn_in)

        if h_rest is not None:
            h = concat.concat([h_bar, h_rest], axis=0)
        else:
            h = h_bar
        h_list.append(h_bar)
    return h, h_list
def _gru(x, h, c, w, b):
    xw = concat.concat([w[0], w[1], w[2]], axis=0)
    hw = concat.concat([w[3], w[4], w[5]], axis=0)
    xb = concat.concat([b[0], b[1], b[2]], axis=0)
    hb = concat.concat([b[3], b[4], b[5]], axis=0)

    gru_x = linear.linear(x, xw, xb)
    gru_h = linear.linear(h, hw, hb)

    W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
    U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

    r = sigmoid.sigmoid(W_r_x + U_r_h)
    z = sigmoid.sigmoid(W_z_x + U_z_h)
    h_bar = tanh.tanh(W_x + r * U_x)
    return (1 - z) * h_bar + z * h, None
def __call__(self, x, **kwargs):
    lstm_fw = self.lstm_fw(x, **kwargs)
    lstm_bw = self.lstm_bw(x, **kwargs)
    if self.concat:
        return concat([lstm_fw, lstm_bw], axis=2)
    else:
        return lstm_fw + lstm_bw
def __call__(self, x):
    outs = []

    if self.out1 > 0:
        h1 = self.f.conv1(x)
        h1 = self.f.conv1n(h1)
        h1 = relu.relu(h1)
        outs.append(h1)

    h3 = relu.relu(self.f.proj3n(self.f.proj3(x)))
    h3 = relu.relu(self.f.conv3n(self.f.conv3(h3)))
    outs.append(h3)

    h33 = relu.relu(self.f.proj33n(self.f.proj33(x)))
    h33 = relu.relu(self.f.conv33an(self.f.conv33a(h33)))
    h33 = relu.relu(self.f.conv33bn(self.f.conv33b(h33)))
    outs.append(h33)

    p = self.f.pool(x)
    if self.proj_pool is not None:
        p = relu.relu(self.f.poolpn(self.f.poolp(p)))
    outs.append(p)

    y = concat.concat(outs, axis=1)
    return y
def _offset2grid(offset, kh, kw, sy, sx, ph, pw, h, w):
    n, khkw2, out_h, out_w = offset.shape
    khkw = int(khkw2 / 2)

    xp = cuda.get_array_module(offset)
    ys, xs = xp.meshgrid(
        xp.arange(0, sy * out_h, sy, dtype=numpy.float32),
        xp.arange(0, sx * out_w, sx, dtype=numpy.float32),
        indexing='ij', copy=False)
    filter_offset_x = xp.tile(xp.arange(kw, dtype=numpy.float32), kh)
    filter_offset_y = xp.repeat(xp.arange(kh, dtype=numpy.float32), kw)
    x_coord = (offset[:, :khkw] + xs[None, None] +
               filter_offset_x[None, :, None, None])
    y_coord = (offset[:, khkw:] + ys[None, None] +
               filter_offset_y[None, :, None, None])

    # The values of this variable are clipped to the range [-1, 1].
    # The coordinate (-1, -1) corresponds to the upper-left
    # corner of the input image.
    x_coord = (x_coord / (w + 2 * pw - 1) - 0.5) * 2
    y_coord = (y_coord / (h + 2 * ph - 1) - 0.5) * 2

    # Shape of `coord` is (n, 2 * kh * kw, out_h, out_w)
    coord = concat.concat([x_coord, y_coord], axis=1)
    return coord
def __call__(self, x): test = not self.train outs = [] if self.out1 > 0: h1 = self.conv1(x) h1 = self.conv1n(h1, test=test) h1 = relu.relu(h1) outs.append(h1) h3 = relu.relu(self.proj3n(self.proj3(x), test=test)) h3 = relu.relu(self.conv3n(self.conv3(h3), test=test)) outs.append(h3) h33 = relu.relu(self.proj33n(self.proj33(x), test=test)) h33 = relu.relu(self.conv33an(self.conv33a(h33), test=test)) h33 = relu.relu(self.conv33bn(self.conv33b(h33), test=test)) outs.append(h33) if self.pooltype == 'max': p = max_pooling_2d.max_pooling_2d(x, 3, stride=self.stride, pad=1) else: p = average_pooling_2d.average_pooling_2d(x, 3, stride=self.stride, pad=1) if self.proj_pool is not None: p = relu.relu(self.poolpn(self.poolp(p), test=test)) outs.append(p) y = concat.concat(outs, axis=1) return y
def __call__(self, x1, x2): h = x1 ksizes = x2 h = F.relu(self.bn1(self.conv1(h))) h = F.max_pooling_2d(h, 3, stride=2) # Res Blocks h = self.res2(h) # 1/4 pool1_4 = h h = self.res3(h) # 1/8 # upscore 1/8 -> 1/4 pool1_8 = self.upscore2(h) h = self.res4(h) # 1/16 # upscore 1/16 -> 1/4 h = self.upscore1(h) # concat 1 / 4 h = F.leaky_relu( self.bn_upscore(concat.concat((h, pool1_8, pool1_4), axis=1))) h = F.relu(self.bn_concat(self.concat_conv(h))) ksizes = F.ceil( F.resize_images((ksizes * 5), (h.data.shape[2], h.data.shape[3]))) h = convolutional_roi_pooling(h, ksizes, out_ksize=self.out_ksize) h = F.relu(self.bn_croip1(self.pool_roi_conv(h))) h = F.relu(self.bn_croip2(self.conv_after_croip1(h))) h = F.relu(self.bn_croip3(self.conv_after_croip2(h))) # score #1 / 4 h = F.relu(self.score_conv(h)) score = h # 1/4 return score
def _one_directional_loop(di): # di=0, forward GRU # di=1, backward GRU xs_list = xs_next if di == 0 else reversed(xs_next) layer_idx = direction * layer + di h = hx[layer_idx] h_list = [] for x in xs_list: batch = x.shape[0] if h.shape[0] > batch: h, h_rest = split_axis.split_axis(h, [batch], axis=0) else: h_rest = None if layer > 0: x = dropout.dropout(x, ratio=dropout_ratio) gru_x = linear.linear(x, xws[layer_idx], xbs[layer_idx]) gru_h = linear.linear(h, hws[layer_idx], hbs[layer_idx]) W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1) U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1) r = sigmoid.sigmoid(W_r_x + U_r_h) z = sigmoid.sigmoid(W_z_x + U_z_h) h_bar = tanh.tanh(W_x + r * U_x) h_bar = (1 - z) * h_bar + z * h if h_rest is not None: h = concat.concat([h_bar, h_rest], axis=0) else: h = h_bar h_list.append(h_bar) return h, h_list
def forward(self, x):
    outs = []

    if self.out1 > 0:
        h1 = self.conv1(x)
        h1 = self.conv1n(h1)
        h1 = relu.relu(h1)
        outs.append(h1)

    h3 = relu.relu(self.proj3n(self.proj3(x)))
    h3 = relu.relu(self.conv3n(self.conv3(h3)))
    outs.append(h3)

    h33 = relu.relu(self.proj33n(self.proj33(x)))
    h33 = relu.relu(self.conv33an(self.conv33a(h33)))
    h33 = relu.relu(self.conv33bn(self.conv33b(h33)))
    outs.append(h33)

    if self.pooltype == 'max':
        p = max_pooling_2d.max_pooling_2d(x, 3, stride=self.stride, pad=1,
                                          cover_all=False)
    else:
        p = average_pooling_2d.average_pooling_2d(x, 3, stride=self.stride,
                                                  pad=1)
    if self.proj_pool is not None:
        p = relu.relu(self.poolpn(self.poolp(p)))
    outs.append(p)

    y = concat.concat(outs, axis=1)
    return y
def __call__(self, x):
    out1 = self.f.conv1(x)
    out3 = self.f.conv3(relu.relu(self.f.proj3(x)))
    out5 = self.f.conv5(relu.relu(self.f.proj5(x)))
    pool = self.f.projp(max_pooling_2d.max_pooling_2d(
        x, 3, stride=1, pad=1))
    y = relu.relu(concat.concat((out1, out3, out5, pool), axis=1))
    return y
def _one_directional_loop(f, xs, h, c, w, b):
    h_list = []
    for x in xs:
        batch = len(x)
        need_split = len(h) > batch
        if need_split:
            h, h_rest = split_axis.split_axis(h, [batch], axis=0)
            if c is not None:
                c, c_rest = split_axis.split_axis(c, [batch], axis=0)

        h, c = f(x, h, c, w, b)
        h_list.append(h)

        if need_split:
            h = concat.concat([h, h_rest], axis=0)
            if c is not None:
                c = concat.concat([c, c_rest], axis=0)
    return h, c, h_list
def _one_directional_loop(di): # di=0, forward LSTM # di=1, backward LSTM h_list = [] c_list = [] layer_idx = direction * layer + di h = hx[layer_idx] c = cx[layer_idx] if di == 0: xs_list = xs_next else: xs_list = reversed(xs_next) counter = 0 for x in xs_list: counter += 1 batch = x.shape[0] if h.shape[0] > batch: h, h_rest = split_axis.split_axis(h, [batch], axis=0) c, c_rest = split_axis.split_axis(c, [batch], axis=0) else: h_rest = None c_rest = None if layer != 0: x = dropout.dropout(x, ratio=dropout_ratio) if counter == 4: lstm_in = linear.linear(x, xws[layer_idx], xbs[layer_idx]) else: lstm_in = linear.linear( x, xws[layer_idx], xbs[layer_idx]) + linear.linear( h, hws[layer_idx], hbs[layer_idx]) c_bar, h_bar = lstm.lstm(c, lstm_in) if h_rest is not None: h = concat.concat([h_bar, h_rest], axis=0) c = concat.concat([c_bar, c_rest], axis=0) else: h = h_bar c = c_bar h_list.append(h_bar) c_list.append(c_bar) return h, c, h_list, c_list
def n_step_rnn_impl(f, n_layers, dropout_ratio, hx, cx, ws, bs, xs, use_bi_direction): direction = 2 if use_bi_direction else 1 hx = chainer.functions.separate(hx) use_cell = cx is not None if use_cell: cx = chainer.functions.separate(cx) else: cx = [None] * len(hx) xs_next = xs hy = [] cy = [] for layer in six.moves.range(n_layers): # Forward RNN if layer == 0: xs = xs_next else: xs = _dropout_sequence(xs_next, dropout_ratio) idx = direction * layer h, c, h_forward = _one_directional_loop(f, xs, hx[idx], cx[idx], ws[idx], bs[idx]) hy.append(h) cy.append(c) if use_bi_direction: # Backward RNN idx = direction * layer + 1 if layer == 0: xs = xs_next else: xs = _dropout_sequence(xs_next, dropout_ratio) h, c, h_backward = _one_directional_loop(f, reversed(xs), hx[idx], cx[idx], ws[idx], bs[idx]) h_backward.reverse() # Concat xs_next = [ concat.concat([hfi, hbi], axis=1) for hfi, hbi in six.moves.zip(h_forward, h_backward) ] hy.append(h) cy.append(c) else: # Uni-directional RNN xs_next = h_forward ys = xs_next hy = stack.stack(hy) if use_cell: cy = stack.stack(cy) else: cy = None return hy, cy, tuple(ys)
def __call__(self, x):
    x = self.embed(x)
    xs = split_axis.split_axis(x, x.data.shape[1], 1)
    ret = []
    for x in xs:
        x = self.rnn1(x)
        x = self.rnn2(x)
        x = self.linear(x)
        x = reshape.reshape(x, x.data.shape + (-1, ))
        ret.append(x)
    ret = concat.concat(ret, axis=2)
    return ret
def stack(xs, axis=0):
    """Concatenate variables along a new axis.

    Args:
        xs (list of chainer.Variable): Variables to be concatenated.
        axis (int): Axis of result along which variables are stacked.

    Returns:
        ~chainer.Variable: Output variable.

    """
    xs = [expand_dims.expand_dims(x, axis=axis) for x in xs]
    return concat.concat(xs, axis=axis)
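A short, illustrative check (not from the original listing) that stacking along a new axis is just expand_dims followed by concat, assuming the public chainer.functions.stack and chainer.functions.concat APIs:

import numpy as np
import chainer.functions as F

xs = [np.arange(6, dtype=np.float32).reshape(2, 3) for _ in range(4)]
y = F.stack(xs, axis=0)                       # shape (4, 2, 3): new leading axis
z = F.concat([x[None] for x in xs], axis=0)   # same result built by hand
assert y.shape == z.shape == (4, 2, 3)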
def black_out(x, t, W, samples): """BlackOut loss function. BlackOut loss function is defined as .. math:: -\\log(p(t)) - \\sum_{s \\in S} \\log(1 - p(s)), where :math:`t` is the correct label, :math:`S` is a set of negative examples and :math:`p(\cdot)` is likelihood of a given label. And, :math:`p` is defined as .. math:: p(y) = \\frac{\\exp(W_y^\\top x)}{ \\sum_{s \\in samples} \\exp(W_s^\\top x)}. Args: x (~chainer.Variable): Batch of input vectors. t (~chainer.Variable): Vector of ground truth labels. W (~chainer.Variable): Weight matrix. samples (~chainer.Variable): Negative samples. Returns: ~chainer.Variable: Loss value. See: `BlackOut: Speeding up Recurrent Neural Network Language Models With \ Very Large Vocabularies <https://arxiv.org/abs/1511.06909>`_ .. seealso:: :class:`~chainer.links.BlackOut`. """ batch_size = x.shape[0] neg_emb = embed_id.embed_id(samples, W) neg_y = matmul.batch_matmul(neg_emb, x) neg_y = reshape.reshape(neg_y, neg_y.shape[:-1]) pos_emb = expand_dims.expand_dims(embed_id.embed_id(t, W), 1) pos_y = matmul.batch_matmul(pos_emb, x) pos_y = reshape.reshape(pos_y, pos_y.shape[:-1]) logz = logsumexp.logsumexp(concat.concat([pos_y, neg_y]), axis=1) blogz, bneg_y = broadcast.broadcast( reshape.reshape(logz, (batch_size, 1)), neg_y) ny = exponential.log(1 - exponential.exp(bneg_y - blogz)) py = reshape.reshape(pos_y, (batch_size,)) loss = py - logz + _sum.sum(ny, axis=1) return -_sum.sum(loss) / batch_size
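A minimal usage sketch of the loss above (illustrative, not part of the original listing), assuming the documented chainer.functions.black_out signature and hypothetical sizes:

import numpy as np
import chainer.functions as F

B, D, V, S = 4, 8, 100, 5  # batch, feature dim, vocabulary size, negative samples
x = np.random.randn(B, D).astype(np.float32)
t = np.random.randint(0, V, size=B).astype(np.int32)          # correct labels
W = np.random.randn(V, D).astype(np.float32)                   # output embedding
samples = np.random.randint(0, V, size=(B, S)).astype(np.int32)  # negatives
loss = F.black_out(x, t, W, samples)  # scalar Variable (mean over the batch)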
def _one_directional_loop(di): # di=0, forward LSTM # di=1, backward LSTM h_list = [] c_list = [] layer_idx = direction * layer + di h = hx[layer_idx] c = cx[layer_idx] if di == 0: xs_list = xs_next else: xs_list = reversed(xs_next) for x in xs_list: batch = x.shape[0] if h.shape[0] > batch: h, h_rest = split_axis.split_axis(h, [batch], axis=0) c, c_rest = split_axis.split_axis(c, [batch], axis=0) else: h_rest = None c_rest = None if layer != 0: x = dropout.dropout(x, ratio=dropout_ratio, train=train) lstm_in = linear.linear(x, xws[layer_idx], xbs[layer_idx]) + \ linear.linear(h, hws[layer_idx], hbs[layer_idx]) c_bar, h_bar = lstm.lstm(c, lstm_in) if h_rest is not None: h = concat.concat([h_bar, h_rest], axis=0) c = concat.concat([c_bar, c_rest], axis=0) else: h = h_bar c = c_bar h_list.append(h_bar) c_list.append(c_bar) return h, c, h_list, c_list
def __call__(self, x):
    x = self.embed(x)
    xs = split_axis.split_axis(x, x.data.shape[1], 1)
    ret = []
    for x in xs:
        for l in self.rnns:
            x = l(x)
            x = dropout.dropout(x, 0.25, self.train)
        for l in self.linears:
            x = l(x)
        x = reshape.reshape(x, x.data.shape + (-1, ))
        ret.append(x)
    ret = concat.concat(ret, axis=2)
    return ret
def __call__(self, x): """Updates the internal state and returns the LSTM outputs. Args: x (~chainer.Variable): A new batch from the input sequence. Returns: ~chainer.Variable: Outputs of updated LSTM units. """ if self.upward.has_uninitialized_params: in_size = x.size // x.shape[0] self.upward._initialize_params(in_size) self._initialize_params() batch = x.shape[0] lstm_in = self.upward(x) h_rest = None if self.h is not None: h_size = self.h.shape[0] if batch == 0: h_rest = self.h elif h_size < batch: msg = ('The batch size of x must be equal to or less than the ' 'size of the previous state h.') raise TypeError(msg) elif h_size > batch: h_update, h_rest = split_axis.split_axis(self.h, [batch], axis=0) lstm_in += self.lateral(h_update) else: lstm_in += self.lateral(self.h) if self.c is None: xp = self.xp self.c = variable.Variable(xp.zeros((batch, self.state_size), dtype=x.dtype), volatile='auto') # self.c, y = lstm.lstm(self.c, lstm_in) c, y = lstm.lstm(self.c, lstm_in) enable = (x.data != -1) self.c = where(enable, c, self.c) if self.h is not None: y = where(enable, y, self.h) if h_rest is None: self.h = y elif len(y.data) == 0: self.h = h_rest else: self.h = concat.concat([y, h_rest], axis=0) return y
def __call__(self, x): """Updates the internal state and returns the LSTM outputs. Args: x (~chainer.Variable): A new batch from the input sequence. Returns: ~chainer.Variable: Outputs of updated LSTM units. """ if self.upward.has_uninitialized_params: with cuda.get_device_from_id(self._device_id): in_size = x.size // x.shape[0] self.upward._initialize_params(in_size) self._initialize_params() batch = x.shape[0] lstm_in = self.upward(x) h_rest = None if self.h is not None: h_size = self.h.shape[0] if batch == 0: h_rest = self.h elif h_size < batch: msg = ('The batch size of x must be equal to or less than' 'the size of the previous state h.') raise TypeError(msg) elif h_size > batch: h_update, h_rest = split_axis.split_axis( self.h, [batch], axis=0) lstm_in += self.lateral(h_update) else: lstm_in += self.lateral(self.h) if self.c is None: xp = self.xp with cuda.get_device_from_id(self._device_id): self.c = variable.Variable( xp.zeros((batch, self.state_size), dtype=x.dtype), volatile='auto') self.c, y = lstm.lstm(self.c, lstm_in) if h_rest is None: self.h = y elif len(y.data) == 0: self.h = h_rest else: self.h = concat.concat([y, h_rest], axis=0) return y
def __call__(self, c, h): """Updates the internal state and returns the Cell outputs. Args: c (~chainer.Variable): The previous memory of the Grid cell. h (~chainer.Variable): The batched form of the previous state. Returns: tuple of ~chainer.Variable: Returns ``(c_new, h_new)``, where ``c_new`` represents new cell state, and ``h_new`` is updated output of LSTM units. """ assert h is not None assert c is not None c = split_axis.split_axis(c, self.out_indices, 1, True) h_list = [] h_curr = None for layer_id, layer in enumerate(self): h_curr = layer(c[layer_id], h) h_list.append(h_curr) h_new = concat.concat([x[1] for x in h_list], 1) c_new = concat.concat([x[0] for x in h_list], 1) return c_new, h_new
def __call__(self, x): """Updates the internal state and returns the LSTM outputs. Args: x (~chainer.Variable): A new batch from the input sequence. Returns: ~chainer.Variable: Outputs of updated LSTM units. """ if self.upward.W.data is None: with cuda.get_device_from_id(self._device_id): in_size = functools.reduce(operator.mul, x.shape[1:], 1) self.upward._initialize_params(in_size) self._initialize_params() batch = x.shape[0] lstm_in = self.upward(x) h_rest = None if self.h is not None: h_size = self.h.shape[0] if batch == 0: h_rest = self.h elif h_size < batch: msg = ('The batch size of x must be equal to or less than' 'the size of the previous state h.') raise TypeError(msg) elif h_size > batch: h_update, h_rest = split_axis.split_axis(self.h, [batch], axis=0) lstm_in += self.lateral(h_update) else: lstm_in += self.lateral(self.h) if self.c is None: xp = self.xp with cuda.get_device_from_id(self._device_id): self.c = variable.Variable( xp.zeros((batch, self.state_size), dtype=x.dtype)) self.c, y = lstm.lstm(self.c, lstm_in) if h_rest is None: self.h = y elif len(y.data) == 0: self.h = h_rest else: self.h = concat.concat([y, h_rest], axis=0) return y
def forward(self, x): """Computes the output of the Inception module. Args: x (~chainer.Variable): Input variable. Returns: Variable: Output variable. Its array has the same spatial size and the same minibatch size as the input array. The channel dimension has size ``out1 + out3 + out5 + proj_pool``. """ out1 = self.conv1(x) out3 = self.conv3(relu.relu(self.proj3(x))) out5 = self.conv5(relu.relu(self.proj5(x))) pool = self.projp(max_pooling_2d.max_pooling_2d(x, 3, stride=1, pad=1)) y = relu.relu(concat.concat((out1, out3, out5, pool), axis=1)) return y
def forward(self, x): """Updates the internal state and returns the LSTM outputs. Args: x (~chainer.Variable): A new batch from the input sequence. Returns: ~chainer.Variable: Outputs of updated LSTM units. """ if self.upward.W.array is None: with chainer.using_device(self.device): in_size = utils.size_of_shape(x.shape[1:]) self.upward._initialize_params(in_size) self._initialize_params() batch = x.shape[0] lstm_in = self.upward(x) h_rest = None if self.h is not None: h_size = self.h.shape[0] if batch == 0: h_rest = self.h elif h_size < batch: msg = ('The batch size of x must be equal to or less than' 'the size of the previous state h.') raise TypeError(msg) elif h_size > batch: h_update, h_rest = split_axis.split_axis( self.h, [batch], axis=0) lstm_in += self.lateral(h_update) else: lstm_in += self.lateral(self.h) if self.c is None: with chainer.using_device(self.device): self.c = variable.Variable( self.xp.zeros((batch, self.state_size), dtype=x.dtype)) self.c, y = lstm.lstm(self.c, lstm_in) if h_rest is None: self.h = y elif len(y.array) == 0: self.h = h_rest else: self.h = concat.concat([y, h_rest], axis=0) return y
def __call__(self, x, test=None): """Computes the output of the InceptionBN module. Args: x (Variable): An input variable. test (bool): If ``True``, batch normalization layers run in testing mode; if ``test`` is omitted, ``not self.train`` is used as ``test``. """ if test is None: test = not self.train outs = [] if self.out1 > 0: h1 = self.conv1(x) h1 = self.conv1n(h1, test=test) h1 = relu.relu(h1) outs.append(h1) h3 = relu.relu(self.proj3n(self.proj3(x), test=test)) h3 = relu.relu(self.conv3n(self.conv3(h3), test=test)) outs.append(h3) h33 = relu.relu(self.proj33n(self.proj33(x), test=test)) h33 = relu.relu(self.conv33an(self.conv33a(h33), test=test)) h33 = relu.relu(self.conv33bn(self.conv33b(h33), test=test)) outs.append(h33) if self.pooltype == 'max': p = max_pooling_2d.max_pooling_2d(x, 3, stride=self.stride, pad=1, cover_all=False) else: p = average_pooling_2d.average_pooling_2d(x, 3, stride=self.stride, pad=1) if self.proj_pool is not None: p = relu.relu(self.poolpn(self.poolp(p), test=test)) outs.append(p) y = concat.concat(outs, axis=1) return y
def __call__(self, h): """Updates the internal state and returns the Cell outputs. Args: h (~chainer.Variable): The batched form of the previous state. Returns: ~chainer.Variable: Returns h_new), where ``h_new`` is updated output of LSTM units. """ assert h is not None h_list = [] h_curr = None for layer in self: h_curr = layer(h) h_list.append(h_curr) h_new = concat.concat(h_list, 1) return h_new
def __call__(self, x1, eps=0.001): h = x1 h = F.relu(self.bn1(self.conv1(h))) h = F.max_pooling_2d(h, 3, stride=2) # Res Blocks h = self.res2(h) # 1/4 h = self.res3(h) # 1/8 pool1_8 = h h = self.res4(h) # 1/16 pool1_16 = h h = self.res5(h) # 1/32 # upscore 1/32 -> 1/8 h = F.elu(self.bn_up32(self.upscore32(h))) upscore1 = h # 1/8 # upscore 1/16 -> 1/8 upscore2 = F.elu(self.bn_up16(self.upscore16(pool1_16))) # concat conv h = concat.concat((upscore1, upscore2, pool1_8), axis=1) h = F.relu(self.bn_concat(self.concat_conv(h))) concat_pool = h # score h = F.elu(self.score_pool(concat_pool)) score1_8 = h h = F.relu(self.upscore_final(h)) score = h # 1/1 h_cp = F.relu(self.bn_cp1(self.conv_cp1(concat_pool))) h_ocp = h_cp h_cp = F.relu(self.bn_cp2(self.conv_cp2(h_cp))) h_cp = F.elu(self.bn_cp3(self.upscore_cp1(h_cp))) h_cp = self.upscore_cp2(h_cp) h_ocp = F.relu(self.bn_ocp2(self.conv_ocp2(h_ocp))) h_ocp = F.elu(self.bn_ocp3(self.upscore_ocp1(h_ocp))) h_ocp = self.upscore_ocp2(h_ocp) cp_score = F.tanh(h_cp) * self.output_scale ocp_score = F.tanh(h_ocp) * self.output_scale return score, cp_score, ocp_score
def __call__(self, x): """Computes the output of the Inception module. Args: x (~chainer.Variable): Input variable. Returns: Variable: Output variable. Its array has the same spatial size and the same minibatch size as the input array. The channel dimension has size ``out1 + out3 + out5 + proj_pool``. """ out1 = self.conv1(x) out3 = self.conv3(relu.relu(self.proj3(x))) out5 = self.conv5(relu.relu(self.proj5(x))) pool = self.projp(max_pooling_2d.max_pooling_2d( x, 3, stride=1, pad=1)) y = relu.relu(concat.concat((out1, out3, out5, pool), axis=1)) return y
def __call__(self, h, x): """Updates the internal state and returns the LSTM outputs. Args: x (~chainer.Variable): A new batch from the input sequence. h (~chainer.Variable): The list of the previous cell outputs. Returns: ~chainer.Variable: A list of the outputs (h) of the updated LSTM units over all the layers. """ h_list = [] h = split_axis.split_axis(h, self.num_layers, 1, True) h_curr = x for layer, h in six.moves.zip(self, h): h_curr = layer(h, h_curr) h_list.append(h_curr) return concat.concat(h_list, 1)
def forward_one_step(self, hps, x_data, sensor_data, y_data, train=True): x_r = Variable(x_data[:, 0:3, :, :], volatile=not train) x_s = Variable(sensor_data, volatile=not train) t = Variable(y_data, volatile=not train) cn1_r = F.max_pooling_2d(self.prelu1_r( self.bn1_r(self.conv1_r(x_r), test=not train)), ksize=2, stride=2, pad=0) cn2_r = F.max_pooling_2d(self.prelu2_r( self.bn2_r(self.conv2_r(cn1_r), test=not train)), ksize=2, stride=2, pad=0) cn3_r = F.max_pooling_2d(self.prelu3_r( self.bn3_r(self.conv3_r(cn2_r), test=not train)), ksize=2, stride=2, pad=0) cn5_rm = F.max_pooling_2d(self.prelu5( self.bn5(self.conv5(cn3_r), test=not train)), ksize=2, stride=2, pad=0) sen6 = F.dropout(self.prelu6(self.bn6(self.fc6(x_s), test=not train)), ratio=hps.dropout, train=train) cs7 = self.cccp7( concat.concat( (reshape.reshape(cn5_rm, (cn5_rm.shape[0], 1, cn5_rm.shape[1] * cn5_rm.shape[2] * cn5_rm.shape[3], 1)), reshape.reshape(sen6, (sen6.shape[0], 1, sen6.shape[1], 1))), axis=1)) cs8 = F.dropout(self.prelu8(self.bn8(self.fc8(cs7), test=not train)), ratio=hps.dropout, train=train) y = self.fc9(cs8) return y, F.mean_squared_error(y, t)
def __call__(self, x1, x2, eps=0.001): h = x1 ksizes = x2 h = F.relu(self.bn1(self.conv1(h))) h = F.max_pooling_2d(h, 3, stride=2) # Res Blocks h = self.res2(h) # 1/4 h = self.res3(h) # 1/8 pool1_8 = h h = self.res4(h) # 1/16 pool1_16 = h h = self.res5(h) # 1/32 pool1_32 = h # upscore 1/32 -> 1/4 pool1_32 = self.upscore1(pool1_32) # upscore 1/16 -> 1/4 pool1_16 = self.upscore2(pool1_16) # upscore 1/8 -> 1/4 pool1_8 = self.upscore3(pool1_8) # concat 1 / 4 h = F.leaky_relu( self.bn_upscore( concat.concat((pool1_32, pool1_16, pool1_8), axis=1))) h = F.relu(self.concat_conv(h)) ksizes = F.ceil( F.resize_images((ksizes * 3), (h.data.shape[2], h.data.shape[3]))) h = convolutional_roi_pooling(h, ksizes, out_ksize=5) h = F.relu(self.bn_croip1(self.pool_roi_conv(h))) h = F.relu(self.bn_croip2(self.conv_after_croip(h))) # score h = F.relu(self.score_pool(h)) h = F.relu(self.upscore_final(h)) score = h # 1/1 return score
def __call__(self, x, top_n=None): """Updates the internal state and returns the LSTM outputs. Args: x (~chainer.Variable): A new batch from the input sequence. top_n (int): The number of LSTMs from the top whose outputs you want (default: outputs of all LSTMs are returned) Returns: ~chainer.Variable: Outputs of updated LSTM units. """ if top_n is None: top_n = self.num_layers h_list = [] h_curr = x for layer in self: h_curr = layer(h_curr) h_list.append(h_curr) return concat.concat(h_list[-top_n:], 1)
def get_weights(self, hps, x_data, sensor_data, train=False): x_r = Variable(x_data[:, 0:3, :, :], volatile=not train) x_s = Variable(sensor_data, volatile=not train) cn1_r = F.max_pooling_2d(self.prelu1_r( self.bn1_r(self.conv1_r(x_r), test=not train)), ksize=2, stride=2, pad=0) cn2_r = F.max_pooling_2d(self.prelu2_r( self.bn2_r(self.conv2_r(cn1_r), test=not train)), ksize=2, stride=2, pad=0) cn3_r = F.max_pooling_2d(self.prelu3_r( self.bn3_r(self.conv3_r(cn2_r), test=not train)), ksize=2, stride=2, pad=0) cn5_rm = F.max_pooling_2d(self.prelu5( self.bn5(self.conv5(cn3_r), test=not train)), ksize=2, stride=2, pad=0) sen6 = F.dropout(self.prelu6(self.bn6(self.fc6(x_s), test=not train)), ratio=hps.dropout, train=train) cs7 = self.cccp7( concat.concat( (reshape.reshape(cn5_rm, (cn5_rm.shape[0], 1, cn5_rm.shape[1] * cn5_rm.shape[2] * cn5_rm.shape[3], 1)), reshape.reshape(sen6, (sen6.shape[0], 1, sen6.shape[1], 1))), axis=1)) cs8 = F.dropout(self.prelu8(self.bn8(self.fc8(cs7), test=not train)), ratio=hps.dropout, train=train) return cuda.to_cpu(cs8.data)
def __call__(self, x1): h = x1 h = F.relu(self.bn1(self.conv1(h))) h = F.max_pooling_2d(h, 3, stride=2) # Res Blocks h = self.res2(h) # 1/4 h = self.res3(h) # 1/8 h = self.res4(h) # 1/16 pool1_16 = h h = self.res5(h) # 1/32 # upscore 1/32 -> 1/8 h = F.elu(self.bn_up32(self.upscore32(h))) upscore1 = h # 1/8 # upscore 1/16 -> 1/8 upscore2 = F.relu(self.bn_up16(self.upscore16(pool1_16))) # concat conv h = concat.concat((upscore1, upscore2), axis=1) h = F.relu(self.bn_concat(self.concat_conv(h))) concat_pool = F.dropout(h, ratio=0.1) # score h_cls = F.relu(self.bn_cls1(self.score_conv(F.dropout(h, ratio=0.1)))) h_cls = F.relu(self.bn_cls2(self.upscore_final1(h_cls))) score = self.upscore_final2(h_cls) h_cp = F.relu(self.bn_cp2(self.conv_cp2(concat_pool))) h_cp = F.relu(self.bn_cp3(self.upscore_cp1(h_cp))) h_cp = self.upscore_cp2(h_cp) h_ocp = F.relu(self.bn_ocp2(self.conv_ocp2(concat_pool))) h_ocp = F.relu(self.bn_ocp3(self.upscore_ocp1(h_ocp))) h_ocp = self.upscore_ocp2(h_ocp) cp_score = F.tanh(h_cp) ocp_score = F.tanh(h_ocp) return score, cp_score, ocp_score
def __call__(self, x, top_n=None): """Updates the internal state and returns the GRU outputs. Args: x (~chainer.Variable): A new batch from the input sequence. top_n (int): The number of GRUs from the top whose outputs you want (default: outputs of all GRUs are returned) Returns: ~chainer.Variable: A concatenation of the outputs (h) of the updated GRU units over the top N layers; by default all layers are considered. """ if top_n is None: top_n = self.num_layers h_list = [] h_curr = x for layer in self: h_curr = layer(h_curr) h_list.append(h_curr) return concat.concat(h_list[-top_n:], 1)
def n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs,
                    use_bi_direction, **kwargs):
    """n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs, use_bi_direction)

    Base function for Stack GRU/BiGRU functions.

    This function is used at :func:`chainer.functions.n_step_bigru` and
    :func:`chainer.functions.n_step_gru`.
    This function's behavior depends on argument ``use_bi_direction``.

    .. warning::

       ``train`` and ``use_cudnn`` arguments are not supported anymore since
       v2. Instead, use ``chainer.using_config('train', train)`` and
       ``chainer.using_config('use_cudnn', use_cudnn)`` respectively.
       See :func:`chainer.using_config`.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is number of layers and is
            equal to ``n_layers``, ``B`` is mini-batch size, and ``N`` is
            dimension of hidden units. Because of bi-direction, the first
            dimension length is ``2S``.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents weights for i-th layer.
            Each ``ws[i]`` is a list containing six matrices.
            ``ws[i][j]`` is corresponding with ``W_j`` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 3`` is ``(I, N)`` shape as they
            are multiplied with input variables. All other matrices have
            ``(N, N)`` shape.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represents biases for i-th layer.
            Each ``bs[i]`` is a list containing six vectors.
            ``bs[i][j]`` is corresponding with ``b_j`` in the equation.
            Shape of each matrix is ``(N,)`` where ``N`` is dimension of
            hidden units.
        xs (list of chainer.Variable): A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is
            mini-batch size for time ``t``, and ``I`` is size of input units.
            Note that this function supports variable length sequences.
            When sequences have different lengths, sort sequences in
            descending order by length, and transpose the sorted sequence.
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :class:`~chainer.Variable` holding sequence.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        activation (str): Activation function name.
            Please select ``tanh`` or ``relu``.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-direction GRU.

    .. seealso::
        :func:`chainer.functions.n_step_rnn`
        :func:`chainer.functions.n_step_birnn`

    """  # NOQA
    argument.check_unexpected_kwargs(
        kwargs, train='train argument is not supported anymore. '
        'Use chainer.using_config',
        use_cudnn='use_cudnn argument is not supported anymore. '
        'Use chainer.using_config')
    argument.assert_kwargs_empty(kwargs)

    xp = cuda.get_array_module(hx, hx.data)

    if xp is not numpy and chainer.should_use_cudnn('>=auto', 5000):
        states = get_random_state().create_dropout_states(dropout_ratio)
        # flatten all input variables
        inputs = tuple(itertools.chain(
            (hx, ),
            itertools.chain.from_iterable(ws),
            itertools.chain.from_iterable(bs),
            xs))
        if use_bi_direction:
            rnn = NStepBiGRU(n_layers, states)
        else:
            rnn = NStepGRU(n_layers, states)

        ret = rnn(*inputs)
        hy, = ret[:1]
        ys = ret[1:]
        return hy, ys

    else:
        direction = 2 if use_bi_direction else 1
        hx = split_axis.split_axis(hx, n_layers * direction, axis=0,
                                   force_tuple=True)
        hx = [reshape.reshape(h, h.shape[1:]) for h in hx]

        xws = [concat.concat([w[0], w[1], w[2]], axis=0) for w in ws]
        hws = [concat.concat([w[3], w[4], w[5]], axis=0) for w in ws]
        xbs = [concat.concat([b[0], b[1], b[2]], axis=0) for b in bs]
        hbs = [concat.concat([b[3], b[4], b[5]], axis=0) for b in bs]

        xs_next = xs
        hy = []
        for layer in six.moves.range(n_layers):

            def _one_directional_loop(di):
                # di=0, forward GRU
                # di=1, backward GRU
                xs_list = xs_next if di == 0 else reversed(xs_next)
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                h_list = []
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    else:
                        h_rest = None

                    if layer > 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)

                    gru_x = linear.linear(x, xws[layer_idx], xbs[layer_idx])
                    gru_h = linear.linear(h, hws[layer_idx], hbs[layer_idx])

                    W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
                    U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

                    r = sigmoid.sigmoid(W_r_x + U_r_h)
                    z = sigmoid.sigmoid(W_z_x + U_z_h)
                    h_bar = tanh.tanh(W_x + r * U_x)
                    h_bar = (1 - z) * h_bar + z * h
                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                    else:
                        h = h_bar
                    h_list.append(h_bar)
                return h, h_list

            # Forward GRU
            h, h_forward = _one_directional_loop(di=0)
            hy.append(h)

            if use_bi_direction:
                # Backward GRU
                h, h_backward = _one_directional_loop(di=1)
                h_backward.reverse()
                # Concat
                xs_next = [concat.concat([hfi, hbi], axis=1) for (hfi, hbi) in
                           six.moves.zip(h_forward, h_backward)]
                hy.append(h)
            else:
                # Uni-directional GRU
                xs_next = h_forward

        ys = xs_next
        hy = stack.stack(hy)
        return hy, tuple(ys)
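A minimal usage sketch (illustrative, not from the listing) of the public single-layer wrapper built on this base; it assumes chainer.functions.n_step_gru and weights stored as (out_size, in_size), the layout chainer.links.NStepGRU uses:

import numpy as np
import chainer.functions as F

n_layers, B, I, N = 1, 3, 4, 5
hx = np.zeros((n_layers, B, N), dtype=np.float32)
# six weight matrices and six biases per layer: w[0:3] act on x, w[3:6] on h
ws = [[np.random.randn(N, I).astype(np.float32) if j < 3
       else np.random.randn(N, N).astype(np.float32) for j in range(6)]]
bs = [[np.zeros(N, dtype=np.float32) for _ in range(6)]]
xs = [np.random.randn(B, I).astype(np.float32) for _ in range(7)]  # length-7 sequence
hy, ys = F.n_step_gru(n_layers, 0.0, hx, ws, bs, xs)
# hy: (n_layers, B, N); ys: 7 Variables, each of shape (B, N)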
def crf1d(cost, xs, ys, reduce='mean'):
    """Calculates negative log-likelihood of linear-chain CRF.

    It takes a transition cost matrix, a sequence of costs, and a sequence of
    labels. Let :math:`c_{st}` be a transition cost from a label :math:`s` to
    a label :math:`t`, :math:`x_{it}` be a cost of a label :math:`t` at
    position :math:`i`, and :math:`y_i` be an expected label at position
    :math:`i`. The negative log-likelihood of linear-chain CRF is defined as

    .. math::
        L = -\\left( \\sum_{i=1}^l x_{iy_i} + \\
             \\sum_{i=1}^{l-1} c_{y_i y_{i+1}} - {\\log(Z)} \\right) ,

    where :math:`l` is the length of the input sequence and :math:`Z` is the
    normalizing constant called partition function.

    .. note::

       When you want to calculate the negative log-likelihood of sequences
       which have different lengths, sort the sequences in descending order
       of lengths and transpose the sequences.

       For example, you have three input sequences:

       >>> a1 = a2 = a3 = a4 = np.random.uniform(-1, 1, 3).astype(np.float32)
       >>> b1 = b2 = b3 = np.random.uniform(-1, 1, 3).astype(np.float32)
       >>> c1 = c2 = np.random.uniform(-1, 1, 3).astype(np.float32)

       >>> a = [a1, a2, a3, a4]
       >>> b = [b1, b2, b3]
       >>> c = [c1, c2]

       where ``a1`` and all other variables are arrays with ``(K,)`` shape.
       Make a transpose of the sequences:

       >>> x1 = np.stack([a1, b1, c1])
       >>> x2 = np.stack([a2, b2, c2])
       >>> x3 = np.stack([a3, b3])
       >>> x4 = np.stack([a4])

       and make a list of the arrays:

       >>> xs = [x1, x2, x3, x4]

       You need to make label sequences in the same fashion.
       And then, call the function:

       >>> cost = chainer.Variable(
       ...     np.random.uniform(-1, 1, (3, 3)).astype(np.float32))
       >>> ys = [np.zeros(x.shape[0:1], dtype=np.int32) for x in xs]
       >>> loss = F.crf1d(cost, xs, ys)

       It calculates mean of the negative log-likelihood of the three
       sequences.

    The output is a variable whose value depends on the value of
    the option ``reduce``. If it is ``'no'``, it holds the elementwise
    loss values. If it is ``'mean'``, it holds mean of the loss values.

    Args:
        cost (Variable): A :math:`K \\times K` matrix which holds transition
            cost between two labels, where :math:`K` is the number of labels.
        xs (list of Variable): Input vector for each label.
            ``len(xs)`` denotes the length of the sequence, and each
            :class:`~chainer.Variable` holds a :math:`B \\times K` matrix,
            where :math:`B` is mini-batch size, :math:`K` is the number of
            labels.
            Note that :math:`B`\\ s in all the variables are not necessarily
            the same, i.e., it accepts the input sequences with different
            lengths.
        ys (list of Variable): Expected output labels. It needs to have the
            same length as ``xs``. Each :class:`~chainer.Variable` holds a
            :math:`B` integer vector.
            When ``x`` in ``xs`` has the different :math:`B`, corresponding
            ``y`` has the same :math:`B`. In other words, ``ys`` must satisfy
            ``ys[i].shape == xs[i].shape[0:1]`` for all ``i``.
        reduce (str): Reduction option. Its value must be either
            ``'mean'`` or ``'no'``. Otherwise, :class:`ValueError` is raised.

    Returns:
        ~chainer.Variable: A variable holding the average negative
        log-likelihood of the input sequences.

    .. note::

       See detail in the original paper: `Conditional Random Fields:
       Probabilistic Models for Segmenting and Labeling Sequence Data
       <https://repository.upenn.edu/cis_papers/159/>`_.

    """
    if reduce not in ('mean', 'no'):
        raise ValueError(
            "only 'mean' and 'no' are valid for 'reduce', but '%s' is "
            'given' % reduce)

    assert xs[0].shape[1] == cost.shape[0]

    n_label = cost.shape[0]
    n_batch = xs[0].shape[0]

    alpha = xs[0]
    alphas = []
    for x in xs[1:]:
        batch = x.shape[0]
        if alpha.shape[0] > batch:
            alpha, alpha_rest = split_axis.split_axis(alpha, [batch], axis=0)
            alphas.append(alpha_rest)
        b_alpha, b_cost = broadcast.broadcast(alpha[..., None], cost)
        alpha = logsumexp.logsumexp(b_alpha + b_cost, axis=1) + x

    if len(alphas) > 0:
        alphas.append(alpha)
        alpha = concat.concat(alphas[::-1], axis=0)

    logz = logsumexp.logsumexp(alpha, axis=1)

    cost = reshape.reshape(cost, (cost.size, 1))
    score = select_item.select_item(xs[0], ys[0])
    scores = []
    for x, y, y_prev in zip(xs[1:], ys[1:], ys[:-1]):
        batch = x.shape[0]
        if score.shape[0] > batch:
            y_prev, _ = split_axis.split_axis(y_prev, [batch], axis=0)
            score, score_rest = split_axis.split_axis(score, [batch], axis=0)
            scores.append(score_rest)
        score += (select_item.select_item(x, y) +
                  reshape.reshape(
                      embed_id.embed_id(y_prev * n_label + y, cost), (batch,)))

    if len(scores) > 0:
        scores.append(score)
        score = concat.concat(scores[::-1], axis=0)

    loss = logz - score
    if reduce == 'mean':
        return _sum.sum(loss) / n_batch
    else:
        return loss
def n_step_lstm_base(n_layers, dropout_ratio, hx, cx, ws, bs, xs, train,
                     use_cudnn, use_bi_direction):
    """Base function for Stack LSTM/BiLSTM functions.

    This function is used at :func:`chainer.functions.n_step_lstm` and
    :func:`chainer.functions.n_step_bilstm`.
    This function's behavior depends on the following arguments,
    ``activation`` and ``use_bi_direction``.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is number of layers and is
            equal to ``n_layers``, ``B`` is mini-batch size, and ``N`` is
            dimension of hidden units.
        cx (chainer.Variable): Variable holding stacked cell states.
            It has the same shape as ``hx``.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents weights for i-th layer.
            Each ``ws[i]`` is a list containing eight matrices.
            ``ws[i][j]`` is corresponding with ``W_j`` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 4`` is ``(I, N)`` shape as they
            are multiplied with input variables. All other matrices have
            ``(N, N)`` shape.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represents biases for i-th layer.
            Each ``bs[i]`` is a list containing eight vectors.
            ``bs[i][j]`` is corresponding with ``b_j`` in the equation.
            Shape of each matrix is ``(N,)`` where ``N`` is dimension of
            hidden units.
        xs (list of chainer.Variable): A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is
            mini-batch size for time ``t``, and ``I`` is size of input units.
            Note that this function supports variable length sequences.
            When sequences have different lengths, sort sequences in
            descending order by length, and transpose the sorted sequence.
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :class:`~chainer.Variable` holding sequence.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        train (bool): If ``True``, this function executes dropout.
        use_cudnn (bool): If ``True``, this function uses cuDNN if available.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-directional LSTM.

    Returns:
        tuple: This function returns a tuple containing three elements,
        ``hy``, ``cy`` and ``ys``.

        - ``hy`` is an updated hidden state whose shape is the same as ``hx``.
        - ``cy`` is an updated cell state whose shape is the same as ``cx``.
        - ``ys`` is a list of :class:`~chainer.Variable` . Each element
          ``ys[t]`` holds hidden states of the last layer corresponding to
          an input ``xs[t]``. Its shape is ``(B_t, N)`` where ``B_t`` is
          mini-batch size for time ``t``, and ``N`` is size of hidden units.
          Note that ``B_t`` is the same value as ``xs[t]``.

    .. seealso::
        :func:`chainer.functions.n_step_lstm`
        :func:`chainer.functions.n_step_bilstm`

    """
    xp = cuda.get_array_module(hx, hx.data)

    if use_cudnn and xp is not numpy and cuda.cudnn_enabled and \
            _cudnn_version >= 5000:
        states = get_random_state().create_dropout_states(dropout_ratio)
        # flatten all input variables
        inputs = tuple(itertools.chain(
            (hx, cx),
            itertools.chain.from_iterable(ws),
            itertools.chain.from_iterable(bs),
            xs))
        if use_bi_direction:
            rnn = NStepBiLSTM(n_layers, states, train=train)
        else:
            rnn = NStepLSTM(n_layers, states, train=train)

        ret = rnn(*inputs)
        hy, cy = ret[:2]
        ys = ret[2:]
        return hy, cy, ys

    else:
        direction = 2 if use_bi_direction else 1
        split_size = n_layers * direction
        hx = split_axis.split_axis(hx, split_size, axis=0, force_tuple=True)
        hx = [reshape.reshape(h, h.shape[1:]) for h in hx]
        cx = split_axis.split_axis(cx, split_size, axis=0, force_tuple=True)
        cx = [reshape.reshape(c, c.shape[1:]) for c in cx]

        xws = [_stack_weight([w[2], w[0], w[1], w[3]]) for w in ws]
        hws = [_stack_weight([w[6], w[4], w[5], w[7]]) for w in ws]
        xbs = [_stack_weight([b[2], b[0], b[1], b[3]]) for b in bs]
        hbs = [_stack_weight([b[6], b[4], b[5], b[7]]) for b in bs]

        xs_next = xs
        hy = []
        cy = []
        for layer in six.moves.range(n_layers):

            def _one_directional_loop(di):
                # di=0, forward LSTM
                # di=1, backward LSTM
                h_list = []
                c_list = []
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                c = cx[layer_idx]
                if di == 0:
                    xs_list = xs_next
                else:
                    xs_list = reversed(xs_next)
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                        c, c_rest = split_axis.split_axis(c, [batch], axis=0)
                    else:
                        h_rest = None
                        c_rest = None

                    if layer != 0:
                        x = dropout.dropout(x, ratio=dropout_ratio,
                                            train=train)

                    lstm_in = linear.linear(x, xws[layer_idx],
                                            xbs[layer_idx]) + \
                        linear.linear(h, hws[layer_idx], hbs[layer_idx])

                    c_bar, h_bar = lstm.lstm(c, lstm_in)
                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                        c = concat.concat([c_bar, c_rest], axis=0)
                    else:
                        h = h_bar
                        c = c_bar
                    h_list.append(h_bar)
                    c_list.append(c_bar)
                return h, c, h_list, c_list

            h, c, h_forward, c_forward = _one_directional_loop(di=0)
            hy.append(h)
            cy.append(c)

            if use_bi_direction:
                # BiLSTM
                h, c, h_backward, c_backward = _one_directional_loop(di=1)
                hy.append(h)
                cy.append(c)

                h_backward.reverse()
                # concat
                xs_next = [concat.concat([hfi, hbi], axis=1) for (hfi, hbi) in
                           zip(h_forward, h_backward)]
            else:
                # Uni-directional RNN
                xs_next = h_forward

        ys = xs_next
        hy = stack.stack(hy)
        cy = stack.stack(cy)
        return hy, cy, tuple(ys)
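A minimal usage sketch (illustrative, not from the listing) of the stacked-LSTM wrapper built on this base; it assumes the newer chainer.functions.n_step_lstm signature without the train/use_cudnn arguments shown above, and weights stored as (out_size, in_size):

import numpy as np
import chainer.functions as F

n_layers, B, I, N = 1, 2, 3, 4
hx = np.zeros((n_layers, B, N), dtype=np.float32)
cx = np.zeros((n_layers, B, N), dtype=np.float32)
# eight weight matrices and eight biases per layer: w[0:4] act on x, w[4:8] on h
ws = [[np.random.randn(N, I).astype(np.float32) if j < 4
       else np.random.randn(N, N).astype(np.float32) for j in range(8)]]
bs = [[np.zeros(N, dtype=np.float32) for _ in range(8)]]
xs = [np.random.randn(B, I).astype(np.float32) for _ in range(5)]  # length-5 sequence
hy, cy, ys = F.n_step_lstm(n_layers, 0.0, hx, cx, ws, bs, xs)
# hy, cy: (n_layers, B, N); ys: 5 Variables, each of shape (B, N)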