def patchwise_loss(self, h, a, t):
     """Compute the patch-wise absolute-error loss and cache ``h`` / ``a``.

     ``self.loss`` becomes the sum of ``|h - t|`` (with ``t`` reshaped to a
     column) divided by ``self.n_patches``.  ``h`` and ``a`` are then stored
     on ``self.y`` and ``self.a`` as sequences: split into ``self.n_images``
     equal sections along axis 0 when there is more than one image,
     otherwise wrapped in one-element lists.

     Args:
         h: Predictions; presumably one row per patch -- confirm with caller.
         a: Values cached next to ``h``; not used by the loss itself.
         t: Targets, reshaped to ``(-1, 1)`` before the subtraction.
     """
     target_column = F.reshape(t, (-1, 1))
     abs_error = abs(h - target_column)
     self.loss = F.sum(abs_error)
     self.loss /= self.n_patches

     multiple_images = self.n_images > 1
     if not multiple_images:
         per_image_h, per_image_a = [h], [a]
     else:
         per_image_h = F.split_axis(h, self.n_images, 0)
         per_image_a = F.split_axis(a, self.n_images, 0)

     self.y = per_image_h
     self.a = per_image_a
Example #2
0
 def norm_vec_sentence_level(d, nn_flag=False, include_norm_term=False):
     """Normalise ``d`` sentence by sentence and return it in flat layout.

     The rows of ``d`` are split per sentence, zero-padded to the longest
     sentence, passed through ``get_normalized_vector``, and finally trimmed
     back to the original lengths and re-concatenated along axis 0.

     NOTE(review): ``lengths`` and ``batchsize`` are not parameters; they are
     read from the enclosing scope.  ``nn_flag`` and ``include_norm_term``
     are accepted but unused here.
     """
     n_dim = d.shape[1]
     sentence_ends = np.cumsum(lengths)[:-1]
     per_sentence = F.split_axis(d, sentence_ends, axis=0)
     longest = np.max(lengths)
     padded = F.pad_sequence(per_sentence, length=longest, padding=0.0)
     normalized = get_normalized_vector(padded, None)
     flat = F.reshape(normalized, (-1, n_dim))

     # After padding every sentence occupies exactly ``longest`` rows.
     padded_ends = np.cumsum(np.full(batchsize, longest))[:-1]
     padded_rows = F.split_axis(flat, padded_ends, axis=0)
     trimmed = [rows[:n_valid] for rows, n_valid in zip(padded_rows, lengths)]
     return F.concat(trimmed, axis=0)
Example #3
0
    def forward(self, x, l, train, action):
        """Run one glimpse step: crop, encode, update the core, pick a location.

        Args:
            x: Input batch handed to ``crop``.
            l: Location variable; ``l.data`` is rescaled from [-1, 1]-style
                coordinates to integer pixel centres before cropping.
            train: When true, a location is sampled around the predicted
                mean and ``ln_p`` is computed for it.
            action: When true, the action head ``fc_ha`` is evaluated.

        Returns:
            ``(location, ln_p, y)``.  ``location`` is the clipped sample when
            ``train`` is true, otherwise the predicted mean; ``ln_p`` is
            ``None`` unless ``train``; ``y`` is ``None`` unless ``action``.
        """
        # Pixel arithmetic is done with NumPy, so pull the data to the host.
        if self.xp == np:
            centers = l.data
        else:
            centers = self.xp.asnumpy(l.data)

        half_glimpse = self.g_size/2
        usable_span = self.in_size-self.g_size+1
        centers = (centers+1)*0.5*usable_span + half_glimpse
        centers = np.clip(centers, half_glimpse, self.in_size-half_glimpse)
        centers = np.floor(centers).astype(np.int32)

        # Retina encoding of the cropped glimpse.
        glimpse = crop(x, loc=centers, size=self.g_size)
        retina_code = F.relu(self.emb_x(glimpse))

        # Location encoding.
        location_code = F.relu(self.emb_l(l))

        # Glimpse network combines both encodings.
        g = F.relu(self.fc_lg(location_code) + self.fc_xg(retina_code))

        # Core network; ``core_lstm`` is called once per step.
        h = self.core_lstm(g)

        # Location network predicts the mean of the next location.
        mean_loc = F.tanh(self.fc_hl(h))

        sampled_loc = None
        ln_p = None
        if train:
            # Sample a location around the mean and keep it in [-1, 1].
            sampled_loc = F.gaussian(mean=mean_loc, ln_var=self.ln_var)
            sampled_loc = F.clip(sampled_loc, -1., 1.)

            # Squared distance between sample and mean, scaled by the variance.
            m1, m2 = F.split_axis(mean_loc, indices_or_sections=2, axis=1)
            s1, s2 = F.split_axis(sampled_loc, indices_or_sections=2, axis=1)
            d1 = s1-m1
            d2 = s2-m2
            sq_dist = d1*d1 + d2*d2
            ln_p = F.reshape(0.5 * sq_dist / self.var, (-1,))

        # Action network is only evaluated on request.
        y = self.fc_ha(h) if action else None

        if train:
            return sampled_loc, ln_p, y
        return mean_loc, None, y
    def weighted_loss(self, h, a, t):
        """Compute the per-image weighted absolute-error loss.

        For every image the prediction is the ``a``-weighted average of ``h``
        along axis 0; the absolute difference to that image's target is
        accumulated in ``self.loss`` and finally divided by
        ``self.n_images``.  The per-image ``h`` and ``a`` sequences are cached
        on ``self.y`` and ``self.a``.

        Args:
            h: Predictions, split into ``self.n_images`` sections on axis 0.
            a: Weights, split the same way as ``h``.
            t: Targets, split the same way; each piece is reshaped to ``(1,)``.
        """
        self.loss = 0
        n_images = self.n_images
        if not n_images > 1:
            h, a, t = [h], [a], [t]
        else:
            h = F.split_axis(h, n_images, 0)
            a = F.split_axis(a, n_images, 0)
            t = F.split_axis(t, n_images, 0)

        for idx in range(n_images):
            weights = a[idx]
            weighted_mean = F.sum(h[idx] * weights, 0) / F.sum(weights, 0)
            target = F.reshape(t[idx], (1,))
            self.loss += abs(weighted_mean - target)
        self.loss /= n_images

        self.y = h
        self.a = a
Example #5
0
def shake_camera(img):
    """Randomly translate a 4-D image batch by a few pixels, zero-filling.

    The batch is padded with 4 zero rows/columns on each side of axis 2 and
    axis 3 and a window of the original size is cut out at a random start
    position drawn from ``np.random.randint(1, 8)`` (i.e. 1..7), once per
    axis.  The same shift is applied to every image in the batch.

    The window size is taken from the input itself (``s2`` for axis 2,
    ``s3`` for axis 3) rather than from a module-level ``img_w``, so
    non-square inputs and inputs of any size keep their shape.

    Args:
        img: Variable whose ``data`` has four dimensions.

    Returns:
        Variable with the same shape as ``img``.
    """
    s0, s1, s2, s3 = img.data.shape

    # Shift along axis 2: pad, then cut an s2-tall window at a random offset.
    zerobar = Variable(xp.zeros((s0, s1, 4, s3), dtype=np.float32))
    img = F.concat([zerobar, img, zerobar], axis=2)
    randshift = np.random.randint(1, 8)
    img = F.split_axis(img, [randshift, randshift + s2], axis=2)[1]

    # Shift along axis 3.  The trailing singleton axis mirrors the original
    # implementation, which never concatenated on the last axis directly.
    zerobar = Variable(xp.zeros((s0, s1, s2, 4, 1), dtype=np.float32))
    img = F.reshape(img, (s0, s1, s2, s3, 1))
    img = F.concat([zerobar, img, zerobar], axis=3)
    randshift = np.random.randint(1, 8)
    img = F.split_axis(img, [randshift, randshift + s3], axis=3)[1]
    img = F.reshape(img, (s0, s1, s2, s3))

    return img
Example #6
0
    def compute_vecs(self, word_ids, word_boundaries, phrase_num,
                     char_vecs=None):
        """Embed words, convolve over them and max-pool one vector per phrase.

        Args:
            word_ids: Word ids, wrapped with ``my_variable`` before embedding.
            word_boundaries: Split indices along the token axis; only the
                odd-numbered segments produced by the split are pooled.
            phrase_num: Number of rows of the returned matrix.
            char_vecs: Optional per-word vectors concatenated to the word
                embeddings when ``self.word_level_flag`` is set.

        Returns:
            Variable reshaped to ``(phrase_num, self.hidden_dim)``.
        """
        ids = my_variable(word_ids, volatile=not self.train)
        word_embs = self.emb(ids)     # total_len x dim

        use_char_vecs = self.word_level_flag and char_vecs is not None
        if use_char_vecs:
            word_embs = F.concat([word_embs, char_vecs], axis=1)
            feature_dim = self.emb_dim + self.add_dim
        else:
            feature_dim = self.emb_dim

        # 1 x 1 x total_len x dim
        conv_input = F.reshape(word_embs, (1, 1, -1, feature_dim))
        conv_output = self.conv(conv_input)

        # Flatten to hidden_dim x total_len and cut at the word boundaries.
        conv_matrix = F.reshape(conv_output, (self.hidden_dim, -1))
        segments = F.split_axis(conv_matrix, word_boundaries, axis=1)

        # Max over the token axis of every second segment (indices 1, 3, ...).
        pooled = [F.max(segment, axis=1) for segment in segments[1::2]]
        stacked = F.concat(pooled, axis=0)
        return F.reshape(stacked, (phrase_num, self.hidden_dim))
Example #7
0
    def translate(self, xs, max_length=100):
        """Greedily decode a batch of source sequences.

        Each source sequence is reversed, embedded and encoded; the decoder is
        then run for ``max_length`` steps starting from ``EOS``, always feeding
        back the arg-max token of the previous step.

        Args:
            xs: List of source id sequences.
            max_length: Number of decoding steps.

        Returns:
            List with one array per input sequence, cut before the first
            ``EOS`` if one was produced.
        """
        n_seqs = len(xs)
        with chainer.no_backprop_mode(), chainer.using_config('train', False):
            reversed_xs = [x[::-1] for x in xs]
            embedded_xs = sequence_embed(self.embed_x, reversed_xs)
            h, c, _ = self.encoder(None, None, embedded_xs)
            ys = self.xp.full(n_seqs, EOS, 'i')
            steps = []
            for _step in range(max_length):
                step_inputs = F.split_axis(self.embed_y(ys), n_seqs, 0)
                h, c, ys = self.decoder(h, c, step_inputs)
                scores = self.W(F.concat(ys, axis=0))
                ys = self.xp.argmax(scores.data, axis=1).astype('i')
                steps.append(ys)

        # (max_length, batch) -> (batch, max_length), on the host.
        result = cuda.to_cpu(self.xp.stack(steps).T)

        # Drop the first EOS and everything after it.
        outs = []
        for row in result:
            eos_positions = numpy.argwhere(row == EOS)
            if len(eos_positions) > 0:
                outs.append(row[:eos_positions[0, 0]])
            else:
                outs.append(row)
        return outs
Example #8
0
def sequence_embed(embed, xs, dropout=0.):
    """Embed a list of variable-length sequences with a single call.

    The result is equivalent to
    ``[F.dropout(embed(x), ratio=dropout) for x in xs]``, but the sequences
    are concatenated first so that ``embed`` and ``F.dropout`` are each
    invoked only once.

    Args:
        embed (callable): A :func:`~chainer.functions.embed_id` function
            or :class:`~chainer.links.EmbedID` link.
        xs (list): Input sequences (:class:`~chainer.Variable`,
            :class:`numpy.ndarray` or :class:`cupy.ndarray`); the i-th
            element is a :math:`(L_i, )`-shaped int array.
        dropout (float): Dropout ratio.

    Returns:
        list of ~chainer.Variable: Output variables. The i-th element is a
        :math:`(L_i, N)`-shaped float array, where :math:`N` is the number
        of dimensions of the word embedding.

    """
    lengths = [len(x) for x in xs]
    # Boundaries between consecutive sequences inside the concatenation.
    boundaries = numpy.cumsum(lengths[:-1])
    embedded = F.dropout(embed(F.concat(xs, axis=0)), ratio=dropout)
    return F.split_axis(embedded, boundaries, 0)
Example #9
0
    def transform(self, images, normalized=False):
        '''Transform image data to latent space.

        Parameters
        ----------
        images : array-like shape (n_images, image_width, image_height,
                                   n_colors)
            Input numpy array of images.
        normalized [optional] : bool
            Normalization flag that specifies whether pixel data is already
            on a [0,1] scale.  When false the data is divided by 255.

        Returns
        -------
        latent_vec : array-like shape (n_images, latent_dim)
            One random draw per image: ``noise * exp(0.5 * ln_var) + mean``,
            where ``mean`` and ``ln_var`` are the two halves of the encoder
            output.
        '''

        n_images = images.shape[0]
        flat_pixels = images.flatten().reshape((n_images, -1))
        x_encoding = chainer.Variable(flat_pixels)
        if not normalized:
            x_encoding /= 255.

        # The encoder output is split in two along axis 1; the second half is
        # used as a log-variance below.
        mean, ln_var = F.split_axis(self._encode(x_encoding), 2, 1)

        # One N(0, 1) sample per latent coordinate.
        noise = np.random.standard_normal(mean.data.shape).astype('float32')
        if self.flag_gpu:
            noise = cuda.to_gpu(noise)
        noise = chainer.Variable(noise)

        # Scale and shift the noise with the encoder's parameters.
        latent = noise * F.exp(0.5*ln_var) + mean
        return latent.data
Example #10
0
 def check_forward(self, x_data, ys_data, indices_or_sections, axis):
     """Assert that ``split_axis`` reproduces ``ys_data`` exactly.

     Every output must have dtype ``self.dtype``, a tuple shape, and data
     equal to the matching expected array with zero tolerance.
     """
     outputs = functions.split_axis(
         chainer.Variable(x_data), indices_or_sections, axis)
     for expected, output in zip(ys_data, outputs):
         self.assertEqual(output.data.dtype, self.dtype)
         self.assertIsInstance(output.data.shape, tuple)
         gradient_check.assert_allclose(
             expected, output.data, atol=0, rtol=0)
Example #11
0
    def encode_query(self, x_datas, i2sD, train=True):
        """Encode a query around its placeholder token with two LSTMs.

        Tokens are embedded (with dropout) and split into one row per token;
        positions listed in ``i2sD`` are replaced by ``W_dxQ`` applied to the
        stored value.  The forward LSTM state is captured at the placeholder
        and at the end of the sequence, the backward LSTM state at the
        placeholder and at the start.

        Args:
            x_datas: List of token ids; must contain ``self.PH_id``.
            i2sD: Mapping from token position to a replacement input.
            train: Forwarded to ``F.dropout`` and used for ``volatile``.

        Returns:
            ``(self.W_hu(c), self.W_hq(c))`` where ``c`` concatenates the four
            captured states along axis 1.
        """
        token_ids = chainer.Variable(
            self.xp.array(x_datas, dtype=np.int32), volatile=not train)
        embedded = F.dropout(
            self.embed(token_ids), ratio=self.dropout_ratio, train=train)
        h0L = list(F.split_axis(embedded, len(x_datas), axis=0))

        for position in i2sD.keys():
            h0L[position] = self.W_dxQ(i2sD[position])

        ph = x_datas.index(self.PH_id)
        through_placeholder, after_placeholder = h0L[:ph+1], h0L[ph+1:]
        from_placeholder, before_placeholder = h0L[ph:], h0L[:ph]

        # Forward pass: state at the placeholder, then at the last token.
        self.Q_f_LSTM.reset_state()
        for h0 in through_placeholder:
            state = self.Q_f_LSTM(h0)
        forward_out = state
        for h0 in after_placeholder:
            state = self.Q_f_LSTM(h0)
        forward_endout = state

        # Backward pass: state at the placeholder, then at the first token.
        self.Q_b_LSTM.reset_state()
        for h0 in reversed(from_placeholder):
            state = self.Q_b_LSTM(h0)
        backward_out = state
        for h0 in reversed(before_placeholder):
            state = self.Q_b_LSTM(h0)
        backward_endout = state

        states = [forward_out, backward_out, forward_endout, backward_endout]
        concat_h = F.concat(states, axis=1)
        return self.W_hu(concat_h), self.W_hq(concat_h)
Example #12
0
    def encode_tokens(self, x_datas, i2sD, train=True):
        """Run forward and backward LSTMs over a token sequence.

        Args:
            x_datas: List of token ids.
            i2sD: Mapping from token position to a replacement input; those
                positions use ``self.W_dx(value)`` instead of the embedding.
            train: Forwarded to ``F.dropout`` and used for ``volatile``.

        Returns:
            ``(forward_outL, backward_outL)``: the per-token LSTM outputs.
            ``backward_outL`` is in processing order, i.e. last token first.
        """
        # Embed, apply dropout and split into one row per token.
        token_ids = chainer.Variable(
            self.xp.array(x_datas, dtype=np.int32), volatile=not train)
        embedded = F.dropout(
            self.embed(token_ids), ratio=self.dropout_ratio, train=train)
        h0L = list(F.split_axis(embedded, len(x_datas), axis=0))

        # Swap in the dynamic representation at the listed positions.
        for position in i2sD.keys():
            h0L[position] = self.W_dx(i2sD[position])

        # Left-to-right pass.
        self.f_LSTM.reset_state()
        forward_outL = [self.f_LSTM(h0) for h0 in h0L]

        # Right-to-left pass.
        self.b_LSTM.reset_state()
        backward_outL = [self.b_LSTM(h0) for h0 in reversed(h0L)]

        return forward_outL, backward_outL
Example #13
0
    def check_forward(self, x1_data, x2_data, x3_data):
        """Check three consecutive calls of ``self.link`` against ``functions.lstm``.

        The link keeps its hidden state ``h`` and cell state ``c`` between
        calls, so the order of the three calls below is significant.  Inputs
        are wrapped in ``chainer.Variable`` only when ``self.input_variable``
        is set.

        Args:
            x1_data: Input of the first call.
            x2_data: Input of the second call; ``len(x2_data)`` is used as the
                number of leading rows of ``h`` that are updated.
            x3_data: Input of the third call.
        """
        xp = self.link.xp
        x1 = chainer.Variable(x1_data) if self.input_variable else x1_data
        # Call 1: starts from a zero cell state, so only ``upward`` contributes.
        h1 = self.link(x1)
        with cuda.get_device_from_array(x1_data):
            c0 = chainer.Variable(xp.zeros((len(self.x1), self.out_size),
                                           dtype=self.x1.dtype))
            c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x1))
        testing.assert_allclose(h1.data, h1_expect.data)
        testing.assert_allclose(self.link.h.data, h1_expect.data)
        testing.assert_allclose(self.link.c.data, c1_expect.data)

        batch = len(x2_data)
        x2 = chainer.Variable(x2_data) if self.input_variable else x2_data
        # Split the stored hidden state into the rows fed back in call 2 and
        # the remaining rows, which must be left untouched.
        h1_in, h1_rest = functions.split_axis(
            self.link.h.data, [batch], axis=0)
        # Call 2: the first ``batch`` rows also receive the lateral term.
        y2 = self.link(x2)
        with cuda.get_device_from_array(x1):
            c2_expect, y2_expect = \
                functions.lstm(c1_expect,
                               self.link.upward(x2) + self.link.lateral(h1_in))
        testing.assert_allclose(y2.data, y2_expect.data)
        testing.assert_allclose(self.link.h.data[:batch], y2_expect.data)
        testing.assert_allclose(self.link.h.data[batch:], h1_rest.data)

        x3 = chainer.Variable(x3_data) if self.input_variable else x3_data
        # Remember the hidden state before call 3; it is compared with the
        # state afterwards.
        h2_rest = self.link.h
        # Call 3: the expected output is computed from ``upward`` only.
        y3 = self.link(x3)
        c3_expect, y3_expect = \
            functions.lstm(c2_expect, self.link.upward(x3))
        testing.assert_allclose(y3.data, y3_expect.data)
        testing.assert_allclose(self.link.h.data, h2_rest.data)
Example #14
0
    def translate(self, xs, max_length=100):
        """Greedily decode a batch of source sequences.

        Sources are reversed, embedded and encoded.  Decoding starts from
        ``EOS`` and runs for ``max_length`` steps, feeding the arg-max token
        of each step into the next one.

        Args:
            xs: List of source id sequences.
            max_length: Number of decoding steps.

        Returns:
            List with one host array per input, truncated before its first
            ``EOS`` when one occurs.
        """
        batch = len(xs)
        with chainer.no_backprop_mode(), chainer.using_config('train', False):
            sources = [x[::-1] for x in xs]
            h, c, _ = self.encoder(
                None, None, sequence_embed(self.embed_x, sources))
            ys = self.xp.full(batch, EOS, numpy.int32)
            decoded_steps = []
            for _step in range(max_length):
                embedded = self.embed_y(ys)
                per_sequence = F.split_axis(embedded, batch, 0)
                h, c, ys = self.decoder(h, c, per_sequence)
                logits = self.W(F.concat(ys, axis=0))
                ys = self.xp.argmax(logits.array, axis=1).astype(numpy.int32)
                decoded_steps.append(ys)

        # Using `xp.concatenate(...)` instead of `xp.stack(result)` here to
        # support NumPy 1.9.
        stacked = self.xp.concatenate(
            [step[None, :] for step in decoded_steps])
        result = chainer.get_device(numpy).send(stacked.T)

        # Cut every row before its first EOS.
        outs = []
        for row in result:
            eos_positions = numpy.argwhere(row == EOS)
            if len(eos_positions) > 0:
                row = row[:eos_positions[0, 0]]
            outs.append(row)
        return outs
Example #15
0
def embed_seq_batch(embed, seq_batch, dropout=0., norm_vecs_one=False):
    """Embed a batch of sequences with a single ``embed`` call.

    The sequences are concatenated along axis 0, embedded, passed through
    dropout (and optionally ``get_normalized_vector``), then split back into
    one piece per input sequence.

    The split points are the cumulative sequence lengths, so sequences of
    different lengths are handled.  Splitting into ``len(seq_batch)`` equal
    sections, as before, is only correct when every sequence has the same
    length; for that case the result is unchanged.

    Args:
        embed (callable): Embedding function or link.
        seq_batch (list): Sequences supporting ``len()`` and ``F.concat``.
        dropout (float): Dropout ratio.
        norm_vecs_one (bool): Apply ``get_normalized_vector(embs, None)``
            to the embedded batch before splitting.

    Returns:
        The pieces returned by ``F.split_axis``, one per input sequence.
    """
    import numpy

    seq_lengths = [len(seq) for seq in seq_batch]
    # Boundaries between consecutive sequences inside the concatenation.
    split_points = numpy.cumsum(seq_lengths[:-1])
    embs = F.dropout(embed(F.concat(seq_batch, axis=0)), ratio=dropout)
    if norm_vecs_one:
        embs = get_normalized_vector(embs, None)
    # [(len_i, dim), ] x batchsize
    e_seq_batch = F.split_axis(embs, split_points, axis=0)
    return e_seq_batch
Example #16
0
    def check_backward(self, x_data, indices_or_sections, axis):
        """Assert that back-propagating through ``split_axis`` is the identity.

        Every output's gradient is seeded with its own data; after calling
        ``backward`` on the first output, ``x.grad`` must equal ``x.data``
        exactly.
        """
        source = chainer.Variable(x_data)
        pieces = functions.split_axis(source, indices_or_sections, axis)
        for piece in pieces:
            piece.grad = piece.data
        first_piece = pieces[0]
        first_piece.backward()

        gradient_check.assert_allclose(
            source.data, source.grad, atol=0, rtol=0)
Example #17
0
    def reset(self, img_feats):
        """Turn a batch of image features into LSTM states and outputs.

        The features are embedded with ``self.embed_img``, split into one
        piece per row, and fed to ``self.lstm`` with no initial states.

        Returns:
            The ``(hx, cx, ys)`` triple produced by ``self.lstm``.
        """
        embedded = self.embed_img(img_feats)
        n_rows = embedded.shape[0]
        per_row = F.split_axis(embedded, n_rows, axis=0)
        hidden, cell, outputs = self.lstm(None, None, per_row)
        return hidden, cell, outputs
 def crop(inputs, outsize, offset):
     """Crop ``inputs`` to ``outsize`` along every axis whose size differs.

     Args:
         inputs: Variable to crop; its ``data.shape`` is compared with
             ``outsize`` axis by axis.
         outsize: Target size per axis.
         offset: Start index for each cropped axis, in axis order.  Axes that
             already match ``outsize`` do not consume an entry.

     Returns:
         The cropped Variable (an ``F.identity`` of ``inputs`` when no axis
         needs cropping).
     """
     cropped = F.identity(inputs)
     n_cropped_axes = 0
     for axis, (in_len, out_len) in enumerate(
             zip(inputs.data.shape, outsize)):
         if not in_len != out_len:
             continue
         start = offset[n_cropped_axes]
         # Keep the middle piece: [start, start + out_len).
         _, cropped, _ = F.split_axis(
             cropped, [start, start + outsize[axis]], axis)
         n_cropped_axes += 1
     return cropped
Example #19
0
    def check_backward(self, x_data, indices_or_sections, axis):
        """Check the input gradient when only the first output has a gradient.

        The gradient of the first output is seeded with its own data and
        back-propagated; ``x.grad`` must then equal ``[1, 0]`` exactly.
        """
        source = chainer.Variable(x_data)
        pieces = functions.split_axis(source, indices_or_sections, axis)

        # Seed and back-propagate from the first piece only.
        first_piece = pieces[0]
        first_piece.grad = first_piece.data
        first_piece.backward()

        expected_grad = numpy.array([1, 0])
        gradient_check.assert_allclose(
            expected_grad, source.grad, atol=0, rtol=0)
Example #20
0
    def encode(self, data, test=False):
        """Encode ``data`` and draw one latent sample per row.

        The encoder output is split in two along axis 1 into ``mean`` and
        ``ln_var``; the sample is ``eps * exp(0.5 * ln_var) + mean`` with
        ``eps`` drawn from a standard normal distribution.

        Returns:
            ``(z, mean, ln_var)``.
        """
        encoded = self.enc(data, test=test)
        mean, ln_var = F.split_axis(encoded, 2, 1)

        eps = Variable(
            np.random.standard_normal(mean.data.shape).astype('float32'))
        if self.flag_gpu:
            eps.to_gpu()

        scale = F.exp(0.5*ln_var)
        z = eps * scale + mean
        return z, mean, ln_var
Example #21
0
 def encode(self, bow):
     """Encode a bag-of-words batch into a latent sample and its KL loss.

     ``bow`` goes through two ReLU layers (``l1``, ``l2``); the output of
     ``mu_logsigma`` is split in two along axis 1 and both halves are passed
     to ``F.gaussian`` and ``F.gaussian_kl_divergence``.

     Returns:
         ``(sample, loss)``.
     """
     hidden = F.relu(self.l2(F.relu(self.l1(bow))))
     params = self.mu_logsigma(hidden)
     mu, log_sigma = F.split_axis(params, 2, 1)
     sample = F.gaussian(mu, log_sigma)
     kl_loss = F.gaussian_kl_divergence(mu, log_sigma)
     return sample, kl_loss
Example #22
0
    def check_forward_single(self, inputs, backend_config):
        """Check that a single-section split returns a bare Variable.

        With ``force_tuple=False`` and one section, ``split_axis`` must
        return a ``chainer.Variable`` rather than a tuple.

        Args:
            inputs: One-element sequence holding the input array.
            backend_config: Backend configuration; when ``use_cuda`` is set
                the inputs are moved to the GPU first.
        """
        if backend_config.use_cuda:
            inputs = cuda.to_gpu(inputs)

        # Unpack the (possibly GPU-transferred) argument, not ``self.inputs``;
        # otherwise the transfer above is silently discarded.
        x, = inputs
        x = chainer.Variable(x)

        with backend_config:
            ys = functions.split_axis(x, 1, self.axis, force_tuple=False)

        assert isinstance(ys, chainer.Variable)
Example #23
0
 def __call__(self, x):
     """Apply the attention block to ``x``.

     ``c_attn`` output is split into three equal parts along axis 2 (query,
     key, value).  Each part goes through ``split_heads`` (the key with
     ``k=True``), then ``_attn``, ``merge_heads``, ``c_proj`` and
     ``resid_dropout`` are applied in that order.
     """
     projected = self.c_attn(x)
     q, k, v = F.split_axis(projected, 3, axis=2)
     q_heads = self.split_heads(q)
     k_heads = self.split_heads(k, k=True)
     v_heads = self.split_heads(v)
     merged = self.merge_heads(self._attn(q_heads, k_heads, v_heads))
     return self.resid_dropout(self.c_proj(merged))
Example #24
0
 def forward(self, inputs, device):
     """Run a one-section ``split_axis`` and return its result as a tuple.

     With ``self.force_tuple`` the function must already return a
     one-element tuple; without it the function must return a bare
     ``chainer.Variable``, which is wrapped in a tuple here.
     """
     (x,) = inputs
     wants_tuple = self.force_tuple
     result = functions.split_axis(
         x, 1, self.axis, force_tuple=wants_tuple)
     if not wants_tuple:
         assert isinstance(result, chainer.Variable)
         return (result,)
     assert isinstance(result, tuple)
     assert len(result) == 1
     return result
Example #25
0
 def __call__(self, x):
     """Route slices of ``x`` to the classifiers and the rest to the segmenter.

     ``x`` is split along axis 1 at the running totals of each classifier's
     ``n_input``.  The i-th slice is given to the i-th classifier; the final
     slice (everything after the last boundary) is given to
     ``self.segmenter``.  Without classifiers the whole input goes to the
     segmenter.

     Returns:
         ``(y, zs)``: the segmenter output and a tuple of classifier outputs.
     """
     boundaries = list(itertools.accumulate(
         classifier.n_input for classifier in self.classifiers
     ))
     pieces = cf.split_axis(x, boundaries, 1) if boundaries else [x]

     y = self.segmenter(pieces[-1])
     zs = tuple(
         classifier(piece)
         for piece, classifier in zip(pieces[:-1], self.classifiers)
     )
     return y, zs
Example #26
0
 def __call__(self, x, condition):
     """Apply one gated residual block.

     The convolution output is cropped to the input length on axis 2,
     ``condition`` is added, and the result is split in two along axis 1
     into a tanh branch and a sigmoid branch whose product is ``z``.

     Returns:
         ``(residual, skip)``: ``self.res(z)`` plus ``x`` (or plus the last
         step of ``x`` when the lengths on axis 2 differ), and
         ``self.skip(z)``.
     """
     in_length = x.shape[2]
     gated = self.conv(x)
     gated = gated[:, :, :in_length]  # crop
     gated += condition

     filter_part, gate_part = F.split_axis(gated, 2, axis=1)
     z = F.tanh(filter_part) * F.sigmoid(gate_part)

     same_length = x.shape[2] == z.shape[2]
     if not same_length:
         residual = self.res(z) + x[:, :, -1:]  # crop
     else:
         residual = self.res(z) + x
     skip_connection = self.skip(z)
     return residual, skip_connection
Example #27
0
    def __call__(self, x, t):
        """Run the network and return the loss (training) or the prediction.

        Five convolution stages, each followed by 2x2 max pooling, feed three
        further layers ``fc6``-``fc8``.  The ``fc8`` output is split into
        three parts along axis 1, and the part at index ``self.c`` is
        replaced by its batched product with an all-zero matrix before the
        parts are concatenated again.

        Args:
            x: Input batch.
            t: Targets, or ``None`` for inference.

        Returns:
            ``self.loss`` (softmax cross entropy divided by 16 * 16) when
            ``t`` is given, otherwise ``self.pred`` after a softmax.
        """
        # Stage 1
        h = F.relu(self.conv1_1(x))
        h = F.relu(self.conv1_2(h))
        h = F.max_pooling_2d(h, 2, stride=2)

        # Stage 2
        h = F.relu(self.conv2_1(h))
        h = F.relu(self.conv2_2(h))
        h = F.max_pooling_2d(h, 2, stride=2)

        # Stage 3
        h = F.relu(self.conv3_1(h))
        h = F.relu(self.conv3_2(h))
        h = F.relu(self.conv3_3(h))
        h = F.max_pooling_2d(h, 2, stride=2)

        # Stage 4
        h = F.relu(self.conv4_1(h))
        h = F.relu(self.conv4_2(h))
        h = F.relu(self.conv4_3(h))
        h = F.max_pooling_2d(h, 2, stride=2)

        # Stage 5
        h = F.relu(self.conv5_1(h))
        h = F.relu(self.conv5_2(h))
        h = F.relu(self.conv5_3(h))
        h = F.max_pooling_2d(h, 2, stride=2)

        # Head: no non-linearity after fc8.
        h = F.relu(self.fc6(h))
        h = F.relu(self.fc7(h))
        h = self.fc8(h)

        # Channelwise Inhibited
        # Split into three parts; part ``self.c`` is viewed as a 16x16 matrix
        # per sample and multiplied by zeros.
        # NOTE(review): this presumably suppresses that channel while keeping
        # it in the graph -- confirm the intent.
        h = F.split_axis(h, 3, 1)
        c = F.reshape(h[self.c], (x.data.shape[0], 16, 16))
        xp = cuda.get_array_module(x.data)
        # Without targets the zero matrix is created as a volatile Variable.
        volatile = False if t is not None else True
        z = Variable(xp.zeros_like(c.data), volatile=volatile)
        c = F.batch_matmul(c, z)
        c = F.reshape(c, (x.data.shape[0], 1, 16, 16))
        # Rebuild the part list with the replaced entry at index ``self.c``.
        hs = []
        for i, s in enumerate(h):
            if i == self.c:
                hs.append(c)
            else:
                hs.append(s)
        self.pred = F.concat(hs, 1)

        if t is not None:
            self.loss = F.softmax_cross_entropy(self.pred, t)
            self.loss /= 16 * 16
            return self.loss
        else:
            self.pred = F.softmax(self.pred)
            return self.pred
Example #28
0
    def check_backward(self, inputs, backend_config):
        """Check the input gradient when only the first output has a gradient.

        Inputs are moved to the GPU when ``backend_config.use_cuda`` is set.
        The first output's gradient is seeded with its own data and
        back-propagated; ``x.grad`` must then equal ``[1, 0]`` exactly.
        """
        if backend_config.use_cuda:
            inputs = cuda.to_gpu(inputs)

        (array,) = inputs
        source = chainer.Variable(array)

        with backend_config:
            pieces = functions.split_axis(source, self.ys_section, self.axis)
            # Seed and back-propagate from the first piece only.
            first_piece = pieces[0]
            first_piece.grad = first_piece.data
            first_piece.backward()

        expected_grad = numpy.array([1, 0])
        testing.assert_allclose(expected_grad, source.grad, atol=0, rtol=0)
Example #29
0
    def check_backward(self, inputs, backend_config):
        """Assert that back-propagating through ``split_axis`` is the identity.

        Inputs are moved to the GPU when ``backend_config.use_cuda`` is set.
        Every output's gradient is seeded with its own data; after calling
        ``backward`` on the first output, ``x.grad`` must equal ``x.data``
        exactly.
        """
        if backend_config.use_cuda:
            inputs = cuda.to_gpu(inputs)

        (array,) = inputs
        source = chainer.Variable(array)

        with backend_config:
            pieces = functions.split_axis(
                source, self.ys_section, self.axis, force_tuple=True)
            for piece in pieces:
                piece.grad = piece.data
            pieces[0].backward()

        testing.assert_allclose(source.data, source.grad, atol=0, rtol=0)
Example #30
0
    def check_forward(self, inputs, backend_config):
        """Assert that ``split_axis`` reproduces ``self.ys_expected`` exactly.

        Inputs are moved to the GPU when ``backend_config.use_cuda`` is set.
        Every output must have dtype ``self.dtype``, a tuple shape, and data
        equal to the matching expected array with zero tolerance.
        """
        if backend_config.use_cuda:
            inputs = cuda.to_gpu(inputs)

        (array,) = inputs
        source = chainer.Variable(array)

        with backend_config:
            pieces = functions.split_axis(
                source, self.ys_section, self.axis, force_tuple=True)

        for expected, piece in zip(self.ys_expected, pieces):
            assert piece.data.dtype == self.dtype
            assert isinstance(piece.data.shape, tuple)
            testing.assert_allclose(expected, piece.data, atol=0, rtol=0)
Example #31
0
    def onestep(self, ys, hx, cx, oxs, hts):
        """Advance the attentional decoder by one step.

        Args:
            ys: Previous output tokens, embedded with ``self.emb``.
            hx, cx: Recurrent states passed to ``self.rnn``.
            oxs: Encoder outputs, stacked before being passed to
                ``self.attn``.
            hts: Previous attentional states; only used when
                ``self.feeding`` is set, in which case they are concatenated
                to the token embeddings.

        Returns:
            ``(hy, cy, oys, hts)``: new recurrent states, the output of
            ``self.wo``, and the new attentional states.
        """
        bs = len(ys)
        emb_ys = self.emb(ys)

        if self.feeding:
            # Feed the previous attentional state together with the embedding.
            prev_hts = F.stack(hts)
            emb_ys = F.expand_dims(emb_ys, axis=1)
            emb_ys = F.concat((emb_ys, prev_hts), axis=2)
            rnn_inputs = F.separate(emb_ys)
        else:
            rnn_inputs = F.split_axis(emb_ys, bs, 0)
        hy, cy, rnn_outputs = self.rnn(hx, cx, rnn_inputs)

        decoder_states = F.stack(rnn_outputs)
        encoder_states = F.stack(oxs)
        contexts = self.attn(decoder_states, encoder_states)
        combined = F.concat((decoder_states, contexts), axis=2)
        new_hts = F.tanh(F.stack(sequence_linear(self.wc, combined)))
        scores = self.wo(F.concat(new_hts, axis=0))
        return hy, cy, scores, new_hts
Example #32
0
    def __call__(self, prev_hg, prev_cg, prev_z, v, r, prev_u):
        """Advance the recurrent core by one step.

        ``v`` is broadcast with ``self.broadcast_v`` and ``r`` with
        ``self.broadcast_r`` when its axis-2 size is 1.  The inputs are
        concatenated along axis 1, passed through ``self.lstm`` and split
        into four gate pre-activations (forget, input, candidate, output).

        Returns:
            ``(next_h, next_c, next_u)`` where ``next_u`` adds
            ``self.upsample_h(next_h)`` to ``prev_u``.
        """
        v = self.broadcast_v(v)
        needs_broadcast = r.shape[2] == 1
        if needs_broadcast:
            r = self.broadcast_r(r)

        stacked_inputs = cf.concat((prev_hg, v, r, prev_z), axis=1)
        pre_activations = self.lstm(stacked_inputs)
        forget_in, input_in, candidate_in, output_in = cf.split_axis(
            pre_activations, 4, axis=1)

        keep = cf.sigmoid(forget_in)
        write = cf.sigmoid(input_in)
        next_c = keep * prev_cg + write * cf.tanh(candidate_in)
        expose = cf.sigmoid(output_in)
        next_h = expose * cf.tanh(next_c)

        next_u = self.upsample_h(next_h) + prev_u

        return next_h, next_c, next_u
Example #33
0
    def __call__(self, ws, ss, ps, ts):
        """Compute and report the tagging loss for a batch.

        Args:
            ws: Words.
            ss: Suffixes.
            ps: Prefixes.
            ts: Labels with shape ``(batchsize, length)``; transposed and
                split into one label vector per position.

        Returns:
            The summed softmax cross entropy over all positions.  Loss and
            position-averaged accuracy are also reported via
            ``chainer.report``.
        """
        batchsize, length = ts.shape
        # The first and last forward outputs are dropped.
        ys = self.forward(ws, ss, ps)[1:-1]
        label_columns = F.split_axis(F.transpose(ts), length, 0)
        ts = [F.squeeze(column, 0) for column in label_columns]

        step_losses = [
            F.softmax_cross_entropy(y, t) for y, t in zip(ys, ts)
        ]
        loss = reduce(lambda total, term: total + term, step_losses)

        step_accuracies = [
            F.accuracy(y, t, ignore_label=IGNORE) for y, t in zip(ys, ts)
        ]
        acc = reduce(lambda total, term: total + term, step_accuracies)

        acc /= length
        chainer.report({"loss": loss, "accuracy": acc}, self)
        return loss
Example #34
0
	def get_scores(self,candidates,links,relations,edges,xp,mode,RC,EC):
		"""Score candidate ``(h, r, t, l)`` tuples.

		Context vectors are computed once for every entity that appears as
		head or tail.  The score of a candidate is the squared L2 norm of
		``context[h] - context[t] + relation_embedding[r]``; the relation
		embedding goes through ``tanh`` when ``self.is_bound_wr`` is set.

		Returns:
			One score per candidate, in input order.
		"""
		unique_entities = set()
		for head,_rel,tail,_label in candidates:
			unique_entities.add(head)
			unique_entities.add(tail)
		entity_list = list(unique_entities)

		contexts = self.get_context(entity_list,links,relations,edges,0,xp,RC,EC)
		context_rows = F.split_axis(contexts,len(entity_list),axis=0)
		context_of = dict()
		for entity,row in zip(entity_list,context_rows):
			context_of[entity] = row

		differences,relation_ids = [],[]
		for head,rel,tail,_label in candidates:
			relation_ids.append(rel)
			differences.append(context_of[head]-context_of[tail])
		differences = F.concat(differences,axis=0)

		relation_vecs = self.embedR(xp.array(relation_ids,'i'))
		if self.is_bound_wr:
			relation_vecs = F.tanh(relation_vecs)
		return F.batch_l2_norm_squared(differences+relation_vecs)
    def get_test_weight(self, in_size=None):
        """Return the noise-free weight (and bias) built from ``self.mu``.

        Used at test time: no noise is sampled, the current ``mu`` parameter
        is reshaped as is.

        Args:
            in_size (int): Input size of the variable to transform.  Only
                needed while ``self.in_size`` is still ``None``; it is then
                stored on the instance.

        Returns:
            ``(weight, bias)``.  With ``self.no_bias`` the weight has shape
            ``(out_size, in_size)`` and the bias is ``None``; otherwise the
            first ``out_size * (in_size - 1)`` entries of ``mu`` form the
            ``(out_size, in_size - 1)`` weight and the remainder is the bias.

        Raises:
            ValueError: If neither the instance nor the caller provides
                ``in_size``.
        """
        size_unknown = self.in_size is None
        if size_unknown and in_size is None:
            raise ValueError("in_size should not be none for test weights")
        if size_unknown:
            self.in_size = in_size

        if not self.no_bias:
            n_weight_entries = self.out_size * (self.in_size - 1)
            w, b = F.split_axis(self.mu, [n_weight_entries], axis=0)
            return F.reshape(w, (self.out_size, self.in_size - 1)), b
        return F.reshape(self.mu, (self.out_size, self.in_size)), None
Example #36
0
 def forward(self, x, index_array):
     """Transform rows of ``x`` and average them group by group.

     ``x`` is reshaped to ``(-1, self.dim)``, optionally passed through
     dropout, transformed by ``self.x2z`` and the configured activation
     (``'tanh'`` or ``'relu'``), and optionally added to the input
     (residual).  The rows are then split at ``index_array``; the last
     piece is discarded and every remaining piece is averaged over axis 0.
     Empty pieces contribute a zero vector.

     Returns:
         Variable stacking one averaged vector per group.
     """
     rows = x.reshape(-1, self.dim)
     if self.dropout_rate != 0:
         rows = F.dropout(rows, ratio=self.dropout_rate)

     z = self.x2z(rows)
     if self.activate == 'tanh':
         z = F.tanh(z)
     if self.activate == 'relu':
         z = F.relu(z)
     if self.is_residual:
         z = z + rows

     groups = F.split_axis(z, index_array, axis=0)[:-1]
     pooled = [
         F.average(group, axis=0) if len(group) > 0
         else Variable(np.zeros(self.dim, dtype=np.float32))
         for group in groups
     ]
     return F.stack(pooled)
Example #37
0
    def embedding_layer(self, chars, words, train):
        """Build word representations, optionally with character encodings.

        With ``chars`` the result is the word embedding concatenated (axis 1)
        with a character-level encoding: the character sequences are
        embedded, run through ``self.bi_char``, and the output at the last
        character of every word is selected.  Without ``chars`` the
        ``words`` are only concatenated along axis 0.

        Args:
            chars: List of per-word character id arrays, or ``None``.
            words: List of word arrays.
            train: Apply dropout with ``self.dropout_ratio`` when true.
        """
        if chars is None:
            words = F.concat(words, axis=0)
        else:
            char_counts = xp.array([c.shape[0] for c in chars]).astype('i')
            word_boundaries = xp.cumsum(char_counts[:-1])
            char_embs = self.char_embed(F.concat(chars, axis=0))
            char_seqs = F.split_axis(char_embs, word_boundaries, axis=0)
            _hy, _cy, char_outputs = self.bi_char(None, None, char_seqs)
            # Index of the last character of every word in the stacked output.
            last_char_rows = xp.cumsum(char_counts) - 1
            char_codes = F.get_item(F.vstack(char_outputs), last_char_rows)
            word_embs = self.word_embed(F.concat(words, axis=0))
            words = F.concat((word_embs, char_codes), axis=1)

        if train:
            words = F.dropout(words, self.dropout_ratio)

        return words
Example #38
0
    def __call__(self, x, margin_factor=1.0, train=True):
        """
        Embed samples using the CNN, then calculate distances and triplet loss.

        x is a batch of size 3n following the form:

        | anchor_1   |
        | [...]      |
        | anchor_n   |
        | positive_1 |
        | [...]      |
        | positive_n |
        | negative_1 |
        | [...]      |
        | negative_n |

        The margin factor is forced to 1.0 when ``train`` is false.
        """
        anchor_x, positive_x, negative_x = F.split_axis(x, 3, 0)
        anc = self.embed(anchor_x)
        pos = self.embed(positive_x)
        neg = self.embed(negative_x)

        dist_pos, dist_neg = self.squared_distance(anc, pos, neg)
        if train:
            mf = margin_factor
        else:
            mf = 1.0
        return self.compute_loss(dist_pos, dist_neg, mf)
Example #39
0
    def advance_state(self, previous_states, prev_y):
        """Advance the decoder states by one step.

        When the current mini-batch is smaller than the stored states, every
        state is first truncated to its leading ``current_mb_size`` rows.
        The context is computed from the last state (and from ``prev_y``
        when ``self.decoder_chain.use_goto_attention`` is set), concatenated
        with ``prev_y`` and fed to ``self.decoder_chain.gru``.

        Args:
            previous_states: Sequence of state Variables.
            prev_y: Previous output; ``prev_y.data.shape[0]`` is the current
                mini-batch size.

        Returns:
            ``(new_states, concatenated, attn)``.
        """
        current_mb_size = prev_y.data.shape[0]
        assert self.mb_size is None or current_mb_size <= self.mb_size

        if current_mb_size < len(previous_states[0].data):
            # Keep the leading rows of every state.  A comprehension replaces
            # the former ``xrange`` loop, which does not exist on Python 3.
            previous_states = tuple(
                F.split_axis(state, (current_mb_size, ), 0)[0]
                for state in previous_states)

        output_state = previous_states[-1]
        if self.decoder_chain.use_goto_attention:
            ci, attn = self.compute_ctxt(output_state, prev_y)
        else:
            ci, attn = self.compute_ctxt(output_state)
        concatenated = F.concat((prev_y, ci))

        new_states = self.decoder_chain.gru(previous_states, concatenated)
        return new_states, concatenated, attn
Example #40
0
    def update_core(self):
        """Run one training iteration on a combined generator/real batch.

        A noise batch is generated, passed through ``self.gen``, concatenated
        with the real batch and scored by ``self.dis`` in a single call.  The
        two softplus losses are summed, back-propagated once, and every
        optimizer in ``self._optimizers`` is updated.
        """
        batch = self._iterators['main'].next()
        in_arrays = self.converter(batch, self.device)
        true_x = in_arrays  # mnist (200, 1, 28, 28)

        # create input z as random
        # NOTE(review): the ``device == 0`` branch draws from a normal
        # distribution while the other branch draws uniformly from [-1, 1);
        # GPU ids other than 0 also take the NumPy branch -- confirm both
        # are intended.
        batchsize = true_x.shape[0]
        if self.device == 0:
            z = cuda.cupy.random.normal(size=(batchsize, self.z_dim, 1, 1), dtype=np.float32)
            z = Variable(z)
        else:
            z = np.random.uniform(-1, 1, (batchsize, self.z_dim, 1, 1))
            z = z.astype(dtype=np.float32)
            z = Variable(z)

        # G        -> x1                    ->  y of gen
        #              + -> X -> D -> split
        # Truedata -> x2                    ->  y of true data
        gen_output = self.gen(z) # gen_output (200, 1, 28, 28)
        x = F.concat((gen_output, true_x), 0) # gen_output + true_data = (400, 1, 28, 28)
        dis_output = self.dis(x)
        # First half of the scores belongs to generated samples, second half
        # to real samples (same order as the concatenation above).
        y_gen, y_data = F.split_axis(dis_output, 2, 0) # 0~1 value (200, 1, 1, 1)

        # Train D to label G's output as 1 (wrong) and the true data as 0
        # (correct).
        # sigmoid_cross_entropy(x, 0) == softplus(x)
        # sigmoid_cross_entropy(x, 1) == softplus(-x)
        loss_gen = F.sum(F.softplus(-y_gen))
        loss_data = F.sum(F.softplus(y_data))
        loss = (loss_gen + loss_data) / batchsize

        # Clear the gradients of every optimizer target before the single
        # backward pass.
        for optimizer in self._optimizers.values():
            optimizer.target.cleargrads()

        loss.backward()

        for optimizer in self._optimizers.values():
            optimizer.update()

        reporter.report({'loss':loss, 'gen/loss':loss_gen / batchsize, 'dis/loss':loss_data / batchsize})

        # Called on every iteration with the generated batch.
        save_image(gen_output, self.epoch, self.device)
    def __call__(self, batch_size, in_size=None):
        """
            Sample noisy weights and compute the associated loss
            Args:
                batch_size: Size of the current batch; the summed loss is
                    divided by it
                in_size (int): Input size of the variable to transform;
                    required while the parameters are not initialized yet
            Returns:
                weight, bias, loss: the reshaped weight matrix, the bias
                (``None`` when ``self.no_bias`` is set) and the loss with
                shape ``(1,)``
            Raises:
                ValueError: if the parameters are uninitialized and
                    ``in_size`` is not given
        """
        if self.mu.data is None or self.sigma.data is None:
            if in_size is None:
                raise ValueError(
                    "AdaptiveWeightNoise requires a in_size to intialize it's Parameters"
                )

            self._initialize_params(in_size)

        # Base parameters
        # mu_h: mean of mu, broadcast back to mu's shape.
        mu_h = F.broadcast_to(F.mean(self.mu), self.mu.shape)
        diff_mu_h = F.square(self.mu - mu_h)
        # sigma_h: mean of exp(sigma) + (mu - mu_h)^2, broadcast to sigma's
        # shape.  ``self.sigma`` is exponentiated wherever it is used as a
        # variance.
        sigma_h = F.broadcast_to(F.mean(F.exp(self.sigma) + diff_mu_h),
                                 self.sigma.shape)

        # Weight and bias
        # One standard-normal draw per entry of mu.
        eps = variable.Variable(
            self.xp.random.randn(self.mu.size).astype(self.xp.float32))
        W = self.mu + eps * F.sqrt(F.exp(self.sigma))

        # Loss
        loss_x = (F.log(sigma_h) - self.sigma) / 2.
        # The 1e-8 term keeps the denominator away from zero.
        loss_y = (diff_mu_h + F.exp(self.sigma) - sigma_h) / (
            (2. * sigma_h) + 1e-8)
        loss = F.reshape(F.sum(loss_x + loss_y), (1, )) / batch_size

        # Extract the bias if required
        if self.no_bias:
            return F.reshape(W, (self.out_size, self.in_size)), None, loss
        else:
            # The first out_size * (in_size - 1) entries are the weight, the
            # remainder is the bias.
            w, b = F.split_axis(W, [self.out_size * (self.in_size - 1)],
                                axis=0)
            return F.reshape(w, (self.out_size, self.in_size - 1)), b, loss
Example #42
0
    def train(self, positive, negative, links, relations, edges, xp):
        """Compute the training loss for positive and negative triples.

        Gradients are cleared first.  Context vectors are computed once for
        every entity occurring in either list; each triple ``(h, r, t)`` is
        scored as the squared L2 norm of
        ``context[h] - context[t] + relation_embedding[r]`` (the relation
        embedding goes through ``tanh`` when ``self.is_bound_wr`` is set).

        Args:
            positive: Re-iterable collection of ``(h, r, t)`` triples.
            negative: Re-iterable collection of ``(h, r, t)`` triples, paired
                element-wise with ``positive`` by the ``'relative'``
                objective.
            links, relations, edges: Forwarded to ``self.get_context``.
            xp: Array module used to build the relation id array.

        Returns:
            The scalar loss for ``self.objective_function`` equal to
            ``'relative'`` (``relu(threshold + pos - neg)``) or
            ``'absolute'`` (``pos + relu(threshold - neg)``), summed over the
            batch; ``None`` for any other objective.
        """
        self.cleargrads()

        entities = set()
        for h, r, t in positive:
            entities.add(h)
            entities.add(t)
        for h, r, t in negative:
            entities.add(h)
            entities.add(t)
        entities = list(entities)

        context = self.get_context(entities, links, relations, edges, 0, xp)
        context_rows = F.split_axis(context, len(entities), axis=0)
        edict = dict(zip(entities, context_rows))

        def _triple_scores(triples):
            # Squared distance ||h + r - t||^2 for every triple.
            diffs, rels = [], []
            for h, r, t in triples:
                rels.append(r)
                diffs.append(edict[h] - edict[t])
            diffs = F.concat(diffs, axis=0)
            xr = self.embedR(xp.array(rels, 'i'))
            if self.is_bound_wr:
                xr = F.tanh(xr)
            return F.batch_l2_norm_squared(diffs + xr)

        pos = _triple_scores(positive)
        neg = _triple_scores(negative)

        # F.sum reduces in a single graph node; the builtin sum() iterated
        # the Variable and created one indexing node per element.
        if self.objective_function == 'relative':
            return F.sum(F.relu(self.threshold + pos - neg))
        if self.objective_function == 'absolute':
            return F.sum(pos + F.relu(self.threshold - neg))
        # Unknown objective: nothing is returned, as before.
        return None
Example #43
0
    def translate(self, xs, cur_max_index=1):
        """
        Decode a single input, choosing the ``cur_max_index``-th best token
        at every step.

        Make sure only one video is passed per call: ``xs`` is wrapped in a
        one-element batch, and the number of decoding steps is ``len(xs)``.

        :param xs: the single input sequence
        :param cur_max_index: rank of the token picked at every step
            (1 selects the arg-max)
        :return: host array of the picked tokens, transposed so that the
            batch axis comes first
        """
        max_length = len(xs)
        xs = [xs]
        batch = len(xs)
        with chainer.no_backprop_mode(), chainer.using_config('train', False):
            xs = [x[::-1] for x in xs]
            exs = sequence_embed(self.embed_x, xs)
            h, c, _ = self.encoder(None, None, exs)
            ys = self.xp.full(batch, EOS, np.int32)
            result = []
            for i in range(max_length):
                eys = self.embed_y(ys)
                eys = F.split_axis(eys, batch, 0)
                h, c, ys = self.decoder(h, c, eys)
                cys = F.concat(ys, axis=0)
                wy = self.W(cys)
                # Find a way to generate several candidates: repeatedly take
                # the arg-max, remember its score and overwrite it with -1 so
                # the next pass picks the next-best token.  After the loop
                # ``ys`` holds the ``cur_max_index``-th pick.
                # NOTE(review): -1 only masks entries whose score is above
                # -1; the inner loops also reuse the name ``i`` -- confirm
                # both are acceptable.
                tmp = []
                for i in range(cur_max_index):
                    ys = self.xp.argmax(wy.data, axis=1).astype(np.int32)
                    t = wy.data[0][ys]
                    tmp.append((ys, t))
                    wy.data[0][ys] = -1
                # Restore the overwritten scores.
                for i in range(cur_max_index):
                    yss, t = tmp[i]
                    wy.data[0][yss] = t
                result.append(ys)

            # Using `xp.concatenate(...)` instead of `xp.stack(result)` here to
            # support np 1.9.
            result = cuda.to_cpu(
                self.xp.concatenate(
                    [self.xp.expand_dims(x, 0) for x in result]).T)
            return result
Example #44
0
 def get_scores(self, candidates, train_link, relations, aux_link, xp, mode):
     """Score candidate ``(h, r, t, l)`` tuples with a TransE-style distance.

     Context vectors are computed once for every entity that appears as head
     or tail.  The score of a candidate is the squared L2 norm of
     ``context[h] - context[t] + tanh(relation_embedding[r])``.

     Returns:
         One score per candidate, in input order.
     """
     unique_entities = set()
     for head, _rel, tail, _label in candidates:
         unique_entities.add(head)
         unique_entities.add(tail)
     entity_list = list(unique_entities)

     contexts = self.get_context(
         entity_list, train_link, relations, aux_link, 0, xp)
     context_rows = F.split_axis(contexts, len(entity_list), axis=0)
     context_of = dict()
     for entity, row in zip(entity_list, context_rows):
         context_of[entity] = row

     differences, relation_ids = [], []
     for head, rel, tail, _label in candidates:
         relation_ids.append(rel)
         differences.append(context_of[head] - context_of[tail])
     differences = F.concat(differences, axis=0)

     relation_vecs = F.tanh(self.embedR(xp.array(relation_ids, 'i')))
     # TransE score function
     # f = || h + r - t ||
     return F.batch_l2_norm_squared(differences + relation_vecs)
Example #45
0
def _n_step_lstm_base(
        n_layers, dropout_ratio, hx, cx, ws, bs, xs, use_bi_direction,
        recurrent_dropout_ratio=0.0, use_variational_dropout=False, **kwargs):
    """Run an n-step LSTM, optionally with recurrent dropout.

    Recurrent dropout is only active when
    ``chainer.configuration.config.train`` is truthy and
    ``recurrent_dropout_ratio`` is positive:

    * with ``use_variational_dropout`` the call is handed to
      ``_n_step_rnn_impl`` (``**kwargs`` is not forwarded on that path);
    * otherwise ("drop connect") dropout is applied to the entries at
      positions ``4:`` of every element of ``ws`` before the regular call.

    In every other case the arguments go unchanged to
    ``F.rnn.n_step_lstm.n_step_lstm_base``.
    """
    recurrent_dropout_on = (
        chainer.configuration.config.train and recurrent_dropout_ratio > 0.0)

    if recurrent_dropout_on and use_variational_dropout:
        return _n_step_rnn_impl(
            F.rnn.n_step_lstm._lstm,
            n_layers, dropout_ratio, hx, cx, ws, bs, xs, use_bi_direction,
            recurrent_dropout_ratio, use_variational_dropout)

    if recurrent_dropout_on:
        # Drop connect: copy each weight group so the caller's sequences are
        # not modified, then drop out the trailing weights of every group in
        # a single concatenated pass and write them back.
        ws = [list(group) for group in ws]
        trailing = [w for group in ws for w in group[4:]]
        dropped = F.split_axis(
            F.dropout(F.concat(trailing), recurrent_dropout_ratio),
            len(trailing), axis=1)
        for index, group in enumerate(ws):
            group[4:] = dropped[4 * index: 4 * (index + 1)]

    return F.rnn.n_step_lstm.n_step_lstm_base(
        n_layers, dropout_ratio, hx, cx, ws, bs, xs, use_bi_direction,
        **kwargs)
    def prediction(self, x):
        """Predict from ``x`` using attention-weighted ECFP and FCFP features.

        Returns:
            A tuple ``(pred, attention_ecfp, attention_fcfp)`` where ``pred``
            is the output of ``self.dnn`` and the two attention values are
            the halves of the softmaxed attention scores.
        """
        x = Variable(x)
        ecfp = self.build_ecfp(x)
        fcfp = self.build_fcfp(x)

        hidden = self.attention_layer1(F.concat((ecfp, fcfp), axis=1))

        # The two attention branches are joined along axis 1, normalised
        # together by softmax, then split back into two equal parts.
        branch_scores = F.concat(
            (F.rrelu(self.attention_layer2(hidden)),
             F.rrelu(self.attention_layer3(hidden))),
            axis=1)
        attention_ecfp, attention_fcfp = F.split_axis(
            F.softmax(branch_scores), 2, 1)

        weighted = F.concat(
            (attention_ecfp * ecfp, attention_fcfp * fcfp), axis=1)
        return self.dnn(weighted), attention_ecfp, attention_fcfp
    def __call__(self, input_tensor, cur_state, time):
        """Advance the convolutional LSTM cell by one step.

        Args:
            input_tensor: input for this step, concatenated with the previous
                hidden state along axis 1.
            cur_state: pair ``(h_cur, c_cur)``.
            time: forwarded to ``self.bn_ih`` as the ``time`` keyword.

        Returns:
            The pair ``(h_next, c_next)``.
        """
        h_cur, c_cur = cur_state

        # concatenate along channel axis, then convolve and normalise
        gates = self.conv(F.concat([input_tensor, h_cur], axis=1))
        gates = self.bn_ih(gates, time=time)

        # The gate tensor must consist of exactly four hidden_dim-wide chunks.
        assert gates.shape[1] % self.hidden_dim == 0
        assert gates.shape[1] // self.hidden_dim == 4
        n_chunks = gates.shape[1] // self.hidden_dim
        cc_i, cc_f, cc_o, cc_g = F.split_axis(gates, n_chunks, axis=1)

        input_gate = F.sigmoid(cc_i)
        forget_gate = F.sigmoid(cc_f)
        output_gate = F.sigmoid(cc_o)
        candidate = F.tanh(cc_g)

        c_next = forget_gate * c_cur + input_gate * candidate
        h_next = output_gate * F.tanh(c_next)
        return h_next, c_next
Example #48
0
def a():
    """Evaluate a CTC loss on random inputs; the loss value is discarded."""
    vocab_size = 6
    seq_length = 5
    batchsize = 2
    n_steps = seq_length * 2

    # Random float32 activations, rearranged into one piece per time step.
    samples = np.random.normal(
        0, 1, size=batchsize * vocab_size * seq_length * 2)
    samples = samples.reshape(
        (batchsize, vocab_size, 1, n_steps)).astype(np.float32)
    x = functions.swapaxes(Variable(samples), 1, 3)
    x = functions.reshape(x, (batchsize, -1))
    x = functions.split_axis(x, n_steps, axis=1)

    # Two label rows; the second is padded with zeros after three labels.
    labels = np.asarray([
        [1, 2, 3, 4, 5],
        [1, 2, 3, 0, 0],
    ], dtype=np.int32)
    t = Variable(labels)

    x_length = Variable(np.asarray([n_steps, seq_length]))
    t_length = Variable(np.asarray([5, 3]))
    ctc.connectionist_temporal_classification(x, t, 0, x_length, t_length)
Example #49
0
    def encode_words(self, word_list):
        """Encode every word of ``word_list`` and record its row index.

        The words are embedded in one concatenated batch, run through
        ``self.rnn`` as separate sequences, and the last output row of each
        sequence is stacked into ``self.embedding``.  ``self.cache`` then
        maps ``tuple(word)`` to the word's position in ``word_list``.
        """
        # Offsets at which the concatenated batch is cut back into words.
        split_points = np.cumsum([len(word) for word in word_list][:-1])

        flat_ids = F.concat(
            [chainer.Variable(self.xp.array(word, dtype=self.xp.int32))
             for word in word_list],
            axis=0)
        embedded = self.embed_layer(flat_ids)
        if self.inp_dropout > 0.:
            embedded = F.dropout(embedded, ratio=self.inp_dropout)

        # split back to batch size
        per_word = F.split_axis(embedded, split_points, axis=0)
        _, _, outputs = self.rnn(None, None, per_word)
        self.embedding = F.vstack([out[-1] for out in outputs])

        for position, word in enumerate(word_list):
            self.cache[tuple(word)] = position
    def __call__(self, vis=None, sos=None, word=None):
        """Run one step, either initialising from ``vis``/``sos`` or on ``word``.

        When ``sos`` is given, the LSTM state is initialised from ``vis`` and
        ``sos`` via ``self.af_LSTM.set_state``; otherwise ``word`` is fed to
        ``self.af_LSTM``.

        Returns:
            The pair ``(s_t, h)``, where ``s_t`` is the dropped-out product of
            the gate ``g`` and ``tanh`` of the LSTM cell state.
        """
        ratio = self.dropout_ratio

        if sos is None:
            # Regular step driven by the (dropped-out) word input.
            word_emb = F.dropout(word, ratio=ratio)
            g = F.sigmoid(self.w2h(word_emb) + self.h2h(self.af_LSTM.h))
            h = F.dropout(self.af_LSTM(word_emb), ratio=ratio)
        else:
            # Initial step: derive the cell and hidden state directly.
            pre_activation = self.vis2h(vis) + self.sos2h(sos)
            a, i, o, g = F.split_axis(pre_activation, 4, axis=1)
            a = F.tanh(a)
            i = F.sigmoid(i)
            o = F.sigmoid(o)
            g = F.sigmoid(g)
            c = a * i
            h = F.dropout(o * F.tanh(c), ratio=ratio)
            self.af_LSTM.set_state(c, h)

        s_t = F.dropout(g * F.tanh(self.af_LSTM.c), ratio=ratio)
        return s_t, h
Example #51
0
    def __call__(self, xs, batchsize):
        """Classify a batch of sequences with attention over LSTM outputs.

        Args:
            xs: sequence of per-item index arrays; each is wrapped in
                ``Variable`` and embedded with ``self.embed``.
            batchsize: first dimension used when reshaping the weighted
                hidden states; assumed to equal ``len(xs)`` -- TODO confirm.

        Returns:
            A pair ``(y, attention)`` where ``y`` is the output of
            ``self.l3`` and ``attention`` is the raw array of softmaxed
            attention weights for the first sequence.
        """
        xs_embed = [F.dropout(self.embed(Variable(item)), ratio=0.5) for item in xs]
        hy, cy, ys = self.lstm(hx=None, cx=None, xs=xs_embed)
        ys = [F.dropout(item, ratio=0.25) for item in ys]

        # Attention computation: score all time steps in one concatenated
        # pass, then cut the scores back into one piece per sequence.
        concat_ys = F.concat(ys, axis=0)
        attn = F.dropout(self.l_attn(concat_ys), ratio=0.25)
        split_attention = F.split_axis(attn, np.cumsum([len(item) for item in xs])[:-1], axis=0)
        # Padding with a large negative value drives the softmax weight of
        # the padded positions towards zero.
        split_attention_pad = F.pad_sequence(split_attention, padding=-1024.0)
        attn_softmax = F.softmax(split_attention_pad, axis=1)

        # Weight every padded hidden state by its attention and sum per item.
        ys_pad = F.pad_sequence(ys, length=None, padding=0.0)
        ys_pad_reshape = F.reshape(ys_pad, (-1, ys_pad.shape[-1]))
        attn_softmax_reshape = F.broadcast_to(F.reshape(attn_softmax, (-1, attn_softmax.shape[-1])), ys_pad_reshape.shape)
        attention_hidden = ys_pad_reshape * attn_softmax_reshape
        attention_hidden_reshape = F.reshape(attention_hidden, (batchsize, -1, attention_hidden.shape[-1]))
        result = F.sum(attention_hidden_reshape, axis=1)

        y = self.l3(result)
        return y, attn_softmax[0].data
Example #52
0
    def forward(self, xs):
        """Run the conv / LSTM / attention-pooling pipeline on ``xs``.

        ``xs`` is batched with ``concat_examples`` (padding value -1), passed
        through two conv + pooling stages and a linear layer, fed to
        ``self.lstm`` one sequence per sample, pooled with softmax attention
        over each sequence, and finally classified by ``self.l3``/``self.l4``.

        Returns:
            The output of ``self.l4``.
        """
        n_samples = len(xs)

        h = chainer.dataset.convert.concat_examples(xs, padding=-1)
        h = h.transpose(0, 3, 1, 2)  # bring the last axis to position 1

        # Conv stage 1
        h = self.af(self.b1(self.c1(h)))
        h = F.max_pooling_2d(h, ksize=(2, 4), stride=(2, 4))

        # Conv stage 2
        h = self.af(self.b2(self.c2(h)))
        h = F.max_pooling_2d(h, ksize=(1, 2), stride=(1, 2))

        # Flatten so each (sample, axis-2 position) pair becomes one row for
        # the linear layer, then regroup the rows per sample.
        h = h.transpose(0, 2, 1, 3)
        h = h.reshape(n_samples * h.shape[1], -1)
        h = self.cl(h)
        h = h.reshape(n_samples, -1, 786)

        _, _, ys = self.lstm(None, None, list(h))

        # Score every step in one concatenated pass, then split per sequence.
        sections = np.cumsum([len(seq) for seq in ys][:-1])
        scores = self.a2(F.relu(self.a1(
            F.dropout(F.concat(ys, axis=0), ratio=self.dropout_rate))))
        scores_per_seq = F.split_axis(scores, sections, 0)

        # Attention pooling: softmax over axis 0 of each sequence's scores,
        # then a weighted sum of that sequence's LSTM outputs.
        pooled = [
            F.sum(seq * F.broadcast_to(F.softmax(score, axis=0), seq.shape),
                  axis=0)[None, :]
            for seq, score in zip(ys, scores_per_seq)
        ]
        h = F.concat(pooled, axis=0)

        h = self.af(self.b3(self.l3(h)))
        return self.l4(h)
Example #53
0
    def predict(self, input_x):
        """Decode the predictor output into box coordinates and scores.

        Returns:
            A tuple ``(box_x, box_y, box_w, box_h, conf, prob)``.  The box
            centre is the sigmoid offset plus the grid-cell index, divided by
            the grid size; the box size is ``exp`` of the raw value times the
            anchor, divided by the grid size.
        """
        output = self.predictor(input_x)
        _, _, _, _ = input_x.shape  # input must unpack into four dimensions
        batch_size, _, grid_h, grid_w = output.shape
        n_boxes = self.predictor.n_boxes

        per_box = F.reshape(
            output,
            (batch_size, n_boxes, self.predictor.n_classes + 5,
             grid_h, grid_w))
        x, y, w, h, conf, prob = F.split_axis(
            per_box, (1, 2, 3, 4, 5), axis=2)

        x = F.sigmoid(x)  # activation for x
        y = F.sigmoid(y)  # activation for y
        conf = F.sigmoid(conf)  # activation for conf
        # activation for prob: softmax with axes 1 and 2 swapped, then
        # swapped back
        prob = F.transpose(prob, (0, 2, 1, 3, 4))
        prob = F.softmax(prob)
        prob = F.transpose(prob, (0, 2, 1, 3, 4))

        # Convert x, y, w, h to absolute coordinates.
        # NOTE: the shift and anchor arrays are built with NumPy; the original
        # carried a commented-out `to_gpu()` call for them.
        col_index = np.arange(grid_w, dtype=np.float32)
        row_index = np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1)
        x_shift = Variable(np.broadcast_to(col_index, x.shape))
        y_shift = Variable(np.broadcast_to(row_index, y.shape))

        anchors = np.array(self.anchors, dtype=np.float32)
        anchor_shape = (n_boxes, 1, 1, 1)
        w_anchor = Variable(np.broadcast_to(
            np.reshape(anchors[:, 0], anchor_shape), w.shape))
        h_anchor = Variable(np.broadcast_to(
            np.reshape(anchors[:, 1], anchor_shape), h.shape))

        box_x = (x + x_shift) / grid_w
        box_y = (y + y_shift) / grid_h
        box_w = F.exp(w) * w_anchor / grid_w
        box_h = F.exp(h) * h_anchor / grid_h

        return box_x, box_y, box_w, box_h, conf, prob
    def __call__(self, xs):
        """Encode ``xs`` and apply nine parallel output heads.

        xs : list(Variable)  (per the original documentation)

        Returns the concatenation of the nine head outputs, in the order
        self, qyn, qw, ayn, aw, res, fil, con, req.
        """
        hy, _, _ = self.xh(None, None, xs)
        # Split the final state in two along axis 0 and join the halves along
        # the last axis before the ReLU.
        h = F.relu(
            F.concat(F.concat(F.split_axis(hy, 2, axis=0), axis=2), axis=0))

        # Every head is linear -> ReLU -> dropout -> linear; the order is
        # kept fixed so the dropout calls happen in the same sequence.
        head_names = (
            'self', 'qyn', 'qw', 'ayn', 'aw', 'res', 'fil', 'con', 'req')
        head_outputs = []
        for head in head_names:
            hidden = F.dropout(F.relu(getattr(self, 'h_100_' + head)(h)))
            head_outputs.append(getattr(self, 'h_1_' + head)(hidden))

        return F.concat(tuple(head_outputs))
Example #55
0
	def __call__(self, X, split_into_variables=True, add_noise_to_input=True):
		"""Embed ``X``, run the layer stack and apply the dense layer.

		Args:
			X: index array; ``X.shape[0]`` and ``X.shape[1]`` are taken as
				the batch size and the sequence length.
			split_into_variables: when true, the output is reshaped to
				``(batchsize, -1)`` and split into ``seq_length`` pieces
				along axis 1; otherwise it is returned whole.
			add_noise_to_input: when true, standard-normal noise is added
				to the embedding wherever ``X == BLANK``.

		Returns:
			The dense-layer output with axes 1 and 2 swapped, either as a
			single value or split as described above.
		"""
		xp = self.xp
		batchsize = X.shape[0]
		seq_length = X.shape[1]
		embedding = self.embed(X)

		# insert noise at <BLANK> (optional)
		if add_noise_to_input:
			noise = xp.random.normal(0, 1, embedding.shape)
			mask = X == BLANK
			mask = xp.broadcast_to(xp.expand_dims(mask, 2), noise.shape)
			embedding += noise * mask

		embedding = F.swapaxes(embedding, 1, 2)

		# The embedding only joins the dense sum when its width matches the
		# hidden width.
		in_data = []
		if self.ndim_embedding == self.ndim_h:
			in_data.append(embedding)

		out_data = self._forward_layer(0, embedding)
		in_data.append(out_data)
		# `range` instead of the Python-2-only `xrange`, so the loop runs
		# under both Python 2 and Python 3.
		for layer_index in range(1, self.num_layers):
			# dense conv: feed the sum of all earlier outputs, else the last
			layer_input = sum(in_data) if self.densely_connected else in_data[-1]
			out_data = self._forward_layer(layer_index, layer_input)
			in_data.append(out_data)

		if self.densely_connected:
			out_data = sum(in_data)	# dense conv

		if self.dropout:
			out_data = F.dropout(out_data, ratio=self.dropout)

		out_data = self.dense(out_data)
		out_data = F.swapaxes(out_data, 1, 2)

		if split_into_variables:
			out_data = F.reshape(out_data, (batchsize, -1))
			out_data = F.split_axis(out_data, seq_length, axis=1)

		return out_data
Example #56
0
    def __call__(self, site_feat, nbr_feat, nbr_feat_idx):
        """Update site features from gated neighbour information.

        Args:
            site_feat: 2-D site features, unpacked as ``(_, n_site_feat)``.
            nbr_feat: 3-D neighbour features, unpacked as
                ``(n_site, n_nbr, n_nbr_feat)``.
            nbr_feat_idx: index used to gather neighbour rows from
                ``site_feat``.

        Returns:
            ``softplus(site_feat + bn2(sum over neighbours of gate * core))``.
        """
        n_site, n_nbr, n_nbr_feat = nbr_feat.shape
        _, n_site_feat = site_feat.shape

        # Per (site, neighbour) pair: the site's own features, the
        # neighbour's site features and the neighbour features themselves.
        own_feat = functions.broadcast_to(
            site_feat[:, None, :], (n_site, n_nbr, n_site_feat))
        gathered_feat = site_feat[nbr_feat_idx]
        joined = functions.concat([own_feat, gathered_feat, nbr_feat], axis=2)

        flat = joined.reshape(n_site * n_nbr, 2 * n_site_feat + n_nbr_feat)
        projected = self.bn1(self.fc(flat)).reshape(
            n_site, n_nbr, 2 * n_site_feat)

        # First half gates (sigmoid) the second half (softplus).
        gate_part, core_part = functions.split_axis(projected, 2, axis=-1)
        gated = functions.sigmoid(gate_part) * functions.softplus(core_part)

        neighbour_sum = self.bn2(functions.sum(gated, axis=1))
        return functions.softplus(site_feat + neighbour_sum)
Example #57
0
    def forward_nstep_lstm(self, es):
        """Apply the layer stack to every sample and layer-normalise the batch.

        ``es`` is stacked into one batch, each sample is run through the
        layers named in ``self.layer_name`` (each followed by ``self.act``)
        and dropout, and the re-stacked result is passed to
        ``self.layer_norm``.

        Returns:
            A list with one squeezed slice of the normalised batch per sample.
        """
        # NOTE(review): the original comments labelled the per-sample tensor
        # as (D, T) after the second transpose, but transposing a slice of
        # the (0, 2, 1)-transposed stack with axes (1, 0) restores the
        # per-sample axis order of the input -- confirm the intended layout.
        batch = F.transpose(F.stack(es), axes=(0, 2, 1))

        processed = []
        for sample in batch:
            sample = F.transpose(sample, axes=(1, 0))
            sample = F.expand_dims(sample, axis=0)  # add a leading unit axis
            for layer_name in self.layer_name:
                sample = self.act(getattr(self, layer_name)(sample))
            # Swap the last two axes, then drop the leading unit axis.
            sample = F.transpose(sample, axes=(0, 2, 1))[0]
            processed.append(F.dropout(sample, self.dropout_ratio))

        stacked = F.stack(processed)
        slices = F.split_axis(
            self.layer_norm(stacked), stacked.shape[0], axis=0,
            force_tuple=True)
        return [F.squeeze(piece) for piece in slices]
Example #58
0
    def __call__(self, prev_hg, prev_cg, prev_z, v, r, u):
        """Compute the next hidden and cell state of the LSTM core.

        ``u`` is downsampled and ``v`` broadcast first; ``r`` is broadcast
        only when ``r.shape[2] == 1``.  The gate pre-activations come from
        ``self.lstm`` applied to the axis-1 concatenation of
        ``(prev_hg, v, r, prev_z, u)``.

        Returns:
            The pair ``(next_h, next_c)``.
        """
        u = self.downsample_u(u)
        v = self.broadcast_v(v)
        if r.shape[2] == 1:
            r = self.broadcast_r(r)

        gate_inputs = self.lstm(
            cf.concat((prev_hg, v, r, prev_z, u), axis=1))

        if self.use_cuda_kernel:
            next_h, next_c = CoreFunction()(gate_inputs, prev_cg)
            return next_h, next_c

        # Split order: forget gate, input gate, tanh input, output gate.
        forget_in, input_in, tanh_in, output_in = cf.split_axis(
            gate_inputs, 4, axis=1)

        next_c = (cf.sigmoid(forget_in) * prev_cg
                  + cf.sigmoid(input_in) * cf.tanh(tanh_in))
        next_h = cf.sigmoid(output_in) * cf.tanh(next_c)
        return next_h, next_c
Example #59
0
    def forward(self, inputs):
        """Embed ``inputs`` and collect the activations of every LSTM layer.

        Parameters
        ----------
        inputs:
            Passed to ``self._token_embedder.forward``.  The original
            documentation describes it as character ids of shape
            ``(batch_size, timesteps, 50)`` -- TODO confirm.

        Returns
        -------
        Dict with keys:

        ``'activations'``:
            A list whose first entry is the token representation concatenated
            with itself along the last axis, followed by one entry per slice
            of the LSTM output along axis 0.
        ``'mask'``:
            The mask returned by the token embedder.

        NOTE(review): the original documentation states that the outputs
        include begin/end-of-sequence markers (``timesteps + 2``); that is
        not visible in this method -- confirm against the embedder.
        """
        embedded = self._token_embedder.forward(inputs)
        type_representation = embedded['token_embedding']
        mask = embedded['mask']

        lstm_outputs = self._elmo_lstm.forward(type_representation, mask)

        # Prepare the output.  The first layer is duplicated.
        activations = [
            F.concat([type_representation, type_representation], axis=-1)
        ]
        layer_slices = F.split_axis(
            lstm_outputs, lstm_outputs.shape[0], axis=0)
        activations.extend(
            F.squeeze(layer_slice, 0) for layer_slice in layer_slices)

        return {
            'activations': activations,
            'mask': mask,
        }
Example #60
0
    def translate(self, xs, max_length=100):
        """Greedily decode a batch of source sequences.

        Args:
            xs: source sequences, each embedded with ``self.embed_src``.
            max_length: number of decoding steps to run.

        Returns:
            A list with one array of predicted ids per source sequence, cut
            just before the first ``EOS`` when one was produced.
        """
        n_batch = len(xs)

        with chainer.no_backprop_mode(), chainer.using_config('train', False):
            exs = [self.embed_src(x) for x in xs]
            h, c, memory = self.encoder(None, None, exs)

            # Fold the two halves of every memory row into one by summing.
            half = memory[0].shape[1] // 2
            memory = [m.data[:, :half] + m.data[:, half:] for m in memory]

            # Reduce the 2 * n_layers encoder states to n_layers entries.
            # NOTE(review): `h` sums each pair (i, i + 1) while `c` keeps
            # only entry i -- confirm that the asymmetry is intended.
            pair_starts = range(0, 2 * self.n_layers, 2)
            h = self.xp.dstack(
                [h.data[i] + h.data[i + 1] for i in pair_starts]
            ).transpose(2, 0, 1)
            c = self.xp.dstack(
                [c.data[i] for i in pair_starts]
            ).transpose(2, 0, 1)

            ys = self.xp.full(n_batch, BOS, 'i')
            steps = []
            for _ in range(max_length):
                eys = F.split_axis(self.embed_dst(ys), n_batch, 0)
                h, c, ys = self.decoder_with_attn(h, c, eys, memory)
                wy = self.fc(F.concat(ys, axis=0))
                # Greedy choice: highest-scoring id for every batch element.
                ys = self.xp.argmax(wy.data, axis=1).astype('i')
                steps.append(ys)

        # One row per batch element, one column per decoding step.
        result = cuda.to_cpu(self.xp.vstack(steps)).T

        outs = []
        for row in result:
            eos_positions = np.argwhere(row == EOS)
            if len(eos_positions) > 0:
                row = row[:eos_positions[0, 0]]
            outs.append(row)
        return outs